diff --git a/cpp/examples/arrow/parquet_column_encryption.cc b/cpp/examples/arrow/parquet_column_encryption.cc index 2ea4f44f172..7b2d5d80bf4 100644 --- a/cpp/examples/arrow/parquet_column_encryption.cc +++ b/cpp/examples/arrow/parquet_column_encryption.cc @@ -19,6 +19,7 @@ #include "arrow/dataset/file_parquet.h" #include "arrow/dataset/parquet_encryption_config.h" #include "arrow/filesystem/localfs.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/crypto_factory.h" #include "parquet/encryption/test_in_memory_kms.h" @@ -106,9 +107,9 @@ arrow::Result> GetTable() { std::shared_ptr GetCryptoFactory() { // Configure KMS. - std::unordered_map key_map; - key_map.emplace("footerKeyId", "0123456789012345"); - key_map.emplace("columnKeyId", "1234567890123456"); + std::unordered_map key_map; + key_map.emplace("footerKeyId", arrow::util::SecureString("0123456789012345")); + key_map.emplace("columnKeyId", arrow::util::SecureString("1234567890123456")); auto crypto_factory = std::make_shared(); auto kms_client_factory = diff --git a/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc b/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc index e0a1f80b6b1..8e39ca100fc 100644 --- a/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc +++ b/cpp/examples/parquet/low_level_api/encryption_reader_writer.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include @@ -39,9 +40,9 @@ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; const char* PARQUET_FILENAME = "parquet_cpp_example.parquet.encrypted"; -const char* kFooterEncryptionKey = "0123456789012345"; // 128bit/16 -const char* kColumnEncryptionKey1 = "1234567890123450"; -const char* kColumnEncryptionKey2 = "1234567890123451"; +const arrow::util::SecureString kFooterEncryptionKey("0123456789012345"); +const arrow::util::SecureString kColumnEncryptionKey1("1234567890123450"); +const arrow::util::SecureString kColumnEncryptionKey2("1234567890123451"); int main(int argc, char** argv) { /********************************************************************************** diff --git a/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc b/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc index 6b6e9b6b1b4..98859ba9fc6 100644 --- a/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc +++ b/cpp/examples/parquet/low_level_api/encryption_reader_writer_all_crypto_options.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -92,9 +93,9 @@ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; -const char* kFooterEncryptionKey = "0123456789012345"; // 128bit/16 -const char* kColumnEncryptionKey1 = "1234567890123450"; -const char* kColumnEncryptionKey2 = "1234567890123451"; +const arrow::util::SecureString kFooterEncryptionKey("0123456789012345"); +const arrow::util::SecureString kColumnEncryptionKey1("1234567890123450"); +const arrow::util::SecureString kColumnEncryptionKey2("1234567890123451"); const char* fileName = "tester"; using FileClass = ::arrow::io::FileOutputStream; diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc index d2e1763c62f..91d813530d4 100644 --- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc @@ -34,6 +34,7 @@ #include "arrow/testing/random.h" #include "arrow/type.h" #include "arrow/util/future.h" +#include "arrow/util/secure_string.h" #include "arrow/util/thread_pool.h" #include "parquet/arrow/reader.h" #include "parquet/encryption/crypto_factory.h" @@ -41,12 +42,16 @@ #include "parquet/encryption/kms_client.h" #include "parquet/encryption/test_in_memory_kms.h" -constexpr std::string_view kFooterKeyMasterKey = "0123456789012345"; +using arrow::util::SecureString; + +const SecureString kFooterKeyMasterKey("0123456789012345"); constexpr std::string_view kFooterKeyMasterKeyId = "footer_key"; constexpr std::string_view kFooterKeyName = "footer_key"; -constexpr std::string_view kColumnMasterKey = "1234567890123450"; + +const SecureString kColumnMasterKey("1234567890123450"); constexpr std::string_view kColumnMasterKeyId = "col_key"; constexpr std::string_view kColumnKeyMapping = "col_key: a"; + constexpr std::string_view kBaseDir = ""; using arrow::internal::checked_pointer_cast; @@ -105,7 +110,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam key_map; + std::unordered_map key_map; key_map.emplace(kColumnMasterKeyId, kColumnMasterKey); key_map.emplace(kFooterKeyMasterKeyId, kFooterKeyMasterKey); @@ -145,7 +150,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam( - std::string(kFooterKeyMasterKey)) + kFooterKeyMasterKey) ->build(); auto writer_properties = std::make_unique() ->encryption(file_encryption_properties) @@ -230,7 +235,7 @@ class DatasetEncryptionTestBase : public testing::TestWithParam() - ->footer_key(std::string(kFooterKeyMasterKey)) + ->footer_key(kFooterKeyMasterKey) ->build(); parquet_scan_options->reader_properties->file_decryption_properties( file_decryption_properties); @@ -370,7 +375,7 @@ TEST_P(DatasetEncryptionTest, ReadSingleFile) { // Configure decryption keys via file decryption properties with static footer key. file_decryption_properties = std::make_unique() - ->footer_key(std::string(kFooterKeyMasterKey)) + ->footer_key(kFooterKeyMasterKey) ->build(); } auto reader_properties = parquet::default_reader_properties(); diff --git a/cpp/src/parquet/encryption/crypto_factory.cc b/cpp/src/parquet/encryption/crypto_factory.cc index f420a7307e3..50b07453788 100644 --- a/cpp/src/parquet/encryption/crypto_factory.cc +++ b/cpp/src/parquet/encryption/crypto_factory.cc @@ -18,6 +18,7 @@ #include #include "arrow/util/logging.h" +#include "arrow/util/secure_string.h" #include "arrow/util/string.h" #include "parquet/encryption/crypto_factory.h" @@ -26,6 +27,8 @@ #include "parquet/encryption/file_system_key_material_store.h" #include "parquet/encryption/key_toolkit_internal.h" +using arrow::util::SecureString; + namespace parquet::encryption { void CryptoFactory::RegisterKmsClientFactory( @@ -71,8 +74,8 @@ std::shared_ptr CryptoFactory::GetFileEncryptionProper int dek_length = dek_length_bits / 8; - std::string footer_key(dek_length, '\0'); - RandBytes(reinterpret_cast(footer_key.data()), footer_key.size()); + SecureString footer_key(dek_length, '\0'); + RandBytes(footer_key.as_span().data(), footer_key.size()); std::string footer_key_metadata = key_wrapper.GetEncryptionKeyMetadata(footer_key, footer_key_id, true); @@ -146,8 +149,9 @@ ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties column_name); } - std::string column_key(dek_length, '\0'); - RandBytes(reinterpret_cast(column_key.data()), column_key.size()); + SecureString column_key(dek_length, '\0'); + RandBytes(column_key.as_span().data(), column_key.size()); + std::string column_key_key_metadata = key_wrapper->GetEncryptionKeyMetadata(column_key, column_key_id, false); diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index 90850763902..e156ec00446 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -26,31 +26,35 @@ #include "arrow/util/utf8.h" #include "parquet/encryption/encryption_internal.h" -namespace parquet { +using ::arrow::util::SecureString; -// integer key retriever -void IntegerKeyIdRetriever::PutKey(uint32_t key_id, const std::string& key) { - key_map_.insert({key_id, key}); -} +namespace parquet { -std::string IntegerKeyIdRetriever::GetKey(const std::string& key_metadata) { - uint32_t key_id; - memcpy(reinterpret_cast(&key_id), key_metadata.c_str(), 4); +// any empty SecureString key is interpreted as if no key is given +// this instance is used when a SecureString reference is returned +static SecureString kNoKey = SecureString(); - return key_map_.at(key_id); +// integer key retriever +void IntegerKeyIdRetriever::PutKey(uint32_t key_id, SecureString key) { + key_map_.insert({key_id, std::move(key)}); } // string key retriever -void StringKeyIdRetriever::PutKey(const std::string& key_id, const std::string& key) { - key_map_.insert({key_id, key}); +void StringKeyIdRetriever::PutKey(std::string key_id, SecureString key) { + key_map_.insert({std::move(key_id), std::move(key)}); } -std::string StringKeyIdRetriever::GetKey(const std::string& key_id) { +SecureString StringKeyIdRetriever::GetKeyById(const std::string& key_id) { return key_map_.at(key_id); } ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( std::string column_key) { + return key(SecureString(std::move(column_key))); +} + +ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( + SecureString column_key) { if (column_key.empty()) return this; DCHECK(key_.empty()); @@ -92,6 +96,11 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( std::string footer_key) { + return this->footer_key(SecureString(std::move(footer_key))); +} + +FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( + SecureString footer_key) { if (footer_key.empty()) { return this; } @@ -129,10 +138,10 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::aad_prefix } ColumnDecryptionProperties::Builder* ColumnDecryptionProperties::Builder::key( - std::string key) { + SecureString key) { if (key.empty()) return this; - DCHECK(!key.empty()); + DCHECK(key_.empty()); key_ = std::move(key); return this; } @@ -182,41 +191,37 @@ FileEncryptionProperties::Builder::disable_aad_prefix_storage() { ColumnEncryptionProperties::ColumnEncryptionProperties(bool encrypted, std::string column_path, - std::string key, - std::string key_metadata) { - DCHECK(!column_path.empty()); - column_path_ = std::move(column_path); + SecureString key, + std::string key_metadata) + : column_path_(std::move(column_path)), + encrypted_(encrypted), + encrypted_with_footer_key_(encrypted && key.empty()), + key_(std::move(key)), + key_metadata_(std::move(key_metadata)) { + DCHECK(!column_path_.empty()); if (!encrypted) { - DCHECK(key.empty() && key_metadata.empty()); + DCHECK(key_.empty() && key_metadata_.empty()); } - - if (!key.empty()) { - DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); + if (!key_.empty()) { + DCHECK(key_.length() == 16 || key_.length() == 24 || key_.length() == 32); } - - encrypted_with_footer_key_ = (encrypted && key.empty()); if (encrypted_with_footer_key_) { - DCHECK(key_metadata.empty()); + DCHECK(key_metadata_.empty()); } - - encrypted_ = encrypted; - key_metadata_ = std::move(key_metadata); - key_ = std::move(key); } ColumnDecryptionProperties::ColumnDecryptionProperties(std::string column_path, - std::string key) { - DCHECK(!column_path.empty()); - column_path_ = std::move(column_path); + SecureString key) + : column_path_(std::move(column_path)), key_(std::move(key)) { + DCHECK(!column_path_.empty()); - if (!key.empty()) { - DCHECK(key.length() == 16 || key.length() == 24 || key.length() == 32); + if (!key_.empty()) { + DCHECK(key_.length() == 16 || key_.length() == 24 || key_.length() == 32); } - - key_ = std::move(key); } -std::string FileDecryptionProperties::column_key(const std::string& column_path) const { +const SecureString& FileDecryptionProperties::column_key( + const std::string& column_path) const { if (column_decryption_properties_.find(column_path) != column_decryption_properties_.end()) { auto column_prop = column_decryption_properties_.at(column_path); @@ -224,32 +229,31 @@ std::string FileDecryptionProperties::column_key(const std::string& column_path) return column_prop->key(); } } - return {}; + return kNoKey; } FileDecryptionProperties::FileDecryptionProperties( - std::string footer_key, std::shared_ptr key_retriever, + SecureString footer_key, std::shared_ptr key_retriever, bool check_plaintext_footer_integrity, std::string aad_prefix, std::shared_ptr aad_prefix_verifier, ColumnPathToDecryptionPropertiesMap column_decryption_properties, - bool plaintext_files_allowed) { - DCHECK(!footer_key.empty() || nullptr != key_retriever || - 0 != column_decryption_properties.size()); - - if (!footer_key.empty()) { - DCHECK(footer_key.length() == 16 || footer_key.length() == 24 || - footer_key.length() == 32); + bool plaintext_files_allowed) + : footer_key_(std::move(footer_key)), + aad_prefix_(std::move(aad_prefix)), + aad_prefix_verifier_(std::move(aad_prefix_verifier)), + column_decryption_properties_(std::move(column_decryption_properties)), + key_retriever_(std::move(key_retriever)), + check_plaintext_footer_integrity_(check_plaintext_footer_integrity), + plaintext_files_allowed_(plaintext_files_allowed) { + DCHECK(!footer_key_.empty() || nullptr != key_retriever_ || + 0 != column_decryption_properties_.size()); + if (!footer_key_.empty()) { + DCHECK(footer_key_.length() == 16 || footer_key_.length() == 24 || + footer_key_.length() == 32); } - if (footer_key.empty() && check_plaintext_footer_integrity) { - DCHECK(nullptr != key_retriever); + if (footer_key_.empty() && check_plaintext_footer_integrity) { + DCHECK(nullptr != key_retriever_); } - aad_prefix_verifier_ = std::move(aad_prefix_verifier); - footer_key_ = std::move(footer_key); - check_plaintext_footer_integrity_ = check_plaintext_footer_integrity; - key_retriever_ = std::move(key_retriever); - aad_prefix_ = std::move(aad_prefix); - column_decryption_properties_ = std::move(column_decryption_properties); - plaintext_files_allowed_ = plaintext_files_allowed; } FileEncryptionProperties::Builder* FileEncryptionProperties::Builder::footer_key_id( @@ -282,7 +286,7 @@ FileEncryptionProperties::column_encryption_properties(const std::string& column } FileEncryptionProperties::FileEncryptionProperties( - ParquetCipher::type cipher, std::string footer_key, std::string footer_key_metadata, + ParquetCipher::type cipher, SecureString footer_key, std::string footer_key_metadata, bool encrypted_footer, std::string aad_prefix, bool store_aad_prefix_in_file, ColumnPathToEncryptionPropertiesMap encrypted_columns) : footer_key_(std::move(footer_key)), diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 207687adc83..8d7bb9489af 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -17,11 +17,13 @@ #pragma once +#include #include #include #include #include +#include "arrow/util/secure_string.h" #include "parquet/exception.h" #include "parquet/schema.h" #include "parquet/types.h" @@ -46,28 +48,56 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: - virtual std::string GetKey(const std::string& key_metadata) = 0; + /// \brief Retrieve a key. + /// \deprecated Deprecated since 22.0.0. + /// Implement GetKeyById(const std::string&) instead. + ARROW_DEPRECATED( + "Deprecated in 22.0.0. " + "Implement GetKeyById(const std::string&) instead.") + virtual std::string GetKey(const std::string& key_id) { + throw ParquetException("Not implemented"); + } + + /// \brief Retrieve a key by its id. + virtual ::arrow::util::SecureString GetKeyById(const std::string& key_id) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = ::arrow::util::SecureString(GetKey(key_id)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } + virtual ~DecryptionKeyRetriever() {} }; /// Simple integer key retriever class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(uint32_t key_id, const std::string& key); - std::string GetKey(const std::string& key_metadata) override; + void PutKey(uint32_t key_id, ::arrow::util::SecureString key); + + ::arrow::util::SecureString GetKeyById(const std::string& key_id_string) override { + // key_id_string is string but for IntegerKeyIdRetriever it encodes + // a native-endian 32 bit unsigned integer key_id + uint32_t key_id; + assert(key_id_string.size() == sizeof(key_id)); + memcpy(&key_id, key_id_string.data(), sizeof(key_id)); + + return GetKeyById(key_id); + } + + ::arrow::util::SecureString GetKeyById(uint32_t key_id) { return key_map_.at(key_id); } private: - std::map key_map_; + std::map key_map_; }; // Simple string key retriever class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: - void PutKey(const std::string& key_id, const std::string& key); - std::string GetKey(const std::string& key_metadata) override; + void PutKey(std::string key_id, ::arrow::util::SecureString key); + ::arrow::util::SecureString GetKeyById(const std::string& key_id) override; private: - std::map key_map_; + std::map key_map_; }; class PARQUET_EXPORT HiddenColumnException : public ParquetException { @@ -113,8 +143,13 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. + /// \deprecated "Deprecated in 22.0.0. Use key(arrow::util::SecureString) instead." + ARROW_DEPRECATED("Deprecated in 22.0.0. Use key(arrow::util::SecureString) instead.") Builder* key(std::string column_key); + /// \copydoc key(std::string) + Builder* key(::arrow::util::SecureString column_key); + /// Set a key retrieval metadata. /// use either key_metadata() or key_id(), not both Builder* key_metadata(std::string key_metadata); @@ -133,27 +168,28 @@ class PARQUET_EXPORT ColumnEncryptionProperties { private: std::string column_path_; bool encrypted_; - std::string key_; + ::arrow::util::SecureString key_; std::string key_metadata_; Builder(std::string path, bool encrypted) : column_path_(std::move(path)), encrypted_(encrypted) {} }; - std::string column_path() const { return column_path_; } + const std::string& column_path() const { return column_path_; } bool is_encrypted() const { return encrypted_; } bool is_encrypted_with_footer_key() const { return encrypted_with_footer_key_; } - std::string key() const { return key_; } - std::string key_metadata() const { return key_metadata_; } + const ::arrow::util::SecureString& key() const { return key_; } + const std::string& key_metadata() const { return key_metadata_; } private: std::string column_path_; bool encrypted_; bool encrypted_with_footer_key_; - std::string key_; + ::arrow::util::SecureString key_; std::string key_metadata_; explicit ColumnEncryptionProperties(bool encrypted, std::string column_path, - std::string key, std::string key_metadata); + ::arrow::util::SecureString key, + std::string key_metadata); }; class PARQUET_EXPORT ColumnDecryptionProperties { @@ -168,26 +204,27 @@ class PARQUET_EXPORT ColumnDecryptionProperties { /// key metadata for this column the metadata will be ignored, /// the column will be decrypted with this key. /// key length must be either 16, 24 or 32 bytes. - Builder* key(std::string key); + Builder* key(::arrow::util::SecureString key); std::shared_ptr build(); private: std::string column_path_; - std::string key_; + ::arrow::util::SecureString key_; }; - std::string column_path() const { return column_path_; } - std::string key() const { return key_; } + const std::string& column_path() const { return column_path_; } + const ::arrow::util::SecureString& key() const { return key_; } private: std::string column_path_; - std::string key_; + ::arrow::util::SecureString key_; /// This class is only required for setting explicit column decryption keys - /// to override key retriever (or to provide keys when key metadata and/or /// key retriever are not available) - explicit ColumnDecryptionProperties(std::string column_path, std::string key); + explicit ColumnDecryptionProperties(std::string column_path, + ::arrow::util::SecureString key); }; class PARQUET_EXPORT AADPrefixVerifier { @@ -222,8 +259,16 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. + /// \deprecated Deprecated since 22.0.0. + /// Use footer_key(arrow::util::SecureString) instead. + ARROW_DEPRECATED( + "Deprecated in 22.0.0. " + "Use footer_key(arrow::util::SecureString) instead.") Builder* footer_key(std::string footer_key); + /// \copydoc footer_key(std::string footer_key) + Builder* footer_key(::arrow::util::SecureString footer_key); + /// Set explicit column keys (decryption properties). /// Its also possible to set a key retriever on this property object. /// Upon file decryption, availability of explicit keys is checked before @@ -279,7 +324,7 @@ class PARQUET_EXPORT FileDecryptionProperties { } private: - std::string footer_key_; + ::arrow::util::SecureString footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; ColumnPathToDecryptionPropertiesMap column_decryption_properties_; @@ -289,11 +334,11 @@ class PARQUET_EXPORT FileDecryptionProperties { bool plaintext_files_allowed_; }; - std::string column_key(const std::string& column_path) const; + const ::arrow::util::SecureString& column_key(const std::string& column_path) const; - std::string footer_key() const { return footer_key_; } + const ::arrow::util::SecureString& footer_key() const { return footer_key_; } - std::string aad_prefix() const { return aad_prefix_; } + const std::string& aad_prefix() const { return aad_prefix_; } const std::shared_ptr& key_retriever() const { return key_retriever_; @@ -310,18 +355,17 @@ class PARQUET_EXPORT FileDecryptionProperties { } private: - std::string footer_key_; + ::arrow::util::SecureString footer_key_; std::string aad_prefix_; std::shared_ptr aad_prefix_verifier_; - ColumnPathToDecryptionPropertiesMap column_decryption_properties_; - std::shared_ptr key_retriever_; bool check_plaintext_footer_integrity_; bool plaintext_files_allowed_; FileDecryptionProperties( - std::string footer_key, std::shared_ptr key_retriever, + ::arrow::util::SecureString footer_key, + std::shared_ptr key_retriever, bool check_plaintext_footer_integrity, std::string aad_prefix, std::shared_ptr aad_prefix_verifier, ColumnPathToDecryptionPropertiesMap column_decryption_properties, @@ -332,10 +376,18 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: + /// \deprecated Deprecated since 22.0.0. Use Builder(arrow::util::SecureString) + /// instead. + ARROW_DEPRECATED( + "Deprecated in 22.0.0. " + "Use Builder(arrow::util::SecureString) instead") explicit Builder(std::string footer_key) + : Builder(::arrow::util::SecureString(std::move(footer_key))) {} + + explicit Builder(::arrow::util::SecureString footer_key) : parquet_cipher_(kDefaultEncryptionAlgorithm), - encrypted_footer_(kDefaultEncryptedFooter) { - footer_key_ = std::move(footer_key); + encrypted_footer_(kDefaultEncryptedFooter), + footer_key_(std::move(footer_key)) { store_aad_prefix_in_file_ = false; } @@ -382,7 +434,7 @@ class PARQUET_EXPORT FileEncryptionProperties { private: ParquetCipher::type parquet_cipher_; bool encrypted_footer_; - std::string footer_key_; + ::arrow::util::SecureString footer_key_; std::string footer_key_metadata_; std::string aad_prefix_; @@ -394,22 +446,22 @@ class PARQUET_EXPORT FileEncryptionProperties { EncryptionAlgorithm algorithm() const { return algorithm_; } - std::string footer_key() const { return footer_key_; } + const ::arrow::util::SecureString& footer_key() const { return footer_key_; } - std::string footer_key_metadata() const { return footer_key_metadata_; } + const std::string& footer_key_metadata() const { return footer_key_metadata_; } - std::string file_aad() const { return file_aad_; } + const std::string& file_aad() const { return file_aad_; } std::shared_ptr column_encryption_properties( const std::string& column_path); - ColumnPathToEncryptionPropertiesMap encrypted_columns() const { + const ColumnPathToEncryptionPropertiesMap& encrypted_columns() const { return encrypted_columns_; } private: EncryptionAlgorithm algorithm_; - std::string footer_key_; + ::arrow::util::SecureString footer_key_; std::string footer_key_metadata_; bool encrypted_footer_; std::string file_aad_; @@ -417,7 +469,8 @@ class PARQUET_EXPORT FileEncryptionProperties { bool store_aad_prefix_in_file_; ColumnPathToEncryptionPropertiesMap encrypted_columns_; - FileEncryptionProperties(ParquetCipher::type cipher, std::string footer_key, + FileEncryptionProperties(ParquetCipher::type cipher, + ::arrow::util::SecureString footer_key, std::string footer_key_metadata, bool encrypted_footer, std::string aad_prefix, bool store_aad_prefix_in_file, ColumnPathToEncryptionPropertiesMap encrypted_columns); diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index d88aa6c52ac..d7463590358 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -22,6 +22,8 @@ #include "parquet/encryption/file_key_unwrapper.h" #include "parquet/encryption/key_metadata.h" +using ::arrow::util::SecureString; + namespace parquet::encryption { FileKeyUnwrapper::FileKeyUnwrapper( @@ -67,7 +69,7 @@ FileKeyUnwrapper::FileKeyUnwrapper( kms_connection_config.key_access_token(), cache_entry_lifetime_seconds_); } -std::string FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { +SecureString FileKeyUnwrapper::GetKeyById(const std::string& key_metadata_bytes) { // key_metadata is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); if (!::arrow::util::ValidateUTF8( @@ -106,17 +108,17 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const std::string& master_key_id = key_material.master_key_id(); const std::string& encoded_wrapped_dek = key_material.wrapped_dek(); - std::string data_key; + SecureString data_key; if (!double_wrapping) { - data_key = kms_client->UnwrapKey(encoded_wrapped_dek, master_key_id); + data_key = kms_client->UnWrapKey(encoded_wrapped_dek, master_key_id); } else { // Get Key Encryption Key const std::string& encoded_kek_id = key_material.kek_id(); const std::string& encoded_wrapped_kek = key_material.wrapped_kek(); - std::string kek_bytes = kek_per_kek_id_->GetOrInsert( + const SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( encoded_kek_id, [kms_client, encoded_wrapped_kek, master_key_id]() { - return kms_client->UnwrapKey(encoded_wrapped_kek, master_key_id); + return kms_client->UnWrapKey(encoded_wrapped_kek, master_key_id); }); // Decrypt the data key diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index 6147abbecd3..d674b5cf2ac 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -18,6 +18,7 @@ #pragma once #include "arrow/util/concurrent_map.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/encryption/file_system_key_material_store.h" @@ -64,7 +65,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr key_material_store); /// Get the data key from key metadata - std::string GetKey(const std::string& key_metadata) override; + ::arrow::util::SecureString GetKeyById(const std::string& key_metadata_bytes) override; /// Get the data key along with the master key id from key material KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material); @@ -81,7 +82,8 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { const KeyMaterial& key_material); /// A map of Key Encryption Key (KEK) ID -> KEK bytes, for the current token - std::shared_ptr<::arrow::util::ConcurrentMap> kek_per_kek_id_; + std::shared_ptr<::arrow::util::ConcurrentMap> + kek_per_kek_id_; std::shared_ptr key_toolkit_owner_; KeyToolkit* key_toolkit_; KmsConnectionConfig kms_connection_config_; diff --git a/cpp/src/parquet/encryption/file_key_wrapper.cc b/cpp/src/parquet/encryption/file_key_wrapper.cc index 8ce563e60d7..fd870ed1f3b 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.cc +++ b/cpp/src/parquet/encryption/file_key_wrapper.cc @@ -22,6 +22,8 @@ #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/exception.h" +using ::arrow::util::SecureString; + namespace parquet::encryption { FileKeyWrapper::FileKeyWrapper(KeyToolkit* key_toolkit, @@ -49,7 +51,7 @@ FileKeyWrapper::FileKeyWrapper(KeyToolkit* key_toolkit, } } -std::string FileKeyWrapper::GetEncryptionKeyMetadata(const std::string& data_key, +std::string FileKeyWrapper::GetEncryptionKeyMetadata(const SecureString& data_key, const std::string& master_key_id, bool is_footer_key, std::string key_id_in_file) { @@ -70,7 +72,7 @@ std::string FileKeyWrapper::GetEncryptionKeyMetadata(const std::string& data_key }); // Encrypt DEK with KEK const std::string& aad = key_encryption_key.kek_id(); - const std::string& kek_bytes = key_encryption_key.kek_bytes(); + const SecureString& kek_bytes = key_encryption_key.kek_bytes(); encoded_wrapped_dek = internal::EncryptKeyLocally(data_key, kek_bytes, aad); encoded_kek_id = key_encryption_key.encoded_kek_id(); encoded_wrapped_kek = key_encryption_key.encoded_wrapped_kek(); @@ -111,8 +113,8 @@ std::string FileKeyWrapper::GetEncryptionKeyMetadata(const std::string& data_key KeyEncryptionKey FileKeyWrapper::CreateKeyEncryptionKey( const std::string& master_key_id) { - std::string kek_bytes(kKeyEncryptionKeyLength, '\0'); - RandBytes(reinterpret_cast(kek_bytes.data()), kKeyEncryptionKeyLength); + SecureString kek_bytes(kKeyEncryptionKeyLength, '\0'); + RandBytes(kek_bytes.as_span().data(), kKeyEncryptionKeyLength); std::string kek_id(kKeyEncryptionKeyIdLength, '\0'); RandBytes(reinterpret_cast(kek_id.data()), kKeyEncryptionKeyIdLength); diff --git a/cpp/src/parquet/encryption/file_key_wrapper.h b/cpp/src/parquet/encryption/file_key_wrapper.h index 26b9719de64..aa6d878bafe 100644 --- a/cpp/src/parquet/encryption/file_key_wrapper.h +++ b/cpp/src/parquet/encryption/file_key_wrapper.h @@ -61,7 +61,7 @@ class PARQUET_EXPORT FileKeyWrapper { /// When external key material is used, an identifier is usually generated automatically /// but may be specified explicitly to support key rotation, /// which requires keeping the same identifiers. - std::string GetEncryptionKeyMetadata(const std::string& data_key, + std::string GetEncryptionKeyMetadata(const ::arrow::util::SecureString& data_key, const std::string& master_key_id, bool is_footer_key, std::string key_id_in_file = ""); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index 715807b4267..efd1ec8067c 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -18,20 +18,23 @@ #include "parquet/encryption/internal_file_decryptor.h" #include "arrow/util/logging.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/encryption/encryption_internal.h" #include "parquet/metadata.h" +using arrow::util::SecureString; + namespace parquet { // Decryptor Decryptor::Decryptor(std::unique_ptr aes_decryptor, - const std::string& key, const std::string& file_aad, - const std::string& aad, ::arrow::MemoryPool* pool) + SecureString key, std::string file_aad, std::string aad, + ::arrow::MemoryPool* pool) : aes_decryptor_(std::move(aes_decryptor)), - key_(key), - file_aad_(file_aad), - aad_(aad), + key_(std::move(key)), + file_aad_(std::move(file_aad)), + aad_(std::move(aad)), pool_(pool) {} Decryptor::~Decryptor() = default; @@ -46,7 +49,7 @@ int32_t Decryptor::CiphertextLength(int32_t plaintext_len) const { int32_t Decryptor::Decrypt(::arrow::util::span ciphertext, ::arrow::util::span plaintext) { - return aes_decryptor_->Decrypt(ciphertext, str2span(key_), str2span(aad_), plaintext); + return aes_decryptor_->Decrypt(ciphertext, key_.as_span(), str2span(aad_), plaintext); } // InternalFileDecryptor @@ -60,36 +63,35 @@ InternalFileDecryptor::InternalFileDecryptor( footer_key_metadata_(footer_key_metadata), pool_(pool) {} -std::string InternalFileDecryptor::GetFooterKey() { +const SecureString& InternalFileDecryptor::GetFooterKey() { std::unique_lock lock(mutex_); if (!footer_key_.empty()) { return footer_key_; } - std::string footer_key = properties_->footer_key(); + // cache footer key to avoid repeated retrieval of key from the key_retriever + footer_key_ = properties_->footer_key(); // ignore footer key metadata if footer key is explicitly set via API - if (footer_key.empty()) { + if (footer_key_.empty()) { if (footer_key_metadata_.empty()) throw ParquetException("No footer key or key metadata"); if (properties_->key_retriever() == nullptr) throw ParquetException("No footer key or key retriever"); try { - footer_key = properties_->key_retriever()->GetKey(footer_key_metadata_); + footer_key_ = properties_->key_retriever()->GetKeyById(footer_key_metadata_); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "Footer key: access denied " << e.what() << "\n"; throw ParquetException(ss.str()); } } - if (footer_key.empty()) { + if (footer_key_.empty()) { throw ParquetException( "Footer key unavailable. Could not verify " "plaintext footer metadata"); } - // cache footer key to avoid repeated retrieval of key from the key_retriever - footer_key_ = footer_key; - return footer_key; + return footer_key_; } std::unique_ptr InternalFileDecryptor::GetFooterDecryptor() { @@ -99,7 +101,7 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor() { std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( const std::string& aad, bool metadata) { - std::string footer_key = GetFooterKey(); + const SecureString& footer_key = GetFooterKey(); auto key_len = static_cast(footer_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); @@ -107,23 +109,23 @@ std::unique_ptr InternalFileDecryptor::GetFooterDecryptor( pool_); } -std::string InternalFileDecryptor::GetColumnKey(const std::string& column_path, - const std::string& column_key_metadata) { - std::string column_key = properties_->column_key(column_path); +SecureString InternalFileDecryptor::GetColumnKey(const std::string& column_path, + const std::string& column_key_metadata) { + SecureString column_key = properties_->column_key(column_path); // No explicit column key given via API. Retrieve via key metadata. if (column_key.empty() && !column_key_metadata.empty() && properties_->key_retriever() != nullptr) { try { - column_key = properties_->key_retriever()->GetKey(column_key_metadata); + column_key = properties_->key_retriever()->GetKeyById(column_key_metadata); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; throw HiddenColumnException(ss.str()); } - } - if (column_key.empty()) { - throw HiddenColumnException("HiddenColumnException, path=" + column_path); + if (column_key.empty()) { + throw HiddenColumnException("HiddenColumnException, path=" + column_path); + } } return column_key; } @@ -131,7 +133,7 @@ std::string InternalFileDecryptor::GetColumnKey(const std::string& column_path, std::unique_ptr InternalFileDecryptor::GetColumnDecryptor( const std::string& column_path, const std::string& column_key_metadata, const std::string& aad, bool metadata) { - std::string column_key = GetColumnKey(column_path, column_key_metadata); + const SecureString& column_key = GetColumnKey(column_path, column_key_metadata); auto key_len = static_cast(column_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); return std::make_unique(std::move(aes_decryptor), column_key, file_aad_, aad, @@ -148,9 +150,9 @@ InternalFileDecryptor::GetColumnDecryptorFactory( // The column is encrypted with its own key const std::string& column_key_metadata = crypto_metadata->key_metadata(); const std::string column_path = crypto_metadata->path_in_schema()->ToDotString(); - std::string column_key = GetColumnKey(column_path, column_key_metadata); + const SecureString& column_key = GetColumnKey(column_path, column_key_metadata); - return [this, aad, metadata, column_key = std::move(column_key)]() { + return [this, aad, metadata, column_key = column_key]() { auto key_len = static_cast(column_key.size()); auto aes_decryptor = encryption::AesDecryptor::Make(algorithm_, key_len, metadata); return std::make_unique(std::move(aes_decryptor), column_key, file_aad_, diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.h b/cpp/src/parquet/encryption/internal_file_decryptor.h index cc0e315e029..a365b4df4bf 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.h +++ b/cpp/src/parquet/encryption/internal_file_decryptor.h @@ -22,6 +22,7 @@ #include #include +#include "arrow/util/secure_string.h" #include "parquet/schema.h" namespace parquet { @@ -32,6 +33,7 @@ class AesEncryptor; } // namespace encryption class ColumnCryptoMetaData; +class DecryptionKeyRetriever; class FileDecryptionProperties; // An object handling decryption using well-known encryption parameters @@ -39,8 +41,8 @@ class FileDecryptionProperties; // CAUTION: Decryptor objects are not thread-safe. class PARQUET_EXPORT Decryptor { public: - Decryptor(std::unique_ptr decryptor, const std::string& key, - const std::string& file_aad, const std::string& aad, + Decryptor(std::unique_ptr decryptor, + ::arrow::util::SecureString key, std::string file_aad, std::string aad, ::arrow::MemoryPool* pool); ~Decryptor(); @@ -55,7 +57,7 @@ class PARQUET_EXPORT Decryptor { private: std::unique_ptr aes_decryptor_; - std::string key_; + ::arrow::util::SecureString key_; std::string file_aad_; std::string aad_; ::arrow::MemoryPool* pool_; @@ -71,7 +73,7 @@ class InternalFileDecryptor { const std::string& file_aad() const { return file_aad_; } - std::string GetFooterKey(); + const ::arrow::util::SecureString& GetFooterKey(); ParquetCipher::type algorithm() const { return algorithm_; } @@ -127,10 +129,14 @@ class InternalFileDecryptor { // Protects footer_key_ updates std::mutex mutex_; - std::string footer_key_; + ::arrow::util::SecureString footer_key_; - std::string GetColumnKey(const std::string& column_path, - const std::string& column_key_metadata); + ::arrow::util::SecureString GetColumnKey(const std::string& column_path, + const std::string& column_key_metadata); + + static ::arrow::util::SecureString RetrieveColumnKeyIfEmpty( + ::arrow::util::SecureString column_key, const std::string& column_key_metadata, + const std::shared_ptr& key_retriever); std::unique_ptr GetFooterDecryptor(const std::string& aad, bool metadata); diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.cc b/cpp/src/parquet/encryption/internal_file_encryptor.cc index 9210ffba9cc..3623aa05c66 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_encryptor.cc @@ -16,19 +16,21 @@ // under the License. #include "parquet/encryption/internal_file_encryptor.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/encryption/encryption_internal.h" +using arrow::util::SecureString; + namespace parquet { // Encryptor -Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key, - const std::string& file_aad, const std::string& aad, - ::arrow::MemoryPool* pool) +Encryptor::Encryptor(encryption::AesEncryptor* aes_encryptor, SecureString key, + std::string file_aad, std::string aad, ::arrow::MemoryPool* pool) : aes_encryptor_(aes_encryptor), - key_(key), - file_aad_(file_aad), - aad_(aad), + key_(std::move(key)), + file_aad_(std::move(file_aad)), + aad_(std::move(aad)), pool_(pool) {} int32_t Encryptor::CiphertextLength(int64_t plaintext_len) const { @@ -37,7 +39,7 @@ int32_t Encryptor::CiphertextLength(int64_t plaintext_len) const { int32_t Encryptor::Encrypt(::arrow::util::span plaintext, ::arrow::util::span ciphertext) { - return aes_encryptor_->Encrypt(plaintext, str2span(key_), str2span(aad_), ciphertext); + return aes_encryptor_->Encrypt(plaintext, key_.as_span(), str2span(aad_), ciphertext); } // InternalFileEncryptor @@ -52,7 +54,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - std::string footer_key = properties_->footer_key(); + const SecureString& footer_key = properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_key.size()); footer_encryptor_ = std::make_shared( aes_encryptor, footer_key, properties_->file_aad(), footer_aad, pool_); @@ -66,7 +68,7 @@ std::shared_ptr InternalFileEncryptor::GetFooterSigningEncryptor() { ParquetCipher::type algorithm = properties_->algorithm().algorithm; std::string footer_aad = encryption::CreateFooterAad(properties_->file_aad()); - std::string footer_signing_key = properties_->footer_key(); + const SecureString& footer_signing_key = properties_->footer_key(); auto aes_encryptor = GetMetaAesEncryptor(algorithm, footer_signing_key.size()); footer_signing_encryptor_ = std::make_shared( aes_encryptor, footer_signing_key, properties_->file_aad(), footer_aad, pool_); @@ -101,12 +103,9 @@ InternalFileEncryptor::InternalFileEncryptor::GetColumnEncryptor( return nullptr; } - std::string key; - if (column_prop->is_encrypted_with_footer_key()) { - key = properties_->footer_key(); - } else { - key = column_prop->key(); - } + const SecureString& key = column_prop->is_encrypted_with_footer_key() + ? properties_->footer_key() + : column_prop->key(); ParquetCipher::type algorithm = properties_->algorithm().algorithm; auto aes_encryptor = metadata ? GetMetaAesEncryptor(algorithm, key.size()) diff --git a/cpp/src/parquet/encryption/internal_file_encryptor.h b/cpp/src/parquet/encryption/internal_file_encryptor.h index a7108ab66f6..ee15fe32de9 100644 --- a/cpp/src/parquet/encryption/internal_file_encryptor.h +++ b/cpp/src/parquet/encryption/internal_file_encryptor.h @@ -36,9 +36,8 @@ class ColumnEncryptionProperties; class PARQUET_EXPORT Encryptor { public: - Encryptor(encryption::AesEncryptor* aes_encryptor, const std::string& key, - const std::string& file_aad, const std::string& aad, - ::arrow::MemoryPool* pool); + Encryptor(encryption::AesEncryptor* aes_encryptor, ::arrow::util::SecureString key, + std::string file_aad, std::string aad, ::arrow::MemoryPool* pool); const std::string& file_aad() { return file_aad_; } void UpdateAad(const std::string& aad) { aad_ = aad; } ::arrow::MemoryPool* pool() { return pool_; } @@ -62,7 +61,7 @@ class PARQUET_EXPORT Encryptor { private: encryption::AesEncryptor* aes_encryptor_; - std::string key_; + ::arrow::util::SecureString key_; std::string file_aad_; std::string aad_; ::arrow::MemoryPool* pool_; diff --git a/cpp/src/parquet/encryption/key_encryption_key.h b/cpp/src/parquet/encryption/key_encryption_key.h index 62263ee3cd5..1157937632a 100644 --- a/cpp/src/parquet/encryption/key_encryption_key.h +++ b/cpp/src/parquet/encryption/key_encryption_key.h @@ -21,6 +21,7 @@ #include #include "arrow/util/base64.h" +#include "arrow/util/secure_string.h" namespace parquet::encryption { @@ -32,14 +33,14 @@ namespace parquet::encryption { // locally, and does not involve an interaction with a KMS server. class KeyEncryptionKey { public: - KeyEncryptionKey(std::string kek_bytes, std::string kek_id, + KeyEncryptionKey(::arrow::util::SecureString kek_bytes, std::string kek_id, std::string encoded_wrapped_kek) : kek_bytes_(std::move(kek_bytes)), kek_id_(std::move(kek_id)), encoded_kek_id_(::arrow::util::base64_encode(kek_id_)), encoded_wrapped_kek_(std::move(encoded_wrapped_kek)) {} - const std::string& kek_bytes() const { return kek_bytes_; } + const ::arrow::util::SecureString& kek_bytes() const { return kek_bytes_; } const std::string& kek_id() const { return kek_id_; } @@ -48,7 +49,7 @@ class KeyEncryptionKey { const std::string& encoded_wrapped_kek() const { return encoded_wrapped_kek_; } private: - std::string kek_bytes_; + ::arrow::util::SecureString kek_bytes_; std::string kek_id_; std::string encoded_kek_id_; std::string encoded_wrapped_kek_; diff --git a/cpp/src/parquet/encryption/key_management_test.cc b/cpp/src/parquet/encryption/key_management_test.cc index 2e43edee530..ed6d15dbb6a 100644 --- a/cpp/src/parquet/encryption/key_management_test.cc +++ b/cpp/src/parquet/encryption/key_management_test.cc @@ -46,8 +46,8 @@ class TestEncryptionKeyManagement : public ::testing::Test { FileEncryptor encryptor_; FileDecryptor decryptor_; - std::unordered_map key_list_; - std::unordered_map new_key_list_; + std::unordered_map key_list_; + std::unordered_map new_key_list_; std::string column_key_mapping_; KmsConnectionConfig kms_connection_config_; CryptoFactory crypto_factory_; diff --git a/cpp/src/parquet/encryption/key_toolkit.h b/cpp/src/parquet/encryption/key_toolkit.h index 339692a99a3..a0b929877ee 100644 --- a/cpp/src/parquet/encryption/key_toolkit.h +++ b/cpp/src/parquet/encryption/key_toolkit.h @@ -49,7 +49,7 @@ class PARQUET_EXPORT KeyToolkit { /// Key encryption key two level cache for unwrapping: token -> KeyEncryptionKeyId -> /// KeyEncryptionKeyBytes - TwoLevelCacheWithExpiration& kek_read_cache_per_token() { + TwoLevelCacheWithExpiration<::arrow::util::SecureString>& kek_read_cache_per_token() { return key_encryption_key_read_cache_; } @@ -82,7 +82,7 @@ class PARQUET_EXPORT KeyToolkit { private: TwoLevelCacheWithExpiration> kms_client_cache_; TwoLevelCacheWithExpiration key_encryption_key_write_cache_; - TwoLevelCacheWithExpiration key_encryption_key_read_cache_; + TwoLevelCacheWithExpiration<::arrow::util::SecureString> key_encryption_key_read_cache_; std::shared_ptr kms_client_factory_; mutable ::arrow::util::Mutex last_cache_clean_for_key_rotation_time_mutex_; internal::TimePoint last_cache_clean_for_key_rotation_time_; @@ -92,15 +92,15 @@ class PARQUET_EXPORT KeyToolkit { // parsing from "key material" class PARQUET_EXPORT KeyWithMasterId { public: - KeyWithMasterId(std::string key_bytes, std::string master_id) + KeyWithMasterId(::arrow::util::SecureString key_bytes, std::string master_id) : key_bytes_(std::move(key_bytes)), master_id_(std::move(master_id)) {} - const std::string& data_key() const { return key_bytes_; } + const ::arrow::util::SecureString& data_key() const { return key_bytes_; } const std::string& master_id() const { return master_id_; } private: - const std::string key_bytes_; - const std::string master_id_; + ::arrow::util::SecureString key_bytes_; + std::string master_id_; }; } // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.cc b/cpp/src/parquet/encryption/key_toolkit_internal.cc index 89a52a2bcd6..60a8a52206c 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.cc +++ b/cpp/src/parquet/encryption/key_toolkit_internal.cc @@ -16,18 +16,21 @@ // under the License. #include "arrow/util/base64.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption_internal.h" #include "parquet/encryption/key_toolkit_internal.h" +using arrow::util::SecureString; + namespace parquet::encryption::internal { // Acceptable key lengths in number of bits, used to validate the data key lengths // configured by users and the master key lengths fetched from KMS server. static constexpr const int32_t kAcceptableDataKeyLengths[] = {128, 192, 256}; -std::string EncryptKeyLocally(const std::string& key_bytes, const std::string& master_key, - const std::string& aad) { +std::string EncryptKeyLocally(const SecureString& key_bytes, + const SecureString& master_key, const std::string& aad) { AesEncryptor key_encryptor(ParquetCipher::AES_GCM_V1, static_cast(master_key.size()), false, false /*write_length*/); @@ -38,15 +41,15 @@ std::string EncryptKeyLocally(const std::string& key_bytes, const std::string& m ::arrow::util::span encrypted_key_span( reinterpret_cast(&encrypted_key[0]), encrypted_key_len); - encrypted_key_len = key_encryptor.Encrypt(str2span(key_bytes), str2span(master_key), + encrypted_key_len = key_encryptor.Encrypt(key_bytes.as_span(), master_key.as_span(), str2span(aad), encrypted_key_span); return ::arrow::util::base64_encode( ::std::string_view(encrypted_key.data(), encrypted_key_len)); } -std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, - const std::string& master_key, const std::string& aad) { +SecureString DecryptKeyLocally(const std::string& encoded_encrypted_key, + const SecureString& master_key, const std::string& aad) { std::string encrypted_key = ::arrow::util::base64_decode(encoded_encrypted_key); AesDecryptor key_decryptor(ParquetCipher::AES_GCM_V1, @@ -55,12 +58,10 @@ std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, int32_t decrypted_key_len = key_decryptor.PlaintextLength(static_cast(encrypted_key.size())); - std::string decrypted_key(decrypted_key_len, '\0'); - ::arrow::util::span decrypted_key_span( - reinterpret_cast(&decrypted_key[0]), decrypted_key_len); + SecureString decrypted_key(decrypted_key_len, '\0'); - decrypted_key_len = key_decryptor.Decrypt(str2span(encrypted_key), str2span(master_key), - str2span(aad), decrypted_key_span); + decrypted_key_len = key_decryptor.Decrypt(str2span(encrypted_key), master_key.as_span(), + str2span(aad), decrypted_key.as_span()); return decrypted_key; } diff --git a/cpp/src/parquet/encryption/key_toolkit_internal.h b/cpp/src/parquet/encryption/key_toolkit_internal.h index 8474a91fc1a..d3a75cb8495 100644 --- a/cpp/src/parquet/encryption/key_toolkit_internal.h +++ b/cpp/src/parquet/encryption/key_toolkit_internal.h @@ -19,19 +19,22 @@ #include +#include "arrow/util/secure_string.h" #include "parquet/platform.h" namespace parquet::encryption::internal { /// Encrypts "key" with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT -std::string EncryptKeyLocally(const std::string& key, const std::string& master_key, +std::string EncryptKeyLocally(const ::arrow::util::SecureString& key, + const ::arrow::util::SecureString& master_key, const std::string& aad); /// Decrypts encrypted key with "master_key", using AES-GCM and the "aad" PARQUET_EXPORT -std::string DecryptKeyLocally(const std::string& encoded_encrypted_key, - const std::string& master_key, const std::string& aad); +::arrow::util::SecureString DecryptKeyLocally( + const std::string& encoded_encrypted_key, + const ::arrow::util::SecureString& master_key, const std::string& aad); PARQUET_EXPORT bool ValidateKeyLength(int32_t key_length_bits); diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 198ceb9bf4b..4ff3903fb7e 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -86,14 +86,14 @@ class KeyWrappingTest : public ::testing::Test { FileKeyUnwrapper unwrapper(&key_toolkit, kms_connection_config_, cache_entry_lifetime_seconds, readable_file_path, file_system); - std::string footer_key = unwrapper.GetKey(key_metadata_json_footer); + SecureString footer_key = unwrapper.GetKeyById(key_metadata_json_footer); ASSERT_EQ(footer_key, kFooterEncryptionKey); - std::string column_key = unwrapper.GetKey(key_metadata_json_column); + SecureString column_key = unwrapper.GetKeyById(key_metadata_json_column); ASSERT_EQ(column_key, kColumnEncryptionKey1); } - std::unordered_map key_list_; + std::unordered_map key_list_; KmsConnectionConfig kms_connection_config_; }; diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index ef363d9c2cd..09133439da3 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -22,6 +22,7 @@ #include #include "arrow/util/mutex.h" +#include "arrow/util/secure_string.h" #include "parquet/exception.h" #include "parquet/platform.h" @@ -79,14 +80,47 @@ class PARQUET_EXPORT KmsClient { static constexpr const char kKmsInstanceUrlDefault[] = "DEFAULT"; static constexpr const char kKeyAccessTokenDefault[] = "DEFAULT"; - /// Wraps a key - encrypts it with the master key, encodes the result + /// \brief Wraps a key. + /// + /// Encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. + /// + /// \deprecated Deprecated since 22.0.0. Implement + /// WrapKey(const SecureString&, const std::string&) instead. + ARROW_DEPRECATED( + "Deprecated in 22.0.0. " + "Implement WrapKey(const SecureString&, const std::string&) instead.") virtual std::string WrapKey(const std::string& key_bytes, - const std::string& master_key_identifier) = 0; + const std::string& master_key_identifier) { + throw ParquetException("Not implemented"); + } + + /// \copydoc WrapKey(const std::string&, const std::string&) + virtual std::string WrapKey(const ::arrow::util::SecureString& key_bytes, + const std::string& master_key_identifier) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = WrapKey(std::string(key_bytes.as_view()), master_key_identifier); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } - /// Decrypts (unwraps) a key with the master key. + /// \brief Decrypts (unwraps) a key with the master key. + /// \deprecated Deprecated since 22.0.0. Implement UnWrapKey instead. + ARROW_DEPRECATED("Deprecated in 22.0.0. Implement UnWrapKey instead.") virtual std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) = 0; + const std::string& master_key_identifier) { + throw ParquetException("Not implemented"); + } + + /// \copydoc UnwrapKey(const std::string&, const std::string&) + virtual ::arrow::util::SecureString UnWrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) { + ARROW_SUPPRESS_DEPRECATION_WARNING + auto key = ::arrow::util::SecureString(UnwrapKey(wrapped_key, master_key_identifier)); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + return key; + } + virtual ~KmsClient() {} }; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 23e28bb8e61..6bd80479a03 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -17,6 +17,7 @@ #include "arrow/json/object_parser.h" #include "arrow/json/object_writer.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/encryption/local_wrap_kms_client.h" @@ -24,6 +25,7 @@ using ::arrow::json::internal::ObjectParser; using ::arrow::json::internal::ObjectWriter; +using ::arrow::util::SecureString; namespace parquet::encryption { @@ -69,10 +71,10 @@ LocalWrapKmsClient::LocalWrapKmsClient(const KmsConnectionConfig& kms_connection master_key_cache_.Clear(); } -std::string LocalWrapKmsClient::WrapKey(const std::string& key_bytes, +std::string LocalWrapKmsClient::WrapKey(const SecureString& key_bytes, const std::string& master_key_identifier) { const auto master_key = master_key_cache_.GetOrInsert( - master_key_identifier, [this, master_key_identifier]() -> std::string { + master_key_identifier, [this, master_key_identifier]() -> SecureString { return this->GetKeyFromServer(master_key_identifier); }); const auto& aad = master_key_identifier; @@ -82,8 +84,8 @@ std::string LocalWrapKmsClient::WrapKey(const std::string& key_bytes, return LocalKeyWrap::CreateSerialized(encrypted_encoded_key); } -std::string LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) { +SecureString LocalWrapKmsClient::UnWrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) { LocalKeyWrap key_wrap = LocalKeyWrap::Parse(wrapped_key); const std::string& master_key_version = key_wrap.master_key_version(); if (kLocalWrapNoKeyVersion != master_key_version) { @@ -91,8 +93,8 @@ std::string LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, master_key_version); } const std::string& encrypted_encoded_key = key_wrap.encrypted_encoded_key(); - const std::string master_key = master_key_cache_.GetOrInsert( - master_key_identifier, [this, master_key_identifier]() -> std::string { + const SecureString& master_key = master_key_cache_.GetOrInsert( + master_key_identifier, [this, master_key_identifier]() -> const SecureString& { return this->GetKeyFromServer(master_key_identifier); }); const std::string& aad = master_key_identifier; @@ -100,8 +102,9 @@ std::string LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, return internal::DecryptKeyLocally(encrypted_encoded_key, master_key, aad); } -std::string LocalWrapKmsClient::GetKeyFromServer(const std::string& key_identifier) { - std::string master_key = GetMasterKeyFromServer(key_identifier); +const SecureString& LocalWrapKmsClient::GetKeyFromServer( + const std::string& key_identifier) { + const SecureString& master_key = GetMasterKeyFromServer(key_identifier); int32_t key_length_bits = static_cast(master_key.size() * 8); if (!internal::ValidateKeyLength(key_length_bits)) { std::ostringstream ss; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 3c90d829605..7eedbaaf77e 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -35,16 +35,16 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { explicit LocalWrapKmsClient(const KmsConnectionConfig& kms_connection_config); - std::string WrapKey(const std::string& key_bytes, + std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) override; + ::arrow::util::SecureString UnWrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) override; protected: /// Get master key from the remote KMS server. /// Note: this function might be called by multiple threads - virtual std::string GetMasterKeyFromServer( + virtual const ::arrow::util::SecureString& GetMasterKeyFromServer( const std::string& master_key_identifier) = 0; private: @@ -84,11 +84,12 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string master_key_version_; }; - std::string GetKeyFromServer(const std::string& key_identifier); + const ::arrow::util::SecureString& GetKeyFromServer(const std::string& key_identifier); protected: KmsConnectionConfig kms_connection_config_; - ::arrow::util::ConcurrentMap master_key_cache_; + ::arrow::util::ConcurrentMap + master_key_cache_; }; } // namespace parquet::encryption diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 1ceda7ac032..3f39cc8eb64 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -224,9 +224,9 @@ TEST(TestDecryptionProperties, UseKeyRetriever) { std::shared_ptr props = builder.build(); auto out_key_retriever = props->key_retriever(); - ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKey("kf")); - ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKey("kc1")); - ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKey("kc2")); + ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKeyById("kf")); + ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKeyById("kc1")); + ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKeyById("kc2")); } TEST(TestDecryptionProperties, SupplyAadPrefix) { diff --git a/cpp/src/parquet/encryption/read_configurations_test.cc b/cpp/src/parquet/encryption/read_configurations_test.cc index 61a1296e86b..15c1e7df736 100644 --- a/cpp/src/parquet/encryption/read_configurations_test.cc +++ b/cpp/src/parquet/encryption/read_configurations_test.cc @@ -98,15 +98,15 @@ class TestDecryptionConfiguration protected: FileDecryptor decryptor_; - std::string path_to_double_field_ = kDoubleFieldName; - std::string path_to_float_field_ = kFloatFieldName; + const std::string path_to_double_field_ = kDoubleFieldName; + const std::string path_to_float_field_ = kFloatFieldName; // This vector will hold various decryption configurations. std::vector> vector_of_decryption_configurations_; - std::string kFooterEncryptionKey_ = std::string(kFooterEncryptionKey); - std::string kColumnEncryptionKey1_ = std::string(kColumnEncryptionKey1); - std::string kColumnEncryptionKey2_ = std::string(kColumnEncryptionKey2); - std::string kFileName_ = std::string(kFileName); + const SecureString kFooterEncryptionKey_ = kFooterEncryptionKey; + const SecureString kColumnEncryptionKey1_ = kColumnEncryptionKey1; + const SecureString kColumnEncryptionKey2_ = kColumnEncryptionKey2; + const std::string kFileName_ = std::string(kFileName); void CreateDecryptionConfigurations() { /********************************************************************************** diff --git a/cpp/src/parquet/encryption/test_encryption_util.cc b/cpp/src/parquet/encryption/test_encryption_util.cc index 1864e86f34a..86951f8264e 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.cc +++ b/cpp/src/parquet/encryption/test_encryption_util.cc @@ -49,17 +49,21 @@ std::string data_file(const char* file) { return ss.str(); } -std::unordered_map BuildKeyMap(const char* const* column_ids, - const char* const* column_keys, - const char* footer_id, - const char* footer_key) { - std::unordered_map key_map; +std::unordered_map BuildKeyMap(const char* const* column_ids, + const char* const* column_keys, + const char* footer_id, + const char* footer_key) { + std::unordered_map key_map; // add column keys for (int i = 0; i < 6; i++) { - key_map.insert({column_ids[i], column_keys[i]}); + // this is not safe to do as column_keys[i] is not protected by SecureString + // do not do outside test code + key_map.insert({column_ids[i], SecureString(column_keys[i])}); } // add footer key - key_map.insert({footer_id, footer_key}); + // this is not safe to do as footer_key[i] is not protected by SecureString + // do not do outside test code + key_map.insert({footer_id, SecureString(footer_key)}); return key_map; } diff --git a/cpp/src/parquet/encryption/test_encryption_util.h b/cpp/src/parquet/encryption/test_encryption_util.h index 9bfc774278d..43ac25744bc 100644 --- a/cpp/src/parquet/encryption/test_encryption_util.h +++ b/cpp/src/parquet/encryption/test_encryption_util.h @@ -31,6 +31,7 @@ #include "arrow/filesystem/localfs.h" #include "arrow/status.h" #include "arrow/util/io_util.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/encryption.h" #include "parquet/test_util.h" @@ -40,12 +41,13 @@ class ParquetFileReader; namespace encryption::test { using ::arrow::internal::TemporaryDir; +using ::arrow::util::SecureString; constexpr int kFixedLength = 10; -const char kFooterEncryptionKey[] = "0123456789012345"; // 128bit/16 -const char kColumnEncryptionKey1[] = "1234567890123450"; -const char kColumnEncryptionKey2[] = "1234567890123451"; +const SecureString kFooterEncryptionKey("0123456789012345"); +const SecureString kColumnEncryptionKey1("1234567890123450"); +const SecureString kColumnEncryptionKey2("1234567890123451"); const char kFileName[] = "tester"; // Get the path of file inside parquet test data directory @@ -82,10 +84,10 @@ const char* const kNewColumnMasterKeys[] = {"9234567890123450", "923456789012345 // The result of this function will be used to set into TestOnlyInMemoryKmsClientFactory // as the key mapping to look at. -std::unordered_map BuildKeyMap(const char* const* column_ids, - const char* const* column_keys, - const char* footer_id, - const char* footer_key); +std::unordered_map BuildKeyMap(const char* const* column_ids, + const char* const* column_keys, + const char* footer_id, + const char* footer_key); // The result of this function will be used to set into EncryptionConfiguration // as column keys. diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index e1339ab48b5..969d6df858c 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -16,22 +16,25 @@ // under the License. #include "arrow/util/base64.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/key_toolkit_internal.h" #include "parquet/encryption/test_in_memory_kms.h" #include "parquet/exception.h" +using arrow::util::SecureString; + namespace parquet::encryption { -std::unordered_map +std::unordered_map TestOnlyLocalWrapInMemoryKms::master_key_map_; -std::unordered_map +std::unordered_map TestOnlyInServerWrapKms::unwrapping_master_key_map_; -std::unordered_map +std::unordered_map TestOnlyInServerWrapKms::wrapping_master_key_map_; void TestOnlyLocalWrapInMemoryKms::InitializeMasterKeys( - const std::unordered_map& master_keys_map) { + const std::unordered_map& master_keys_map) { master_key_map_ = master_keys_map; } @@ -39,20 +42,20 @@ TestOnlyLocalWrapInMemoryKms::TestOnlyLocalWrapInMemoryKms( const KmsConnectionConfig& kms_connection_config) : LocalWrapKmsClient(kms_connection_config) {} -std::string TestOnlyLocalWrapInMemoryKms::GetMasterKeyFromServer( +const SecureString& TestOnlyLocalWrapInMemoryKms::GetMasterKeyFromServer( const std::string& master_key_identifier) { // Always return the latest key version return master_key_map_.at(master_key_identifier); } void TestOnlyInServerWrapKms::InitializeMasterKeys( - const std::unordered_map& master_keys_map) { + const std::unordered_map& master_keys_map) { unwrapping_master_key_map_ = master_keys_map; wrapping_master_key_map_ = unwrapping_master_key_map_; } void TestOnlyInServerWrapKms::StartKeyRotation( - const std::unordered_map& new_master_key_map) { + const std::unordered_map& new_master_key_map) { if (new_master_key_map.empty()) { throw ParquetException("No encryption key list"); } @@ -63,32 +66,32 @@ void TestOnlyInServerWrapKms::FinishKeyRotation() { unwrapping_master_key_map_ = wrapping_master_key_map_; } -std::string TestOnlyInServerWrapKms::WrapKey(const std::string& key_bytes, +std::string TestOnlyInServerWrapKms::WrapKey(const SecureString& key_bytes, const std::string& master_key_identifier) { // Always use the latest key version for writing if (wrapping_master_key_map_.find(master_key_identifier) == wrapping_master_key_map_.end()) { throw ParquetException("Key not found: " + master_key_identifier); } - const std::string& master_key = wrapping_master_key_map_.at(master_key_identifier); + const SecureString& master_key = wrapping_master_key_map_.at(master_key_identifier); std::string aad = master_key_identifier; return internal::EncryptKeyLocally(key_bytes, master_key, aad); } -std::string TestOnlyInServerWrapKms::UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) { +SecureString TestOnlyInServerWrapKms::UnWrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) { if (unwrapping_master_key_map_.find(master_key_identifier) == unwrapping_master_key_map_.end()) { throw ParquetException("Key not found: " + master_key_identifier); } - const std::string& master_key = unwrapping_master_key_map_.at(master_key_identifier); + const SecureString& master_key = unwrapping_master_key_map_.at(master_key_identifier); std::string aad = master_key_identifier; return internal::DecryptKeyLocally(wrapped_key, master_key, aad); } -std::string TestOnlyInServerWrapKms::GetMasterKeyFromServer( +SecureString TestOnlyInServerWrapKms::GetMasterKeyFromServer( const std::string& master_key_identifier) { // Always return the latest key version return wrapping_master_key_map_.at(master_key_identifier); diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index c5fdc797b8c..df63984546a 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -34,13 +34,15 @@ class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient { explicit TestOnlyLocalWrapInMemoryKms(const KmsConnectionConfig& kms_connection_config); static void InitializeMasterKeys( - const std::unordered_map& master_keys_map); + const std::unordered_map& + master_keys_map); protected: - std::string GetMasterKeyFromServer(const std::string& master_key_identifier) override; + const ::arrow::util::SecureString& GetMasterKeyFromServer( + const std::string& master_key_identifier) override; private: - static std::unordered_map master_key_map_; + static std::unordered_map master_key_map_; }; // This is a mock class, built for testing only. Don't use it as an example of KmsClient @@ -48,25 +50,30 @@ class TestOnlyLocalWrapInMemoryKms : public LocalWrapKmsClient { class TestOnlyInServerWrapKms : public KmsClient { public: static void InitializeMasterKeys( - const std::unordered_map& master_keys_map); + const std::unordered_map& + master_keys_map); - std::string WrapKey(const std::string& key_bytes, + std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) override; + ::arrow::util::SecureString UnWrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) override; static void StartKeyRotation( - const std::unordered_map& new_master_keys_map); + const std::unordered_map& + new_master_keys_map); static void FinishKeyRotation(); private: - std::string GetMasterKeyFromServer(const std::string& master_key_identifier); + ::arrow::util::SecureString GetMasterKeyFromServer( + const std::string& master_key_identifier); // Different wrapping and unwrapping key maps to imitate versioning // and support key rotation. - static std::unordered_map unwrapping_master_key_map_; - static std::unordered_map wrapping_master_key_map_; + static std::unordered_map + unwrapping_master_key_map_; + static std::unordered_map + wrapping_master_key_map_; }; // This is a mock class, built for testing only. Don't use it as an example of @@ -75,7 +82,7 @@ class TestOnlyInMemoryKmsClientFactory : public KmsClientFactory { public: TestOnlyInMemoryKmsClientFactory( bool wrap_locally, - const std::unordered_map& master_keys_map) + const std::unordered_map& master_keys_map) : KmsClientFactory(wrap_locally) { TestOnlyLocalWrapInMemoryKms::InitializeMasterKeys(master_keys_map); TestOnlyInServerWrapKms::InitializeMasterKeys(master_keys_map); diff --git a/cpp/src/parquet/encryption/write_configurations_test.cc b/cpp/src/parquet/encryption/write_configurations_test.cc index 8e3d13551d2..ae86b51242a 100644 --- a/cpp/src/parquet/encryption/write_configurations_test.cc +++ b/cpp/src/parquet/encryption/write_configurations_test.cc @@ -76,9 +76,9 @@ class TestEncryptionConfiguration : public ::testing::Test { std::string path_to_double_field_ = kDoubleFieldName; std::string path_to_float_field_ = kFloatFieldName; std::string file_name_; - std::string kFooterEncryptionKey_ = std::string(kFooterEncryptionKey); - std::string kColumnEncryptionKey1_ = std::string(kColumnEncryptionKey1); - std::string kColumnEncryptionKey2_ = std::string(kColumnEncryptionKey2); + SecureString kFooterEncryptionKey_ = kFooterEncryptionKey; + SecureString kColumnEncryptionKey1_ = kColumnEncryptionKey1; + SecureString kColumnEncryptionKey2_ = kColumnEncryptionKey2; std::string kFileName_ = std::string(kFileName); void EncryptFile( diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 54df6922a1e..a7f50162daf 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -307,13 +307,6 @@ class SerializedFile : public ParquetFileReader::Contents { PARQUET_ASSIGN_OR_THROW(source_size_, source_->GetSize()); } - ~SerializedFile() override { - try { - Close(); - } catch (...) { - } - } - void Close() override {} std::shared_ptr GetRowGroup(int i) override { diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 97e502f46be..22e3fd4d05c 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -41,6 +41,8 @@ #include "parquet/statistics.h" #include "parquet/thrift_internal.h" +using ::arrow::util::SecureString; + namespace parquet { const ApplicationVersion& ApplicationVersion::PARQUET_251_FIXED_VERSION() { @@ -798,8 +800,8 @@ class FileMetaData::FileMetaDataImpl { encryption::kNonceLength); auto tag = reinterpret_cast(signature) + encryption::kNonceLength; - std::string key = file_decryptor_->GetFooterKey(); - std::string aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); + const SecureString& key = file_decryptor_->GetFooterKey(); + const std::string& aad = encryption::CreateFooterAad(file_decryptor_->file_aad()); auto aes_encryptor = encryption::AesEncryptor::Make(file_decryptor_->algorithm(), static_cast(key.size()), @@ -808,7 +810,7 @@ class FileMetaData::FileMetaDataImpl { std::shared_ptr encrypted_buffer = AllocateBuffer( file_decryptor_->pool(), aes_encryptor->CiphertextLength(serialized_len)); int32_t encrypted_len = aes_encryptor->SignedFooterEncrypt( - serialized_data_span, str2span(key), str2span(aad), nonce, + serialized_data_span, key.as_span(), str2span(aad), nonce, encrypted_buffer->mutable_span_as()); return 0 == memcmp(encrypted_buffer->data() + encrypted_len - encryption::kGcmTagLength,