From 79923908a56686e29c4db8c49b41fe89dcad30f4 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Mon, 7 Jul 2025 16:44:04 +0200 Subject: [PATCH 1/7] Remove deprecations to move users to more secure methods --- cpp/src/parquet/encryption/encryption.cc | 12 +---- cpp/src/parquet/encryption/encryption.h | 46 ++----------------- .../parquet/encryption/file_key_unwrapper.cc | 6 +-- .../parquet/encryption/file_key_unwrapper.h | 2 +- .../encryption/internal_file_decryptor.cc | 4 +- .../parquet/encryption/key_wrapping_test.cc | 4 +- cpp/src/parquet/encryption/kms_client.h | 36 ++------------- .../encryption/local_wrap_kms_client.cc | 2 +- .../encryption/local_wrap_kms_client.h | 2 +- cpp/src/parquet/encryption/properties_test.cc | 6 +-- .../parquet/encryption/test_in_memory_kms.cc | 2 +- .../parquet/encryption/test_in_memory_kms.h | 2 +- 12 files changed, 23 insertions(+), 101 deletions(-) diff --git a/cpp/src/parquet/encryption/encryption.cc b/cpp/src/parquet/encryption/encryption.cc index e156ec00446..701b21ee7f1 100644 --- a/cpp/src/parquet/encryption/encryption.cc +++ b/cpp/src/parquet/encryption/encryption.cc @@ -44,15 +44,10 @@ void StringKeyIdRetriever::PutKey(std::string key_id, SecureString key) { key_map_.insert({std::move(key_id), std::move(key)}); } -SecureString StringKeyIdRetriever::GetKeyById(const std::string& key_id) { +SecureString StringKeyIdRetriever::GetKey(const std::string& key_id) { return key_map_.at(key_id); } -ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( - std::string column_key) { - return key(SecureString(std::move(column_key))); -} - ColumnEncryptionProperties::Builder* ColumnEncryptionProperties::Builder::key( SecureString column_key) { if (column_key.empty()) return this; @@ -94,11 +89,6 @@ FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::column_key return this; } -FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( - std::string footer_key) { - return this->footer_key(SecureString(std::move(footer_key))); -} - FileDecryptionProperties::Builder* FileDecryptionProperties::Builder::footer_key( SecureString footer_key) { if (footer_key.empty()) { diff --git a/cpp/src/parquet/encryption/encryption.h b/cpp/src/parquet/encryption/encryption.h index 8d7bb9489af..d822cc3c184 100644 --- a/cpp/src/parquet/encryption/encryption.h +++ b/cpp/src/parquet/encryption/encryption.h @@ -49,22 +49,7 @@ using ColumnPathToEncryptionPropertiesMap = class PARQUET_EXPORT DecryptionKeyRetriever { public: /// \brief Retrieve a key. - /// \deprecated Deprecated since 22.0.0. - /// Implement GetKeyById(const std::string&) instead. - ARROW_DEPRECATED( - "Deprecated in 22.0.0. " - "Implement GetKeyById(const std::string&) instead.") - virtual std::string GetKey(const std::string& key_id) { - throw ParquetException("Not implemented"); - } - - /// \brief Retrieve a key by its id. - virtual ::arrow::util::SecureString GetKeyById(const std::string& key_id) { - ARROW_SUPPRESS_DEPRECATION_WARNING - auto key = ::arrow::util::SecureString(GetKey(key_id)); - ARROW_UNSUPPRESS_DEPRECATION_WARNING - return key; - } + virtual ::arrow::util::SecureString GetKey(const std::string& key_id) = 0; virtual ~DecryptionKeyRetriever() {} }; @@ -74,18 +59,16 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(uint32_t key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKeyById(const std::string& key_id_string) override { + ::arrow::util::SecureString GetKey(const std::string& key_id_string) override { // key_id_string is string but for IntegerKeyIdRetriever it encodes // a native-endian 32 bit unsigned integer key_id uint32_t key_id; assert(key_id_string.size() == sizeof(key_id)); memcpy(&key_id, key_id_string.data(), sizeof(key_id)); - return GetKeyById(key_id); + return key_map_.at(key_id); } - ::arrow::util::SecureString GetKeyById(uint32_t key_id) { return key_map_.at(key_id); } - private: std::map key_map_; }; @@ -94,7 +77,7 @@ class PARQUET_EXPORT IntegerKeyIdRetriever : public DecryptionKeyRetriever { class PARQUET_EXPORT StringKeyIdRetriever : public DecryptionKeyRetriever { public: void PutKey(std::string key_id, ::arrow::util::SecureString key); - ::arrow::util::SecureString GetKeyById(const std::string& key_id) override; + ::arrow::util::SecureString GetKey(const std::string& key_id) override; private: std::map key_map_; @@ -143,11 +126,6 @@ class PARQUET_EXPORT ColumnEncryptionProperties { /// be encrypted with the footer key. /// keyBytes Key length must be either 16, 24 or 32 bytes. /// Caller is responsible for wiping out the input key array. - /// \deprecated "Deprecated in 22.0.0. Use key(arrow::util::SecureString) instead." - ARROW_DEPRECATED("Deprecated in 22.0.0. Use key(arrow::util::SecureString) instead.") - Builder* key(std::string column_key); - - /// \copydoc key(std::string) Builder* key(::arrow::util::SecureString column_key); /// Set a key retrieval metadata. @@ -259,14 +237,6 @@ class PARQUET_EXPORT FileDecryptionProperties { /// will be wiped out (array values set to 0). /// Caller is responsible for wiping out the input key array. /// param footerKey Key length must be either 16, 24 or 32 bytes. - /// \deprecated Deprecated since 22.0.0. - /// Use footer_key(arrow::util::SecureString) instead. - ARROW_DEPRECATED( - "Deprecated in 22.0.0. " - "Use footer_key(arrow::util::SecureString) instead.") - Builder* footer_key(std::string footer_key); - - /// \copydoc footer_key(std::string footer_key) Builder* footer_key(::arrow::util::SecureString footer_key); /// Set explicit column keys (decryption properties). @@ -376,14 +346,6 @@ class PARQUET_EXPORT FileEncryptionProperties { public: class PARQUET_EXPORT Builder { public: - /// \deprecated Deprecated since 22.0.0. Use Builder(arrow::util::SecureString) - /// instead. - ARROW_DEPRECATED( - "Deprecated in 22.0.0. " - "Use Builder(arrow::util::SecureString) instead") - explicit Builder(std::string footer_key) - : Builder(::arrow::util::SecureString(std::move(footer_key))) {} - explicit Builder(::arrow::util::SecureString footer_key) : parquet_cipher_(kDefaultEncryptionAlgorithm), encrypted_footer_(kDefaultEncryptedFooter), diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index d7463590358..4dc1492a0b7 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -69,7 +69,7 @@ FileKeyUnwrapper::FileKeyUnwrapper( kms_connection_config.key_access_token(), cache_entry_lifetime_seconds_); } -SecureString FileKeyUnwrapper::GetKeyById(const std::string& key_metadata_bytes) { +SecureString FileKeyUnwrapper::GetKey(const std::string& key_metadata_bytes) { // key_metadata is expected to be in UTF8 encoding ::arrow::util::InitializeUTF8(); if (!::arrow::util::ValidateUTF8( @@ -110,7 +110,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma SecureString data_key; if (!double_wrapping) { - data_key = kms_client->UnWrapKey(encoded_wrapped_dek, master_key_id); + data_key = kms_client->UnwrapKey(encoded_wrapped_dek, master_key_id); } else { // Get Key Encryption Key const std::string& encoded_kek_id = key_material.kek_id(); @@ -118,7 +118,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma const SecureString kek_bytes = kek_per_kek_id_->GetOrInsert( encoded_kek_id, [kms_client, encoded_wrapped_kek, master_key_id]() { - return kms_client->UnWrapKey(encoded_wrapped_kek, master_key_id); + return kms_client->UnwrapKey(encoded_wrapped_kek, master_key_id); }); // Decrypt the data key diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.h b/cpp/src/parquet/encryption/file_key_unwrapper.h index d674b5cf2ac..c86f68121c8 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.h +++ b/cpp/src/parquet/encryption/file_key_unwrapper.h @@ -65,7 +65,7 @@ class PARQUET_EXPORT FileKeyUnwrapper : public DecryptionKeyRetriever { std::shared_ptr key_material_store); /// Get the data key from key metadata - ::arrow::util::SecureString GetKeyById(const std::string& key_metadata_bytes) override; + ::arrow::util::SecureString GetKey(const std::string& key_metadata_bytes) override; /// Get the data key along with the master key id from key material KeyWithMasterId GetDataEncryptionKey(const KeyMaterial& key_material); diff --git a/cpp/src/parquet/encryption/internal_file_decryptor.cc b/cpp/src/parquet/encryption/internal_file_decryptor.cc index efd1ec8067c..b90d3158559 100644 --- a/cpp/src/parquet/encryption/internal_file_decryptor.cc +++ b/cpp/src/parquet/encryption/internal_file_decryptor.cc @@ -78,7 +78,7 @@ const SecureString& InternalFileDecryptor::GetFooterKey() { if (properties_->key_retriever() == nullptr) throw ParquetException("No footer key or key retriever"); try { - footer_key_ = properties_->key_retriever()->GetKeyById(footer_key_metadata_); + footer_key_ = properties_->key_retriever()->GetKey(footer_key_metadata_); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "Footer key: access denied " << e.what() << "\n"; @@ -117,7 +117,7 @@ SecureString InternalFileDecryptor::GetColumnKey(const std::string& column_path, if (column_key.empty() && !column_key_metadata.empty() && properties_->key_retriever() != nullptr) { try { - column_key = properties_->key_retriever()->GetKeyById(column_key_metadata); + column_key = properties_->key_retriever()->GetKey(column_key_metadata); } catch (KeyAccessDeniedException& e) { std::stringstream ss; ss << "HiddenColumnException, path=" + column_path + " " << e.what() << "\n"; diff --git a/cpp/src/parquet/encryption/key_wrapping_test.cc b/cpp/src/parquet/encryption/key_wrapping_test.cc index 4ff3903fb7e..04494d8cc21 100644 --- a/cpp/src/parquet/encryption/key_wrapping_test.cc +++ b/cpp/src/parquet/encryption/key_wrapping_test.cc @@ -86,10 +86,10 @@ class KeyWrappingTest : public ::testing::Test { FileKeyUnwrapper unwrapper(&key_toolkit, kms_connection_config_, cache_entry_lifetime_seconds, readable_file_path, file_system); - SecureString footer_key = unwrapper.GetKeyById(key_metadata_json_footer); + SecureString footer_key = unwrapper.GetKey(key_metadata_json_footer); ASSERT_EQ(footer_key, kFooterEncryptionKey); - SecureString column_key = unwrapper.GetKeyById(key_metadata_json_column); + SecureString column_key = unwrapper.GetKey(key_metadata_json_column); ASSERT_EQ(column_key, kColumnEncryptionKey1); } diff --git a/cpp/src/parquet/encryption/kms_client.h b/cpp/src/parquet/encryption/kms_client.h index 09133439da3..9c67e7cae49 100644 --- a/cpp/src/parquet/encryption/kms_client.h +++ b/cpp/src/parquet/encryption/kms_client.h @@ -84,42 +84,12 @@ class PARQUET_EXPORT KmsClient { /// /// Encrypts it with the master key, encodes the result /// and potentially adds a KMS-specific metadata. - /// - /// \deprecated Deprecated since 22.0.0. Implement - /// WrapKey(const SecureString&, const std::string&) instead. - ARROW_DEPRECATED( - "Deprecated in 22.0.0. " - "Implement WrapKey(const SecureString&, const std::string&) instead.") - virtual std::string WrapKey(const std::string& key_bytes, - const std::string& master_key_identifier) { - throw ParquetException("Not implemented"); - } - - /// \copydoc WrapKey(const std::string&, const std::string&) virtual std::string WrapKey(const ::arrow::util::SecureString& key_bytes, - const std::string& master_key_identifier) { - ARROW_SUPPRESS_DEPRECATION_WARNING - auto key = WrapKey(std::string(key_bytes.as_view()), master_key_identifier); - ARROW_UNSUPPRESS_DEPRECATION_WARNING - return key; - } + const std::string& master_key_identifier) = 0; /// \brief Decrypts (unwraps) a key with the master key. - /// \deprecated Deprecated since 22.0.0. Implement UnWrapKey instead. - ARROW_DEPRECATED("Deprecated in 22.0.0. Implement UnWrapKey instead.") - virtual std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) { - throw ParquetException("Not implemented"); - } - - /// \copydoc UnwrapKey(const std::string&, const std::string&) - virtual ::arrow::util::SecureString UnWrapKey( - const std::string& wrapped_key, const std::string& master_key_identifier) { - ARROW_SUPPRESS_DEPRECATION_WARNING - auto key = ::arrow::util::SecureString(UnwrapKey(wrapped_key, master_key_identifier)); - ARROW_UNSUPPRESS_DEPRECATION_WARNING - return key; - } + virtual ::arrow::util::SecureString UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) = 0; virtual ~KmsClient() {} }; diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.cc b/cpp/src/parquet/encryption/local_wrap_kms_client.cc index 6bd80479a03..80543c2932a 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.cc +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.cc @@ -84,7 +84,7 @@ std::string LocalWrapKmsClient::WrapKey(const SecureString& key_bytes, return LocalKeyWrap::CreateSerialized(encrypted_encoded_key); } -SecureString LocalWrapKmsClient::UnWrapKey(const std::string& wrapped_key, +SecureString LocalWrapKmsClient::UnwrapKey(const std::string& wrapped_key, const std::string& master_key_identifier) { LocalKeyWrap key_wrap = LocalKeyWrap::Parse(wrapped_key); const std::string& master_key_version = key_wrap.master_key_version(); diff --git a/cpp/src/parquet/encryption/local_wrap_kms_client.h b/cpp/src/parquet/encryption/local_wrap_kms_client.h index 7eedbaaf77e..607c75a4c2e 100644 --- a/cpp/src/parquet/encryption/local_wrap_kms_client.h +++ b/cpp/src/parquet/encryption/local_wrap_kms_client.h @@ -38,7 +38,7 @@ class PARQUET_EXPORT LocalWrapKmsClient : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - ::arrow::util::SecureString UnWrapKey( + ::arrow::util::SecureString UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; protected: diff --git a/cpp/src/parquet/encryption/properties_test.cc b/cpp/src/parquet/encryption/properties_test.cc index 3f39cc8eb64..1ceda7ac032 100644 --- a/cpp/src/parquet/encryption/properties_test.cc +++ b/cpp/src/parquet/encryption/properties_test.cc @@ -224,9 +224,9 @@ TEST(TestDecryptionProperties, UseKeyRetriever) { std::shared_ptr props = builder.build(); auto out_key_retriever = props->key_retriever(); - ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKeyById("kf")); - ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKeyById("kc1")); - ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKeyById("kc2")); + ASSERT_EQ(kFooterEncryptionKey, out_key_retriever->GetKey("kf")); + ASSERT_EQ(kColumnEncryptionKey1, out_key_retriever->GetKey("kc1")); + ASSERT_EQ(kColumnEncryptionKey2, out_key_retriever->GetKey("kc2")); } TEST(TestDecryptionProperties, SupplyAadPrefix) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.cc b/cpp/src/parquet/encryption/test_in_memory_kms.cc index 969d6df858c..6af15d177fd 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.cc +++ b/cpp/src/parquet/encryption/test_in_memory_kms.cc @@ -79,7 +79,7 @@ std::string TestOnlyInServerWrapKms::WrapKey(const SecureString& key_bytes, return internal::EncryptKeyLocally(key_bytes, master_key, aad); } -SecureString TestOnlyInServerWrapKms::UnWrapKey( +SecureString TestOnlyInServerWrapKms::UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) { if (unwrapping_master_key_map_.find(master_key_identifier) == unwrapping_master_key_map_.end()) { diff --git a/cpp/src/parquet/encryption/test_in_memory_kms.h b/cpp/src/parquet/encryption/test_in_memory_kms.h index df63984546a..b9d4169c634 100644 --- a/cpp/src/parquet/encryption/test_in_memory_kms.h +++ b/cpp/src/parquet/encryption/test_in_memory_kms.h @@ -56,7 +56,7 @@ class TestOnlyInServerWrapKms : public KmsClient { std::string WrapKey(const ::arrow::util::SecureString& key_bytes, const std::string& master_key_identifier) override; - ::arrow::util::SecureString UnWrapKey( + ::arrow::util::SecureString UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) override; static void StartKeyRotation( From 13b89968f08cdf4f8cf3dd841323f8c29ab4635d Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 6 Jun 2025 13:20:41 +0200 Subject: [PATCH 2/7] Make Python KmsClient work with SecureString-based CKmsClient interface --- python/pyarrow/_parquet_encryption.pyx | 21 ++++++++++++++++--- python/pyarrow/includes/libarrow.pxd | 9 ++++++++ .../includes/libparquet_encryption.pxd | 11 +++++----- .../src/arrow/python/parquet_encryption.cc | 12 +++++------ .../src/arrow/python/parquet_encryption.h | 12 ++++++----- 5 files changed, 46 insertions(+), 19 deletions(-) diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index 95e167cc53f..feecabd9580 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -28,6 +28,12 @@ from pyarrow.lib cimport _Weakrefable from pyarrow.lib import tobytes, frombytes +cdef extern from "Python.h": + # To let us get a PyObject* and avoid Cython auto-ref-counting + PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"( + char *v, Py_ssize_t len) except NULL + + cdef ParquetCipher cipher_from_name(name): name = name.upper() if name == 'AES_GCM_V1': @@ -300,8 +306,12 @@ cdef class KmsConnectionConfig(_Weakrefable): # Callback definitions for CPyKmsClientVtable cdef void _cb_wrap_key( - handler, const c_string& key_bytes, + handler, const CSecureString& key, const c_string& master_key_identifier, c_string* out) except *: + cdef: + cpp_string_view view = key.as_view() + key_bytes = PyObject_to_object( + PyBytes_FromStringAndSizeNative(view.data(), view.size())) mkid_str = frombytes(master_key_identifier) wrapped_key = handler.wrap_key(key_bytes, mkid_str) out[0] = tobytes(wrapped_key) @@ -309,11 +319,16 @@ cdef void _cb_wrap_key( cdef void _cb_unwrap_key( handler, const c_string& wrapped_key, - const c_string& master_key_identifier, c_string* out) except *: + const c_string& master_key_identifier, shared_ptr[CSecureString]* out) except *: mkid_str = frombytes(master_key_identifier) wk_str = frombytes(wrapped_key) key = handler.unwrap_key(wk_str, mkid_str) - out[0] = tobytes(key) + + cdef: + c_string cstr = tobytes(key) + shared_ptr[CSecureString] css = shared_ptr[CSecureString](new CSecureString(move(cstr))) + + out[0] = css cdef class KmsClient(_Weakrefable): diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 336ab615a67..667d3be8ce9 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -3130,6 +3130,15 @@ cdef extern from "arrow/util/iterator.h" namespace "arrow" nogil: RangeIterator end() CIterator[T] MakeVectorIterator[T](vector[T] v) + +cdef extern from "arrow/util/secure_string.h" namespace "arrow" nogil: + cdef cppclass CSecureString" arrow::util::SecureString": + CSecureString(c_string s) + CSecureString(const CSecureString& s) + CSecureString(size_t n, char c) + cpp_string_view as_view() + + cdef extern from "arrow/util/thread_pool.h" namespace "arrow" nogil: int GetCpuThreadPoolCapacity() CStatus SetCpuThreadPoolCapacity(int threads) diff --git a/python/pyarrow/includes/libparquet_encryption.pxd b/python/pyarrow/includes/libparquet_encryption.pxd index 4041bf53aac..fbb8df1b23f 100644 --- a/python/pyarrow/includes/libparquet_encryption.pxd +++ b/python/pyarrow/includes/libparquet_encryption.pxd @@ -18,6 +18,7 @@ # distutils: language = c++ from pyarrow.includes.common cimport * +from pyarrow.includes.libarrow cimport CSecureString from pyarrow._parquet cimport (ParquetCipher, CFileEncryptionProperties, CFileDecryptionProperties, @@ -28,10 +29,10 @@ from pyarrow._parquet cimport (ParquetCipher, cdef extern from "parquet/encryption/kms_client.h" \ namespace "parquet::encryption" nogil: cdef cppclass CKmsClient" parquet::encryption::KmsClient": - c_string WrapKey(const c_string& key_bytes, + c_string WrapKey(const CSecureString& key, const c_string& master_key_identifier) except + - c_string UnwrapKey(const c_string& wrapped_key, - const c_string& master_key_identifier) except + + CSecureString UnwrapKey(const c_string& wrapped_key, + const c_string& master_key_identifier) except + cdef cppclass CKeyAccessToken" parquet::encryption::KeyAccessToken": CKeyAccessToken(const c_string value) @@ -49,9 +50,9 @@ cdef extern from "parquet/encryption/kms_client.h" \ # Callbacks for implementing Python kms clients # Use typedef to emulate syntax for std::function ctypedef void CallbackWrapKey( - object, const c_string&, const c_string&, c_string*) + object, const CSecureString&, const c_string&, c_string*) ctypedef void CallbackUnwrapKey( - object, const c_string&, const c_string&, c_string*) + object, const c_string&, const c_string&, shared_ptr[CSecureString]*) cdef extern from "parquet/encryption/kms_client_factory.h" \ namespace "parquet::encryption" nogil: diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.cc b/python/pyarrow/src/arrow/python/parquet_encryption.cc index a5f924bce78..de835fccacc 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.cc +++ b/python/pyarrow/src/arrow/python/parquet_encryption.cc @@ -30,11 +30,11 @@ PyKmsClient::PyKmsClient(PyObject* handler, PyKmsClientVtable vtable) PyKmsClient::~PyKmsClient() {} -std::string PyKmsClient::WrapKey(const std::string& key_bytes, +std::string PyKmsClient::WrapKey(const ::arrow::util::SecureString& key, const std::string& master_key_identifier) { std::string wrapped; auto st = SafeCallIntoPython([&]() -> Status { - vtable_.wrap_key(handler_.obj(), key_bytes, master_key_identifier, &wrapped); + vtable_.wrap_key(handler_.obj(), key, master_key_identifier, &wrapped); return CheckPyError(); }); if (!st.ok()) { @@ -43,9 +43,9 @@ std::string PyKmsClient::WrapKey(const std::string& key_bytes, return wrapped; } -std::string PyKmsClient::UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) { - std::string unwrapped; +::arrow::util::SecureString PyKmsClient::UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) { + std::shared_ptr unwrapped; auto st = SafeCallIntoPython([&]() -> Status { vtable_.unwrap_key(handler_.obj(), wrapped_key, master_key_identifier, &unwrapped); return CheckPyError(); @@ -53,7 +53,7 @@ std::string PyKmsClient::UnwrapKey(const std::string& wrapped_key, if (!st.ok()) { throw ::parquet::ParquetStatusException(st); } - return unwrapped; + return *unwrapped; } PyKmsClientFactory::PyKmsClientFactory(PyObject* handler, PyKmsClientFactoryVtable vtable) diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h index 7a107c89f0b..ecf3be27cb8 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -22,6 +22,7 @@ #include "arrow/python/common.h" #include "arrow/python/visibility.h" #include "arrow/util/macros.h" +#include "arrow/util/secure_string.h" #include "parquet/encryption/crypto_factory.h" #include "parquet/encryption/kms_client.h" #include "parquet/encryption/kms_client_factory.h" @@ -56,11 +57,12 @@ namespace encryption { /// Python. class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable { public: - std::function wrap_key; std::function + const std::string& master_key_identifier, + std::shared_ptr<::arrow::util::SecureString>* out)> unwrap_key; }; @@ -71,11 +73,11 @@ class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClient PyKmsClient(PyObject* handler, PyKmsClientVtable vtable); ~PyKmsClient() override; - std::string WrapKey(const std::string& key_bytes, + std::string WrapKey(const ::arrow::util::SecureString& key, const std::string& master_key_identifier) override; - std::string UnwrapKey(const std::string& wrapped_key, - const std::string& master_key_identifier) override; + ::arrow::util::SecureString UnwrapKey( + const std::string& wrapped_key, const std::string& master_key_identifier) override; private: OwnedRefNoGIL handler_; From a586588d1e24b60712b92a77ea6963594b02fb73 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 22 Aug 2025 07:29:33 +0200 Subject: [PATCH 3/7] Return CSecureString via `CSecureString*` not `shared_ptr[CSecureString]*` --- python/pyarrow/_parquet_encryption.pyx | 4 ++-- python/pyarrow/includes/libarrow.pxd | 1 + python/pyarrow/includes/libparquet_encryption.pxd | 2 +- python/pyarrow/src/arrow/python/parquet_encryption.cc | 4 ++-- python/pyarrow/src/arrow/python/parquet_encryption.h | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index feecabd9580..6d6a52f3eb7 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -319,14 +319,14 @@ cdef void _cb_wrap_key( cdef void _cb_unwrap_key( handler, const c_string& wrapped_key, - const c_string& master_key_identifier, shared_ptr[CSecureString]* out) except *: + const c_string& master_key_identifier, CSecureString* out) except *: mkid_str = frombytes(master_key_identifier) wk_str = frombytes(wrapped_key) key = handler.unwrap_key(wk_str, mkid_str) cdef: c_string cstr = tobytes(key) - shared_ptr[CSecureString] css = shared_ptr[CSecureString](new CSecureString(move(cstr))) + CSecureString css = CSecureString(move(cstr)) out[0] = css diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 667d3be8ce9..39dc3a77d98 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -3133,6 +3133,7 @@ cdef extern from "arrow/util/iterator.h" namespace "arrow" nogil: cdef extern from "arrow/util/secure_string.h" namespace "arrow" nogil: cdef cppclass CSecureString" arrow::util::SecureString": + CSecureString() CSecureString(c_string s) CSecureString(const CSecureString& s) CSecureString(size_t n, char c) diff --git a/python/pyarrow/includes/libparquet_encryption.pxd b/python/pyarrow/includes/libparquet_encryption.pxd index fbb8df1b23f..7e031925af6 100644 --- a/python/pyarrow/includes/libparquet_encryption.pxd +++ b/python/pyarrow/includes/libparquet_encryption.pxd @@ -52,7 +52,7 @@ cdef extern from "parquet/encryption/kms_client.h" \ ctypedef void CallbackWrapKey( object, const CSecureString&, const c_string&, c_string*) ctypedef void CallbackUnwrapKey( - object, const c_string&, const c_string&, shared_ptr[CSecureString]*) + object, const c_string&, const c_string&, CSecureString*) cdef extern from "parquet/encryption/kms_client_factory.h" \ namespace "parquet::encryption" nogil: diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.cc b/python/pyarrow/src/arrow/python/parquet_encryption.cc index de835fccacc..1016cdd3a37 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.cc +++ b/python/pyarrow/src/arrow/python/parquet_encryption.cc @@ -45,7 +45,7 @@ std::string PyKmsClient::WrapKey(const ::arrow::util::SecureString& key, ::arrow::util::SecureString PyKmsClient::UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) { - std::shared_ptr unwrapped; + arrow::util::SecureString unwrapped; auto st = SafeCallIntoPython([&]() -> Status { vtable_.unwrap_key(handler_.obj(), wrapped_key, master_key_identifier, &unwrapped); return CheckPyError(); @@ -53,7 +53,7 @@ ::arrow::util::SecureString PyKmsClient::UnwrapKey( if (!st.ok()) { throw ::parquet::ParquetStatusException(st); } - return *unwrapped; + return unwrapped; } PyKmsClientFactory::PyKmsClientFactory(PyObject* handler, PyKmsClientFactoryVtable vtable) diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h index ecf3be27cb8..3e57a761945 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -62,7 +62,7 @@ class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable { wrap_key; std::function* out)> + ::arrow::util::SecureString* out)> unwrap_key; }; From 40bead518a9d74c2705ee4d43d1df530485b62b5 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 22 Aug 2025 07:47:39 +0200 Subject: [PATCH 4/7] Return CSecureString via `CSecureString&` not `CSecureString*` --- python/pyarrow/_parquet_encryption.pyx | 13 ++++++------- python/pyarrow/includes/libparquet_encryption.pxd | 2 +- .../pyarrow/src/arrow/python/parquet_encryption.cc | 2 +- .../pyarrow/src/arrow/python/parquet_encryption.h | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index 6d6a52f3eb7..ac1760545cd 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -319,16 +319,15 @@ cdef void _cb_wrap_key( cdef void _cb_unwrap_key( handler, const c_string& wrapped_key, - const c_string& master_key_identifier, CSecureString* out) except *: + const c_string& master_key_identifier, CSecureString& out) except *: + cdef: + c_string cstr + mkid_str = frombytes(master_key_identifier) wk_str = frombytes(wrapped_key) key = handler.unwrap_key(wk_str, mkid_str) - - cdef: - c_string cstr = tobytes(key) - CSecureString css = CSecureString(move(cstr)) - - out[0] = css + cstr = tobytes(key) + out = CSecureString(move(cstr)) cdef class KmsClient(_Weakrefable): diff --git a/python/pyarrow/includes/libparquet_encryption.pxd b/python/pyarrow/includes/libparquet_encryption.pxd index 7e031925af6..1d21b84f916 100644 --- a/python/pyarrow/includes/libparquet_encryption.pxd +++ b/python/pyarrow/includes/libparquet_encryption.pxd @@ -52,7 +52,7 @@ cdef extern from "parquet/encryption/kms_client.h" \ ctypedef void CallbackWrapKey( object, const CSecureString&, const c_string&, c_string*) ctypedef void CallbackUnwrapKey( - object, const c_string&, const c_string&, CSecureString*) + object, const c_string&, const c_string&, CSecureString&) cdef extern from "parquet/encryption/kms_client_factory.h" \ namespace "parquet::encryption" nogil: diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.cc b/python/pyarrow/src/arrow/python/parquet_encryption.cc index 1016cdd3a37..dc2604eebf2 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.cc +++ b/python/pyarrow/src/arrow/python/parquet_encryption.cc @@ -47,7 +47,7 @@ ::arrow::util::SecureString PyKmsClient::UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) { arrow::util::SecureString unwrapped; auto st = SafeCallIntoPython([&]() -> Status { - vtable_.unwrap_key(handler_.obj(), wrapped_key, master_key_identifier, &unwrapped); + vtable_.unwrap_key(handler_.obj(), wrapped_key, master_key_identifier, unwrapped); return CheckPyError(); }); if (!st.ok()) { diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h index 3e57a761945..457bb45aa1a 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -62,7 +62,7 @@ class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable { wrap_key; std::function + ::arrow::util::SecureString& out)> unwrap_key; }; From a4a1176b84232ecd98424453877de41eeb8ccbb2 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 22 Aug 2025 08:03:31 +0200 Subject: [PATCH 5/7] Use PyBytes_FromStringAndSize through cimport --- python/pyarrow/_parquet_encryption.pyx | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index ac1760545cd..72d79a70793 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -20,6 +20,7 @@ from datetime import timedelta +from cpython.bytes cimport PyBytes_FromStringAndSize from cython.operator cimport dereference as deref from pyarrow.includes.common cimport * @@ -28,12 +29,6 @@ from pyarrow.lib cimport _Weakrefable from pyarrow.lib import tobytes, frombytes -cdef extern from "Python.h": - # To let us get a PyObject* and avoid Cython auto-ref-counting - PyObject* PyBytes_FromStringAndSizeNative" PyBytes_FromStringAndSize"( - char *v, Py_ssize_t len) except NULL - - cdef ParquetCipher cipher_from_name(name): name = name.upper() if name == 'AES_GCM_V1': @@ -310,8 +305,7 @@ cdef void _cb_wrap_key( const c_string& master_key_identifier, c_string* out) except *: cdef: cpp_string_view view = key.as_view() - key_bytes = PyObject_to_object( - PyBytes_FromStringAndSizeNative(view.data(), view.size())) + key_bytes = PyBytes_FromStringAndSize(view.data(), view.size()) mkid_str = frombytes(master_key_identifier) wrapped_key = handler.wrap_key(key_bytes, mkid_str) out[0] = tobytes(wrapped_key) From f01a81b718fe1bbb3faa753602b67b0c1dcb60cd Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Fri, 22 Aug 2025 09:54:42 +0200 Subject: [PATCH 6/7] Remove cdef from callback implementations --- python/pyarrow/_parquet_encryption.pyx | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index 72d79a70793..cd3822d74cb 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -303,8 +303,7 @@ cdef class KmsConnectionConfig(_Weakrefable): cdef void _cb_wrap_key( handler, const CSecureString& key, const c_string& master_key_identifier, c_string* out) except *: - cdef: - cpp_string_view view = key.as_view() + view = key.as_view() key_bytes = PyBytes_FromStringAndSize(view.data(), view.size()) mkid_str = frombytes(master_key_identifier) wrapped_key = handler.wrap_key(key_bytes, mkid_str) @@ -314,13 +313,10 @@ cdef void _cb_wrap_key( cdef void _cb_unwrap_key( handler, const c_string& wrapped_key, const c_string& master_key_identifier, CSecureString& out) except *: - cdef: - c_string cstr - mkid_str = frombytes(master_key_identifier) wk_str = frombytes(wrapped_key) key = handler.unwrap_key(wk_str, mkid_str) - cstr = tobytes(key) + cstr = tobytes(key) out = CSecureString(move(cstr)) From b12503141711f45b55da8fe5d368992101544c1b Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 27 Aug 2025 13:38:09 +0200 Subject: [PATCH 7/7] Return CSecureString* instead of CSecureString& --- python/pyarrow/_parquet_encryption.pyx | 4 ++-- python/pyarrow/includes/libparquet_encryption.pxd | 2 +- python/pyarrow/src/arrow/python/parquet_encryption.cc | 2 +- python/pyarrow/src/arrow/python/parquet_encryption.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/_parquet_encryption.pyx b/python/pyarrow/_parquet_encryption.pyx index cd3822d74cb..f95464e3031 100644 --- a/python/pyarrow/_parquet_encryption.pyx +++ b/python/pyarrow/_parquet_encryption.pyx @@ -312,12 +312,12 @@ cdef void _cb_wrap_key( cdef void _cb_unwrap_key( handler, const c_string& wrapped_key, - const c_string& master_key_identifier, CSecureString& out) except *: + const c_string& master_key_identifier, CSecureString* out) except *: mkid_str = frombytes(master_key_identifier) wk_str = frombytes(wrapped_key) key = handler.unwrap_key(wk_str, mkid_str) cstr = tobytes(key) - out = CSecureString(move(cstr)) + out[0] = CSecureString(move(cstr)) cdef class KmsClient(_Weakrefable): diff --git a/python/pyarrow/includes/libparquet_encryption.pxd b/python/pyarrow/includes/libparquet_encryption.pxd index 1d21b84f916..7e031925af6 100644 --- a/python/pyarrow/includes/libparquet_encryption.pxd +++ b/python/pyarrow/includes/libparquet_encryption.pxd @@ -52,7 +52,7 @@ cdef extern from "parquet/encryption/kms_client.h" \ ctypedef void CallbackWrapKey( object, const CSecureString&, const c_string&, c_string*) ctypedef void CallbackUnwrapKey( - object, const c_string&, const c_string&, CSecureString&) + object, const c_string&, const c_string&, CSecureString*) cdef extern from "parquet/encryption/kms_client_factory.h" \ namespace "parquet::encryption" nogil: diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.cc b/python/pyarrow/src/arrow/python/parquet_encryption.cc index dc2604eebf2..1016cdd3a37 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.cc +++ b/python/pyarrow/src/arrow/python/parquet_encryption.cc @@ -47,7 +47,7 @@ ::arrow::util::SecureString PyKmsClient::UnwrapKey( const std::string& wrapped_key, const std::string& master_key_identifier) { arrow::util::SecureString unwrapped; auto st = SafeCallIntoPython([&]() -> Status { - vtable_.unwrap_key(handler_.obj(), wrapped_key, master_key_identifier, unwrapped); + vtable_.unwrap_key(handler_.obj(), wrapped_key, master_key_identifier, &unwrapped); return CheckPyError(); }); if (!st.ok()) { diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/arrow/python/parquet_encryption.h index 457bb45aa1a..3e57a761945 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -62,7 +62,7 @@ class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable { wrap_key; std::function + ::arrow::util::SecureString* out)> unwrap_key; };