From 36be975b3aa8a3ddae16462d1e1f55a76a61a8ea Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Tue, 25 Mar 2025 21:44:56 -0400 Subject: [PATCH] fix: convert to cstrings in PyString::from_object Fixes #5005. This only fixes the API and adds a test of it; it does not deprecate the API or introduce a version which takes `&CStr` directly. That can be done later. --- newsfragments/5008.fixed.md | 1 + src/types/string.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 newsfragments/5008.fixed.md diff --git a/newsfragments/5008.fixed.md b/newsfragments/5008.fixed.md new file mode 100644 index 00000000000..6b431d6e437 --- /dev/null +++ b/newsfragments/5008.fixed.md @@ -0,0 +1 @@ +Fix `PyString::from_object`, avoid out of bounds reads by null terminating the `encoding` and `errors` parameters \ No newline at end of file diff --git a/src/types/string.rs b/src/types/string.rs index 0b0de39c681..a389f0df234 100644 --- a/src/types/string.rs +++ b/src/types/string.rs @@ -10,6 +10,7 @@ use crate::types::PyBytes; use crate::IntoPy; use crate::{ffi, Bound, Py, PyAny, PyResult, Python}; use std::borrow::Cow; +use std::ffi::CString; use std::str; /// Deprecated alias for [`PyString`]. 
@@ -216,6 +217,8 @@ impl PyString { encoding: &str, errors: &str, ) -> PyResult<Bound<'py, PyString>> { + let encoding = CString::new(encoding)?; + let errors = CString::new(errors)?; unsafe { ffi::PyUnicode_FromEncodedObject( src.as_ptr(), @@ -670,6 +673,31 @@ mod tests { }) } + #[test] + fn test_string_from_object() { + Python::with_gil(|py| { + let py_bytes = PyBytes::new(py, b"ab\xFFcd"); + + let py_string = PyString::from_object(&py_bytes, "utf-8", "ignore").unwrap(); + + let result = py_string.to_cow().unwrap(); + assert_eq!(result, "abcd"); + }); + } + + #[test] + fn test_string_from_obect_with_invalid_encoding_errors() { + Python::with_gil(|py| { + let py_bytes = PyBytes::new(py, b"abcd"); + + let result = PyString::from_object(&py_bytes, "utf\0-8", "ignore"); + assert!(result.is_err()); + + let result = PyString::from_object(&py_bytes, "utf-8", "ign\0ore"); + assert!(result.is_err()); + }); + } + #[test] #[cfg(not(any(Py_LIMITED_API, PyPy)))] fn test_string_data_ucs1() {