diff --git a/newsfragments/5008.fixed.md b/newsfragments/5008.fixed.md
new file mode 100644
index 00000000000..6b431d6e437
--- /dev/null
+++ b/newsfragments/5008.fixed.md
@@ -0,0 +1 @@
+Fix `PyString::from_object` to avoid out-of-bounds reads by null-terminating the `encoding` and `errors` parameters
\ No newline at end of file
diff --git a/src/types/string.rs b/src/types/string.rs
index 0b0de39c681..a389f0df234 100644
--- a/src/types/string.rs
+++ b/src/types/string.rs
@@ -10,6 +10,7 @@ use crate::types::PyBytes;
 use crate::IntoPy;
 use crate::{ffi, Bound, Py, PyAny, PyResult, Python};
 use std::borrow::Cow;
+use std::ffi::CString;
 use std::str;
 
 /// Deprecated alias for [`PyString`].
@@ -216,6 +217,8 @@ impl PyString {
         encoding: &str,
         errors: &str,
     ) -> PyResult<Bound<'py, PyString>> {
+        let encoding = CString::new(encoding)?;
+        let errors = CString::new(errors)?;
         unsafe {
             ffi::PyUnicode_FromEncodedObject(
                 src.as_ptr(),
@@ -670,6 +673,33 @@ mod tests {
         })
     }
 
+    #[test]
+    fn test_string_from_object() {
+        Python::with_gil(|py| {
+            // 0xFF is never valid in UTF-8; the "ignore" error handler drops it.
+            let py_bytes = PyBytes::new(py, b"ab\xFFcd");
+
+            let py_string = PyString::from_object(&py_bytes, "utf-8", "ignore").unwrap();
+
+            let result = py_string.to_cow().unwrap();
+            assert_eq!(result, "abcd");
+        });
+    }
+
+    #[test]
+    fn test_string_from_object_with_invalid_encoding_errors() {
+        Python::with_gil(|py| {
+            let py_bytes = PyBytes::new(py, b"abcd");
+
+            // An interior NUL cannot be represented in a C string, so both calls must fail.
+            let result = PyString::from_object(&py_bytes, "utf\0-8", "ignore");
+            assert!(result.is_err());
+
+            let result = PyString::from_object(&py_bytes, "utf-8", "ign\0ore");
+            assert!(result.is_err());
+        });
+    }
+
     #[test]
     #[cfg(not(any(Py_LIMITED_API, PyPy)))]
     fn test_string_data_ucs1() {