From 3a3227ccd921214a97461bf5cfd578d1cf59deaf Mon Sep 17 00:00:00 2001 From: Martin Habovstiak Date: Thu, 29 Jan 2026 14:23:14 +0100 Subject: [PATCH] Implement `TryFrom<OsString>` for `String` Being able to generically convert strings can be beneficial for argument parsing code and similar situations, especially in case of conditional conversions. The standard library already provided this conversion, just not via a trait. This commit fills the gap by adding the impl. This addition was approved in [ACP 732]. It was requested that `FromUtf8Error` should be made generic over the input and this commit obeys the request. However, some challenges were encountered: * The fields were private and the type had no constructor - solved by making the fields public but unstable and `#[doc(hidden)]`. * There is a method to perform lossy conversion and it looks like it should be provided for `OsString` as well - this is not yet resolved. * `into_os_string` method was requested but the types are in different crates - solved with `#[rustc_allow_incoherent_impl]`. 
[ACP 732]: https://github.com/rust-lang/libs-team/issues/732 --- library/alloc/src/string.rs | 25 ++++++++++++++--------- library/std/src/ffi/os_str.rs | 38 +++++++++++++++++++++++++++++++++++ library/std/src/lib.rs | 1 + 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index 4100ee55a4c7b..7fefb36893318 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -386,11 +386,16 @@ pub struct String { /// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); /// ``` #[stable(feature = "rust1", since = "1.0.0")] +#[rustc_has_incoherent_inherent_impls] #[cfg_attr(not(no_global_oom_handling), derive(Clone))] #[derive(Debug, PartialEq, Eq)] -pub struct FromUtf8Error { - bytes: Vec, - error: Utf8Error, +pub struct FromUtf8Error> { + #[doc(hidden)] + #[unstable(feature = "from_utf8_error_internals", issue = "none")] + pub input: Input, + #[doc(hidden)] + #[unstable(feature = "from_utf8_error_internals", issue = "none")] + pub error: Utf8Error, } /// A possible error value when converting a `String` from a UTF-16 byte slice. @@ -560,7 +565,7 @@ impl String { pub fn from_utf8(vec: Vec) -> Result { match str::from_utf8(&vec) { Ok(..) => Ok(String { vec }), - Err(e) => Err(FromUtf8Error { bytes: vec, error: e }), + Err(e) => Err(FromUtf8Error { input: vec, error: e }), } } @@ -2224,7 +2229,7 @@ impl FromUtf8Error { #[must_use] #[stable(feature = "from_utf8_error_as_bytes", since = "1.26.0")] pub fn as_bytes(&self) -> &[u8] { - &self.bytes[..] + &self.input[..] } /// Converts the bytes into a `String` lossily, substituting invalid UTF-8 @@ -2251,11 +2256,11 @@ impl FromUtf8Error { const REPLACEMENT: &str = "\u{FFFD}"; let mut res = { - let mut v = Vec::with_capacity(self.bytes.len()); + let mut v = Vec::with_capacity(self.input.len()); // `Utf8Error::valid_up_to` returns the maximum index of validated // UTF-8 bytes. Copy the valid bytes into the output buffer. 
- v.extend_from_slice(&self.bytes[..self.error.valid_up_to()]); + v.extend_from_slice(&self.input[..self.error.valid_up_to()]); // SAFETY: This is safe because the only bytes present in the buffer // were validated as UTF-8 by the call to `String::from_utf8` which @@ -2263,7 +2268,7 @@ impl FromUtf8Error { unsafe { String::from_utf8_unchecked(v) } }; - let iter = self.bytes[self.error.valid_up_to()..].utf8_chunks(); + let iter = self.input[self.error.valid_up_to()..].utf8_chunks(); for chunk in iter { res.push_str(chunk.valid()); @@ -2294,9 +2299,11 @@ impl FromUtf8Error { #[must_use = "`self` will be dropped if the result is not used"] #[stable(feature = "rust1", since = "1.0.0")] pub fn into_bytes(self) -> Vec { - self.bytes + self.input } +} +impl FromUtf8Error { /// Fetch a `Utf8Error` to get more details about the conversion failure. /// /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 4e4d377ae2708..da714477ebaaa 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -11,6 +11,7 @@ use crate::hash::{Hash, Hasher}; use crate::ops::{self, Range}; use crate::rc::Rc; use crate::str::FromStr; +use crate::string::FromUtf8Error; use crate::sync::Arc; use crate::sys::os_str::{Buf, Slice}; use crate::sys::{AsInner, FromInner, IntoInner}; @@ -616,6 +617,43 @@ impl From for OsString { } } +#[stable(feature = "tryfrom_os_string_for_string", since = "CURRENT_RUSTC_VERSION")] +impl TryFrom for String { + type Error = FromUtf8Error; + + /// Attempts to convert an [`OsString`] into a [`String`]. + /// + /// This conversion does not allocate or copy memory. 
+ fn try_from(s: OsString) -> Result { + unsafe { + match s.as_os_str().inner.to_str() { + Ok(_) => Ok(String::from_utf8_unchecked(s.into_encoded_bytes())), + Err(error) => Err(FromUtf8Error { input: s, error }), + } + } + } +} + +impl FromUtf8Error { + /// Returns an [`OsStr`] slice that was attempted to convert to a `String`. + #[stable(feature = "tryfrom_os_string_for_string", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_incoherent_impl] + pub fn as_os_str(&self) -> &OsStr { + &self.input[..] + } + + /// Returns the [`OsString`] that was attempted to convert to a `String`. + /// + /// This method is carefully constructed to avoid allocation. It will + /// consume the error, moving out the string, so that a copy of the string + /// does not need to be made. + #[stable(feature = "tryfrom_os_string_for_string", since = "CURRENT_RUSTC_VERSION")] + #[rustc_allow_incoherent_impl] + pub fn into_os_string(self) -> OsString { + self.input + } +} + #[stable(feature = "rust1", since = "1.0.0")] impl> From<&T> for OsString { /// Copies any value implementing [AsRef]<[OsStr]> diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index b213fa7491775..1b48a4436a3dc 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -287,6 +287,7 @@ #![feature(f128)] #![feature(ffi_const)] #![feature(formatting_options)] +#![feature(from_utf8_error_internals)] #![feature(funnel_shifts)] #![feature(if_let_guard)] #![feature(intra_doc_pointers)]