Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions library/alloc/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,11 +386,16 @@ pub struct String {
/// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes());
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_has_incoherent_inherent_impls]
#[cfg_attr(not(no_global_oom_handling), derive(Clone))]
#[derive(Debug, PartialEq, Eq)]
pub struct FromUtf8Error {
bytes: Vec<u8>,
error: Utf8Error,
pub struct FromUtf8Error<Input = Vec<u8>> {
#[doc(hidden)]
#[unstable(feature = "from_utf8_error_internals", issue = "none")]
pub input: Input,
#[doc(hidden)]
#[unstable(feature = "from_utf8_error_internals", issue = "none")]
pub error: Utf8Error,
}

/// A possible error value when converting a `String` from a UTF-16 byte slice.
Expand Down Expand Up @@ -560,7 +565,7 @@ impl String {
pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> {
match str::from_utf8(&vec) {
Ok(..) => Ok(String { vec }),
Err(e) => Err(FromUtf8Error { bytes: vec, error: e }),
Err(e) => Err(FromUtf8Error { input: vec, error: e }),
}
}

Expand Down Expand Up @@ -2224,7 +2229,7 @@ impl FromUtf8Error {
#[must_use]
#[stable(feature = "from_utf8_error_as_bytes", since = "1.26.0")]
pub fn as_bytes(&self) -> &[u8] {
&self.bytes[..]
&self.input[..]
}

/// Converts the bytes into a `String` lossily, substituting invalid UTF-8
Expand All @@ -2251,19 +2256,19 @@ impl FromUtf8Error {
const REPLACEMENT: &str = "\u{FFFD}";

let mut res = {
let mut v = Vec::with_capacity(self.bytes.len());
let mut v = Vec::with_capacity(self.input.len());

// `Utf8Error::valid_up_to` returns the maximum index of validated
// UTF-8 bytes. Copy the valid bytes into the output buffer.
v.extend_from_slice(&self.bytes[..self.error.valid_up_to()]);
v.extend_from_slice(&self.input[..self.error.valid_up_to()]);

// SAFETY: This is safe because the only bytes present in the buffer
// were validated as UTF-8 by the call to `String::from_utf8` which
// produced this `FromUtf8Error`.
unsafe { String::from_utf8_unchecked(v) }
};

let iter = self.bytes[self.error.valid_up_to()..].utf8_chunks();
let iter = self.input[self.error.valid_up_to()..].utf8_chunks();

for chunk in iter {
res.push_str(chunk.valid());
Expand Down Expand Up @@ -2294,9 +2299,11 @@ impl FromUtf8Error {
#[must_use = "`self` will be dropped if the result is not used"]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn into_bytes(self) -> Vec<u8> {
self.bytes
self.input
}
}

impl<T> FromUtf8Error<T> {
/// Fetch a `Utf8Error` to get more details about the conversion failure.
///
/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may
Expand Down
38 changes: 38 additions & 0 deletions library/std/src/ffi/os_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::hash::{Hash, Hasher};
use crate::ops::{self, Range};
use crate::rc::Rc;
use crate::str::FromStr;
use crate::string::FromUtf8Error;
use crate::sync::Arc;
use crate::sys::os_str::{Buf, Slice};
use crate::sys::{AsInner, FromInner, IntoInner};
Expand Down Expand Up @@ -616,6 +617,43 @@ impl From<String> for OsString {
}
}

#[stable(feature = "tryfrom_os_string_for_string", since = "CURRENT_RUSTC_VERSION")]
impl TryFrom<OsString> for String {
type Error = FromUtf8Error<OsString>;

/// Attempts to convert an [`OsString`] into a [`String`].
///
/// This conversion does not allocate or copy memory.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does technically copy memory, just not the contents of the OsString.

The docs should probably be based on the docs of OsString::into_string:

/// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
///
/// On failure, ownership of the original `OsString` is returned.
///
/// # Examples
///
/// ```
/// use std::ffi::OsString;
///
/// let os_string = OsString::from("foo");
/// let string = os_string.into_string();
/// assert_eq!(string, Ok(String::from("foo")));
/// ```

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I copied it from some other method that says the same thing in such scenario. I will look into it.

fn try_from(s: OsString) -> Result<Self, Self::Error> {
    // Validate first. `to_str` only borrows `s`, so the original `OsString`
    // is still intact and can be handed back to the caller on failure.
    match s.as_os_str().inner.to_str() {
        // SAFETY: `to_str` has just accepted the contents of `s` as valid
        // UTF-8, and `into_encoded_bytes` returns those same bytes, so the
        // buffer may be reinterpreted as a `String` without re-checking.
        Ok(_) => Ok(unsafe { String::from_utf8_unchecked(s.into_encoded_bytes()) }),
        // Return ownership of the input together with the validation error.
        Err(error) => Err(FromUtf8Error { input: s, error }),
    }
}
}

impl FromUtf8Error<OsString> {
/// Returns the input, as an [`OsStr`] slice, that could not be converted
/// to a `String`.
///
/// The error keeps ownership of the input; this only borrows it.
#[must_use]
#[stable(feature = "tryfrom_os_string_for_string", since = "CURRENT_RUSTC_VERSION")]
#[rustc_allow_incoherent_impl]
pub fn as_os_str(&self) -> &OsStr {
    self.input.as_os_str()
}

/// Returns the [`OsString`] that could not be converted to a `String`.
///
/// This consumes the error and moves the stored string out of it, so no
/// copy of the string is made.
#[must_use = "`self` will be dropped if the result is not used"]
#[stable(feature = "tryfrom_os_string_for_string", since = "CURRENT_RUSTC_VERSION")]
#[rustc_allow_incoherent_impl]
pub fn into_os_string(self) -> OsString {
    self.input
}
Comment on lines +638 to +654
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These can be added unstably (unlike traits). I think it will probably be fine to put up a stabilization PR shortly after this merges.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, trait impls can be unstable too (as far as I know they actually work now), but I don't know whether it's standard practice to use that yet.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎉 that's news to me! I think we may as well try it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see https://rustc-dev-guide.rust-lang.org/stability.html#unstable_feature_bound for docs.
You'll need to create a tracking issue, and replace the below NNN with that issue number.

#[unstable(feature = "tryfrom_os_string_for_string", issue = "NNN")]
#[unstable_feature_bound(tryfrom_os_string_for_string)]
impl TryFrom<OsString> for String {
    ...
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, you'll want to add #[must_use = ...] to those methods to match the methods on FromUtf8Error<Vec<u8>>

}

#[stable(feature = "rust1", since = "1.0.0")]
impl<T: ?Sized + AsRef<OsStr>> From<&T> for OsString {
/// Copies any value implementing <code>[AsRef]&lt;[OsStr]&gt;</code>
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@
#![feature(f128)]
#![feature(ffi_const)]
#![feature(formatting_options)]
#![feature(from_utf8_error_internals)]
#![feature(funnel_shifts)]
#![feature(if_let_guard)]
#![feature(intra_doc_pointers)]
Expand Down
Loading