diff --git a/regex-automata/Cargo.toml b/regex-automata/Cargo.toml index 40a0ebfb9e..3e0abcf985 100644 --- a/regex-automata/Cargo.toml +++ b/regex-automata/Cargo.toml @@ -87,6 +87,7 @@ aho-corasick = { version = "1.0.0", optional = true, default-features = false } log = { version = "0.4.14", optional = true } memchr = { version = "2.6.0", optional = true, default-features = false } regex-syntax = { path = "../regex-syntax", version = "0.8.2", optional = true, default-features = false } +zerocopy = { version = "0.7.32", default-features = false, features = ["derive"] } [dev-dependencies] anyhow = "1.0.69" diff --git a/regex-automata/src/dfa/accel.rs b/regex-automata/src/dfa/accel.rs index c0ba18ea89..6120d92299 100644 --- a/regex-automata/src/dfa/accel.rs +++ b/regex-automata/src/dfa/accel.rs @@ -53,6 +53,8 @@ #[cfg(feature = "dfa-build")] use alloc::{vec, vec::Vec}; +use zerocopy::{AsBytes, FromBytes}; + use crate::util::{ int::Pointer, memchr, @@ -208,15 +210,9 @@ impl<'a> Accels<&'a [AccelTy]> { wire::check_alignment::(slice)?; let accel_tys = &slice[..accel_tys_bytes_len]; slice = &slice[accel_tys_bytes_len..]; - // SAFETY: We've checked the length and alignment above, and since - // slice is just bytes and AccelTy is just a u32, we can safely cast to - // a slice of &[AccelTy]. - let accels = unsafe { - core::slice::from_raw_parts( - accel_tys.as_ptr().cast::(), - accel_tys_len, - ) - }; + // PANICS: We've just checked the length and alignment, so this is + // guaranteed to succeed. + let accels = ::slice_from(accel_tys).unwrap(); Ok((Accels { accels }, slice.as_ptr().as_usize() - slice_start)) } } @@ -235,15 +231,7 @@ impl> Accels { /// Return the bytes representing the serialization of the accelerators. pub fn as_bytes(&self) -> &[u8] { - let accels = self.accels.as_ref(); - // SAFETY: This is safe because accels is a just a slice of AccelTy, - // and u8 always has a smaller alignment. - unsafe { - core::slice::from_raw_parts( - accels.as_ptr().cast::(), - accels.len() * ACCEL_TY_SIZE, - ) - } + self.accels.as_ref().as_bytes() } /// Returns the memory usage, in bytes, of these accelerators. diff --git a/regex-automata/src/util/primitives.rs b/regex-automata/src/util/primitives.rs index 5c5d187b0e..2c5929d641 100644 --- a/regex-automata/src/util/primitives.rs +++ b/regex-automata/src/util/primitives.rs @@ -34,6 +34,8 @@ use core::num::NonZeroUsize; #[cfg(feature = "alloc")] use alloc::vec::Vec; +use zerocopy::{AsBytes, FromBytes, FromZeroes}; + use crate::util::int::{Usize, U16, U32, U64}; /// A `usize` that can never be `usize::MAX`. @@ -138,7 +140,18 @@ impl core::fmt::Debug for NonMaxUsize { /// an invalid value can be done in entirely safe code. This may in turn result /// in panics or silent logical errors. #[derive( - Clone, Copy, Debug, Default, Eq, Hash, PartialEq, PartialOrd, Ord, + Clone, + Copy, + Debug, + Default, + Eq, + Hash, + PartialEq, + PartialOrd, + Ord, + FromZeroes, + FromBytes, + AsBytes, )] #[repr(transparent)] pub struct SmallIndex(u32); @@ -746,7 +759,19 @@ pub struct PatternID(SmallIndex); /// /// See the [`SmallIndex`] type for more information about what it means for /// a state ID to be a "small index." -#[derive(Clone, Copy, Default, Eq, Hash, PartialEq, PartialOrd, Ord)] +#[derive( + Clone, + Copy, + Default, + Eq, + Hash, + PartialEq, + PartialOrd, + Ord, + FromZeroes, + FromBytes, + AsBytes, +)] #[repr(transparent)] pub struct StateID(SmallIndex); diff --git a/regex-automata/src/util/wire.rs b/regex-automata/src/util/wire.rs index b1351c7e91..ffca87f8c1 100644 --- a/regex-automata/src/util/wire.rs +++ b/regex-automata/src/util/wire.rs @@ -45,6 +45,7 @@ use core::{cmp, mem::size_of}; #[cfg(feature = "alloc")] use alloc::{vec, vec::Vec}; +use zerocopy::{AsBytes, FromBytes}; use crate::util::{ int::Pointer, @@ -268,32 +269,16 @@ impl core::fmt::Display for DeserializeError { /// Safely converts a `&[u32]` to `&[StateID]` with zero cost. #[cfg_attr(feature = "perf-inline", inline(always))] pub(crate) fn u32s_to_state_ids(slice: &[u32]) -> &[StateID] { - // SAFETY: This is safe because StateID is defined to have the same memory - // representation as a u32 (it is repr(transparent)). While not every u32 - // is a "valid" StateID, callers are not permitted to rely on the validity - // of StateIDs for memory safety. It can only lead to logical errors. (This - // is why StateID::new_unchecked is safe.) - unsafe { - core::slice::from_raw_parts( - slice.as_ptr().cast::(), - slice.len(), - ) - } + // PANICS: This is guaranteed to succeed since `u32` and `StateID` have the + // same size and alignment. + StateID::slice_from(slice.as_bytes()).unwrap() } /// Safely converts a `&mut [u32]` to `&mut [StateID]` with zero cost. pub(crate) fn u32s_to_state_ids_mut(slice: &mut [u32]) -> &mut [StateID] { - // SAFETY: This is safe because StateID is defined to have the same memory - // representation as a u32 (it is repr(transparent)). While not every u32 - // is a "valid" StateID, callers are not permitted to rely on the validity - // of StateIDs for memory safety. It can only lead to logical errors. (This - // is why StateID::new_unchecked is safe.) - unsafe { - core::slice::from_raw_parts_mut( - slice.as_mut_ptr().cast::(), - slice.len(), - ) - } + // PANICS: This is guaranteed to succeed since `u32` and `StateID` have the + // same size and alignment. + StateID::mut_slice_from(slice.as_bytes_mut()).unwrap() } /// Safely converts a `&[u32]` to `&[PatternID]` with zero cost.