diff --git a/cranelift/bitset/src/compound.rs b/cranelift/bitset/src/compound.rs index ebb9b4e7d703..651796797c23 100644 --- a/cranelift/bitset/src/compound.rs +++ b/cranelift/bitset/src/compound.rs @@ -1,6 +1,6 @@ //! Compound bit sets. -use crate::scalar::{self, ScalarBitSet}; +use crate::scalar::{self, ScalarBitSet, ScalarBitSetStorage}; use alloc::boxed::Box; use core::{cmp, iter, mem}; @@ -45,8 +45,8 @@ use core::{cmp, iter, mem}; feature = "enable-serde", derive(serde_derive::Serialize, serde_derive::Deserialize) )] -pub struct CompoundBitSet { - elems: Box<[ScalarBitSet]>, +pub struct CompoundBitSet { + elems: Box<[ScalarBitSet]>, max: Option, } @@ -57,8 +57,6 @@ impl core::fmt::Debug for CompoundBitSet { } } -const BITS_PER_WORD: usize = mem::size_of::() * 8; - impl CompoundBitSet { /// Construct a new, empty bit set. /// @@ -75,6 +73,10 @@ impl CompoundBitSet { pub fn new() -> Self { CompoundBitSet::default() } +} + +impl CompoundBitSet { + const BITS_PER_SCALAR: usize = mem::size_of::() * 8; /// Construct a new, empty bit set with space reserved to store any element /// `x` such that `x < capacity`. @@ -86,14 +88,14 @@ impl CompoundBitSet { /// ``` /// use cranelift_bitset::CompoundBitSet; /// - /// let bitset = CompoundBitSet::with_capacity(4096); + /// let bitset = CompoundBitSet::::with_capacity(4096); /// /// assert!(bitset.is_empty()); /// assert!(bitset.capacity() >= 4096); /// ``` #[inline] pub fn with_capacity(capacity: usize) -> Self { - let mut bitset = Self::new(); + let mut bitset = Self::default(); bitset.ensure_capacity(capacity); bitset } @@ -144,7 +146,7 @@ impl CompoundBitSet { /// assert!(bitset.capacity() >= 999); ///``` pub fn capacity(&self) -> usize { - self.elems.len() * BITS_PER_WORD + self.elems.len() * Self::BITS_PER_SCALAR } /// Is this bitset empty? @@ -172,8 +174,8 @@ impl CompoundBitSet { /// `ScalarBitSet` at `self.elems[word]`. 
#[inline] fn word_and_bit(i: usize) -> (usize, u8) { - let word = i / BITS_PER_WORD; - let bit = i % BITS_PER_WORD; + let word = i / Self::BITS_PER_SCALAR; + let bit = i % Self::BITS_PER_SCALAR; let bit = u8::try_from(bit).unwrap(); (word, bit) } @@ -183,8 +185,8 @@ impl CompoundBitSet { #[inline] fn elem(word: usize, bit: u8) -> usize { let bit = usize::from(bit); - debug_assert!(bit < BITS_PER_WORD); - word * BITS_PER_WORD + bit + debug_assert!(bit < Self::BITS_PER_SCALAR); + word * Self::BITS_PER_SCALAR + bit } /// Is `i` contained in this bitset? @@ -461,19 +463,63 @@ impl CompoundBitSet { /// ); /// ``` #[inline] - pub fn iter(&self) -> Iter<'_> { + pub fn iter(&self) -> Iter<'_, T> { Iter { bitset: self, word: 0, sub: None, } } + + /// Returns an iterator over the words of this bit-set or the in-memory + /// representation of the bit set. + /// + /// # Example + /// + /// ``` + /// use cranelift_bitset::{CompoundBitSet, ScalarBitSet}; + /// + /// let mut bitset = CompoundBitSet::::default(); + /// + /// assert_eq!( + /// bitset.iter_scalars().collect::>(), + /// [], + /// ); + /// + /// bitset.insert(0); + /// + /// assert_eq!( + /// bitset.iter_scalars().collect::>(), + /// [ScalarBitSet(0x1)], + /// ); + /// + /// bitset.insert(1); + /// + /// assert_eq!( + /// bitset.iter_scalars().collect::>(), + /// [ScalarBitSet(0x3)], + /// ); + /// + /// bitset.insert(32); + /// + /// assert_eq!( + /// bitset.iter_scalars().collect::>(), + /// [ScalarBitSet(0x3), ScalarBitSet(0x1)], + /// ); + /// ``` + pub fn iter_scalars(&self) -> impl Iterator> + '_ { + let nwords = match self.max { + Some(n) => 1 + (n as usize / Self::BITS_PER_SCALAR), + None => 0, + }; + self.elems.iter().copied().take(nwords) + } } -impl<'a> IntoIterator for &'a CompoundBitSet { +impl<'a, T: ScalarBitSetStorage> IntoIterator for &'a CompoundBitSet { type Item = usize; - type IntoIter = Iter<'a>; + type IntoIter = Iter<'a, T>; #[inline] fn into_iter(self) -> Self::IntoIter { @@ -482,13 +528,13 @@ 
impl<'a> IntoIterator for &'a CompoundBitSet { } /// An iterator over the elements in a [`CompoundBitSet`]. -pub struct Iter<'a> { - bitset: &'a CompoundBitSet, +pub struct Iter<'a, T = usize> { + bitset: &'a CompoundBitSet, word: usize, - sub: Option>, + sub: Option>, } -impl Iterator for Iter<'_> { +impl Iterator for Iter<'_, T> { type Item = usize; #[inline] @@ -496,7 +542,7 @@ impl Iterator for Iter<'_> { loop { if let Some(sub) = &mut self.sub { if let Some(bit) = sub.next() { - return Some(CompoundBitSet::elem(self.word, bit)); + return Some(CompoundBitSet::::elem(self.word, bit)); } else { self.word += 1; } diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 1b9e0fb167af..8f4110350f19 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -6,7 +6,6 @@ use crate::{array_call_signature, CompiledFunction, ModuleTextBuilder}; use crate::{builder::LinkOptions, wasm_call_signature, BuiltinFunctionSignatures}; use anyhow::{Context as _, Result}; use cranelift_codegen::binemit::CodeOffset; -use cranelift_codegen::bitset::CompoundBitSet; use cranelift_codegen::ir::condcodes::IntCC; use cranelift_codegen::ir::{self, InstBuilder, MemFlags, UserExternalName, UserFuncName, Value}; use cranelift_codegen::isa::{ @@ -23,14 +22,15 @@ use std::any::Any; use std::cmp; use std::collections::HashMap; use std::mem; +use std::ops::Range; use std::path; use std::sync::{Arc, Mutex}; use wasmparser::{FuncValidatorAllocations, FunctionBody}; use wasmtime_environ::{ AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, DefinedFuncIndex, FlagValue, FunctionBodyData, FunctionLoc, HostCall, ModuleTranslation, ModuleTypesBuilder, PtrSize, - RelocationTarget, StackMapInformation, StaticModuleIndex, TrapEncodingBuilder, TrapSentinel, - TripleExt, Tunables, VMOffsets, WasmFuncType, WasmFunctionInfo, WasmValType, + RelocationTarget, StackMapSection, StaticModuleIndex, TrapEncodingBuilder, TrapSentinel, + 
TripleExt, Tunables, VMOffsets, WasmFuncType, WasmValType, }; #[cfg(feature = "component-model")] @@ -187,7 +187,7 @@ impl wasmtime_environ::Compiler for Compiler { func_index: DefinedFuncIndex, input: FunctionBodyData<'_>, types: &ModuleTypesBuilder, - ) -> Result<(WasmFunctionInfo, Box), CompileError> { + ) -> Result, CompileError> { let isa = &*self.isa; let module = &translation.module; let func_index = module.func_index(func_index); @@ -275,7 +275,7 @@ impl wasmtime_environ::Compiler for Compiler { &mut func_env, )?; - let (info, func) = compiler.finish_with_info( + let func = compiler.finish_with_info( Some((&body, &self.tunables)), &format!("wasm_func_{}", func_index.as_u32()), )?; @@ -284,7 +284,7 @@ impl wasmtime_environ::Compiler for Compiler { log::debug!("{:?} translated in {:?}", func_index, timing.total()); log::trace!("{:?} timing info\n{}", func_index, timing); - Ok((info, Box::new(func))) + Ok(Box::new(func)) } fn compile_array_to_wasm_trampoline( @@ -450,6 +450,7 @@ impl wasmtime_environ::Compiler for Compiler { } let mut addrs = AddressMapSection::default(); let mut traps = TrapEncodingBuilder::default(); + let mut stack_maps = StackMapSection::default(); let mut ret = Vec::with_capacity(funcs.len()); for (i, (sym, func)) in funcs.iter().enumerate() { @@ -459,6 +460,11 @@ impl wasmtime_environ::Compiler for Compiler { let addr = func.address_map(); addrs.push(range.clone(), &addr.instructions); } + clif_to_env_stack_maps( + &mut stack_maps, + range.clone(), + func.buffer.user_stack_maps(), + ); traps.push(range.clone(), &func.traps().collect::>()); builder.append_padding(self.linkopts.padding_between_functions); let info = FunctionLoc { @@ -473,6 +479,7 @@ impl wasmtime_environ::Compiler for Compiler { if self.tunables.generate_address_map { addrs.append_to(obj); } + stack_maps.append_to(obj); traps.append_to(obj); Ok(ret) @@ -963,16 +970,14 @@ impl FunctionCompiler<'_> { } fn finish(self, clif_filename: &str) -> Result { - let (info, func) = 
self.finish_with_info(None, clif_filename)?; - assert!(info.stack_maps.is_empty()); - Ok(func) + self.finish_with_info(None, clif_filename) } fn finish_with_info( mut self, body_and_tunables: Option<(&FunctionBody<'_>, &Tunables)>, clif_filename: &str, - ) -> Result<(WasmFunctionInfo, CompiledFunction), CompileError> { + ) -> Result { let context = &mut self.cx.codegen_context; let isa = &*self.compiler.isa; @@ -994,7 +999,7 @@ impl FunctionCompiler<'_> { write!(output, "{}", context.func.display()).unwrap(); } - let mut compiled_code = compilation_result?; + let compiled_code = compilation_result?; // Give wasm functions, user defined code, a "preferred" alignment // instead of the minimum alignment as this can help perf in niche @@ -1054,45 +1059,35 @@ impl FunctionCompiler<'_> { } } - let stack_maps = - clif_to_env_stack_maps(compiled_code.buffer.take_user_stack_maps().into_iter()); compiled_function .set_sized_stack_slots(std::mem::take(&mut context.func.sized_stack_slots)); self.compiler.contexts.lock().unwrap().push(self.cx); - Ok(( - WasmFunctionInfo { - start_srcloc: compiled_function.metadata().address_map.start_srcloc, - stack_maps: stack_maps.into(), - }, - compiled_function, - )) + Ok(compiled_function) } } /// Convert from Cranelift's representation of a stack map to Wasmtime's /// compiler-agnostic representation. +/// +/// Here `section` is the wasmtime data section being created and `range` is the +/// range of the function being added. The `clif_stack_maps` entry is the raw +/// listing of stack maps from Cranelift. 
fn clif_to_env_stack_maps( - clif_stack_maps: impl ExactSizeIterator, -) -> Vec { - let mut stack_maps = Vec::with_capacity(clif_stack_maps.len()); - for (code_offset, mapped_bytes, stack_map) in clif_stack_maps { - let mut bitset = CompoundBitSet::new(); - for (ty, offset) in stack_map.entries() { + section: &mut StackMapSection, + range: Range, + clif_stack_maps: &[(CodeOffset, u32, ir::UserStackMap)], +) { + for (offset, frame_size, stack_map) in clif_stack_maps { + let mut frame_offsets = Vec::new(); + for (ty, frame_offset) in stack_map.entries() { assert_eq!(ty, ir::types::I32); - bitset.insert(usize::try_from(offset).unwrap()); - } - if bitset.is_empty() { - continue; + frame_offsets.push(frame_offset); } - let stack_map = wasmtime_environ::StackMap::new(mapped_bytes, bitset); - stack_maps.push(StackMapInformation { - code_offset, - stack_map, - }); + let code_offset = range.start + u64::from(*offset); + assert!(code_offset < range.end); + section.push(code_offset, *frame_size, frame_offsets.into_iter()); } - stack_maps.sort_unstable_by_key(|info| info.code_offset); - stack_maps } fn declare_and_call( diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index dfad9a9d9d34..e85aef15b347 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -42,6 +42,11 @@ smallvec = { workspace = true, features = ['serde'] } clap = { workspace = true, features = ['default'] } env_logger = { workspace = true } wat = { workspace = true } +# Fix a test parsing ELF files internally where the bytes themselves reside in a +# `Vec` with no alignment requirements on it. By enabling the `unaligned` +# feature we don't require anything to be aligned so it doesn't matter the +# alignment of the bytes that we're reading. 
+object = { workspace = true, features = ['unaligned'] } [[example]] name = "factc" diff --git a/crates/environ/src/address_map.rs b/crates/environ/src/address_map.rs index a707d69b4708..fe69dcebc65c 100644 --- a/crates/environ/src/address_map.rs +++ b/crates/environ/src/address_map.rs @@ -55,7 +55,8 @@ fn parse_address_map( section: &[u8], ) -> Option<(&[U32Bytes], &[U32Bytes])> { let mut section = Bytes(section); - // NB: this matches the encoding written by `append_to` above. + // NB: this matches the encoding written by `append_to` in the + // `compile::address_map` module. let count = section.read::>().ok()?; let count = usize::try_from(count.get(LittleEndian)).ok()?; let (offsets, section) = diff --git a/crates/environ/src/compile/mod.rs b/crates/environ/src/compile/mod.rs index 5e6db773b672..9eec78b0c946 100644 --- a/crates/environ/src/compile/mod.rs +++ b/crates/environ/src/compile/mod.rs @@ -5,7 +5,7 @@ use crate::prelude::*; use crate::{obj, Tunables}; use crate::{ BuiltinFunctionIndex, DefinedFuncIndex, FlagValue, FuncIndex, FunctionLoc, ObjectKind, - PrimaryMap, StaticModuleIndex, TripleExt, WasmError, WasmFuncType, WasmFunctionInfo, + PrimaryMap, StaticModuleIndex, TripleExt, WasmError, WasmFuncType, }; use anyhow::Result; use object::write::{Object, SymbolId}; @@ -20,12 +20,14 @@ mod address_map; mod module_artifacts; mod module_environ; mod module_types; +mod stack_maps; mod trap_encoding; pub use self::address_map::*; pub use self::module_artifacts::*; pub use self::module_environ::*; pub use self::module_types::*; +pub use self::stack_maps::*; pub use self::trap_encoding::*; /// An error while compiling WebAssembly to machine code. 
@@ -196,7 +198,7 @@ pub trait Compiler: Send + Sync { index: DefinedFuncIndex, data: FunctionBodyData<'_>, types: &ModuleTypesBuilder, - ) -> Result<(WasmFunctionInfo, Box), CompileError>; + ) -> Result, CompileError>; /// Compile a trampoline for an array-call host function caller calling the /// `index`th Wasm function. diff --git a/crates/environ/src/compile/stack_maps.rs b/crates/environ/src/compile/stack_maps.rs new file mode 100644 index 000000000000..a02b5e7162a9 --- /dev/null +++ b/crates/environ/src/compile/stack_maps.rs @@ -0,0 +1,230 @@ +use crate::obj::ELF_WASMTIME_STACK_MAP; +use crate::prelude::*; +use cranelift_bitset::CompoundBitSet; +use object::write::{Object, StandardSegment}; +use object::{LittleEndian, SectionKind, U32Bytes}; + +/// Builder for the `ELF_WASMTIME_STACK_MAP` section in compiled executables. +/// +/// This format is parsed by `crate::stack_map`. +/// +/// The current layout of the format is: +/// +/// ```text +/// ┌─────────────────────┬───── 0x00 (relative, not necessarily aligned) +/// │ count: 4-byte LE │ +/// ├─────────────────────┼───── 0x04 +/// │ pc1: 4-byte LE │ +/// │ pc2: 4-byte LE │ +/// │ ... │ +/// │ pcN: 4-byte LE │ +/// ├─────────────────────┼───── 0x04 + 4 * count +/// │ offset1: 4-byte LE │ +/// │ offset2: 4-byte LE │ +/// │ ... │ +/// │ offsetN: 4-byte LE │ +/// ├─────────────────────┼───── 0x04 + 8 * count +/// │ data[0]: 4-byte LE │ +/// │ data[1]: 4-byte LE │ +/// │ ... │ +/// │ data[M]: 4-byte LE │ +/// └─────────────────────┴───── 0x04 + 8 * count + 4 * M +/// ``` +/// +/// Here `count` is the size of the `pcN` and `offsetN` arrays. The two arrays +/// are the same size and have corresponding entries in one another. When +/// looking up a stack map for a particular program counter: +/// +/// * A binary search is performed on the `pcN` array. +/// * The corresponding `offsetM` value is looked up once the `pcM` entry, +/// matching the lookup pc, is found. 
+/// * The `offsetM` value is used to access `data[offsetM]` which is an array of +/// 4-byte entries located after the `offset*` array. This stack map is then +/// encoded as below. +/// +/// This encoding scheme is chosen so parsing this data structure effectively +/// isn't required. It's usable at-rest from a compiled artifact in a section of +/// an executable. Notably having offsets into the data array means that a stack +/// map is just a slice into the data array, and the entire data structure can +/// be "parsed" by reading `count` and otherwise just making sure various +/// offsets are in-bounds. +/// +/// A stack map located at `data[offsetM]` is encoded as: +/// +/// ```text +/// ┌───────────────────────────────────────────────────────┐ +/// │ data[offsetM + 0]: frame_size: 4-byte LE │ +/// ├───────────────────────────────────────────────────────┤ +/// │ data[offsetM + 1]: count: 4-byte LE │ +/// ├───────────────────────────────────────────────────────┤ +/// │ data[offsetM + 2 + 0]: bitmap: 4-byte LE │ +/// │ data[offsetM + 2 + 1]: bitmap: 4-byte LE │ +/// │ ... │ +/// │ data[offsetM + 2 + count - 1]: bitmap: 4-byte LE │ +/// └───────────────────────────────────────────────────────┘ +/// ``` +/// +/// Here `frame_size` and `count` are always greater than 0. Entries in the bit +/// map represent `stack_slot / 4` so must be multiplied by 4 to get the actual +/// stack offset entry. This is because all stack slots are aligned at 4 bytes +/// so by dividing them all by 4 we're able to compress the bit map that much +/// more. +#[derive(Default)] +pub struct StackMapSection { + pcs: Vec>, + pointers_to_stack_map: Vec>, + stack_map_data: Vec>, + last_offset: u32, +} + +impl StackMapSection { + /// Appends stack map information for `code_offset` which has the specified + /// `frame_size` and `frame_offsets` are the active GC references. 
+ pub fn push( + &mut self, + code_offset: u64, + frame_size: u32, + frame_offsets: impl ExactSizeIterator, + ) { + // NB: for now this only supports <=4GB text sections in object files. + // Alternative schemes will need to be created for >32-bit offsets to + // avoid making this section overly large. + let code_offset = u32::try_from(code_offset).unwrap(); + + // Sanity-check to ensure that functions are pushed in-order, otherwise + // the `pcs` array won't be sorted which is our goal. + assert!(code_offset >= self.last_offset); + self.last_offset = code_offset; + + // Skip encoding information for this code offset if there's not + // actually anything in the stack map. + if frame_offsets.len() == 0 { + return; + } + + // Record parallel entries in `pcs`/`pointers_to_stack_map`. + self.pcs.push(U32Bytes::new(LittleEndian, code_offset)); + self.pointers_to_stack_map.push(U32Bytes::new( + LittleEndian, + u32::try_from(self.stack_map_data.len()).unwrap(), + )); + + // The frame data starts with the frame size and is then followed by + // `offsets` represented as a bit set. + self.stack_map_data + .push(U32Bytes::new(LittleEndian, frame_size)); + + let mut bits = CompoundBitSet::::default(); + for offset in frame_offsets { + assert!(offset % 4 == 0); + bits.insert((offset / 4) as usize); + } + let count = bits.iter_scalars().count(); + self.stack_map_data + .push(U32Bytes::new(LittleEndian, count as u32)); + for scalar in bits.iter_scalars() { + self.stack_map_data + .push(U32Bytes::new(LittleEndian, scalar.0)); + } + } + + /// Finishes encoding this section into the `Object` provided. + pub fn append_to(self, obj: &mut Object) { + // Don't append anything for this section if there weren't any actual + // stack maps present, no need to waste space! 
+ if self.pcs.is_empty() { + return; + } + let section = obj.add_section( + obj.segment_name(StandardSegment::Data).to_vec(), + ELF_WASMTIME_STACK_MAP.as_bytes().to_vec(), + SectionKind::ReadOnlyData, + ); + + // NB: this matches the encoding expected by `lookup` in the + // `crate::stack_map` module. + let amt = u32::try_from(self.pcs.len()).unwrap(); + obj.append_section_data(section, &amt.to_le_bytes(), 1); + obj.append_section_data(section, object::bytes_of_slice(&self.pcs), 1); + obj.append_section_data( + section, + object::bytes_of_slice(&self.pointers_to_stack_map), + 1, + ); + obj.append_section_data(section, object::bytes_of_slice(&self.stack_map_data), 1); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::stack_map::StackMap; + use object::{Object, ObjectSection}; + + fn roundtrip(maps: &[(u64, u32, &[u32])]) { + let mut section = StackMapSection::default(); + for (pc, frame, offsets) in maps { + println!("append {pc}"); + section.push(*pc, *frame, offsets.iter().copied()); + } + let mut object = object::write::Object::new( + object::BinaryFormat::Elf, + object::Architecture::X86_64, + object::Endianness::Little, + ); + section.append_to(&mut object); + let elf = object.write().unwrap(); + + let image = object::File::parse(&elf[..]).unwrap(); + let data = image + .sections() + .find(|s| s.name().ok() == Some(ELF_WASMTIME_STACK_MAP)) + .unwrap() + .data() + .unwrap(); + + for (pc, frame, offsets) in maps { + println!("lookup {pc}"); + let map = match StackMap::lookup(*pc as u32, data) { + Some(map) => map, + None => { + assert!(offsets.is_empty()); + continue; + } + }; + assert_eq!(map.frame_size(), *frame); + + let map_offsets = map.offsets().collect::>(); + assert_eq!(map_offsets, *offsets); + } + + let mut expected = maps.iter(); + 'outer: for (pc, map) in StackMap::iter(data).unwrap() { + while let Some((expected_pc, expected_frame, expected_offsets)) = expected.next() { + if expected_offsets.is_empty() { + continue; + } + 
assert_eq!(*expected_pc, u64::from(pc)); + assert_eq!(*expected_frame, map.frame_size()); + let offsets = map.offsets().collect::>(); + assert_eq!(offsets, *expected_offsets); + continue 'outer; + } + panic!("didn't find {pc:#x} in expected list"); + } + assert!(expected.next().is_none()); + } + + #[test] + fn roundtrip_many() { + roundtrip(&[(0, 4, &[0])]); + roundtrip(&[ + (0, 4, &[0]), + (4, 200, &[0, 4, 20, 180]), + (200, 20, &[12]), + (600, 0, &[]), + (800, 20, &[0, 4, 8, 12, 16]), + (1200, 2000, &[1800, 1804, 1808, 1900]), + ]); + } +} diff --git a/crates/environ/src/lib.rs b/crates/environ/src/lib.rs index 511b4a73cfec..db20ec4c3e16 100644 --- a/crates/environ/src/lib.rs +++ b/crates/environ/src/lib.rs @@ -46,7 +46,7 @@ pub use crate::module_artifacts::*; pub use crate::module_types::*; pub use crate::ref_bits::*; pub use crate::scopevec::ScopeVec; -pub use crate::stack_map::StackMap; +pub use crate::stack_map::*; pub use crate::trap_encoding::*; pub use crate::tunables::*; pub use crate::types::*; diff --git a/crates/environ/src/module_artifacts.rs b/crates/environ/src/module_artifacts.rs index 2fa82e536ee2..0d41a68f84aa 100644 --- a/crates/environ/src/module_artifacts.rs +++ b/crates/environ/src/module_artifacts.rs @@ -2,9 +2,7 @@ //! with `bincode` as part of a module's compilation process. use crate::prelude::*; -use crate::{ - DefinedFuncIndex, FilePos, FuncIndex, Module, ModuleInternedTypeIndex, PrimaryMap, StackMap, -}; +use crate::{DefinedFuncIndex, FilePos, FuncIndex, Module, ModuleInternedTypeIndex, PrimaryMap}; use core::fmt; use core::ops::Range; use core::str; @@ -13,8 +11,8 @@ use serde_derive::{Deserialize, Serialize}; /// Secondary in-memory results of function compilation. #[derive(Serialize, Deserialize)] pub struct CompiledFunctionInfo { - /// The [`WasmFunctionInfo`] for this function. - pub wasm_func_info: WasmFunctionInfo, + /// Where this function was found in the original wasm file. 
+ pub start_srcloc: FilePos, /// The [`FunctionLoc`] indicating the location of this function in the text /// section of the competition artifact. pub wasm_func_loc: FunctionLoc, @@ -22,15 +20,6 @@ pub struct CompiledFunctionInfo { pub array_to_wasm_trampoline: Option, } -/// Information about a function, such as trap information, address map, -/// and stack maps. -#[derive(Serialize, Deserialize, Default)] -#[expect(missing_docs, reason = "self-describing fields")] -pub struct WasmFunctionInfo { - pub start_srcloc: FilePos, - pub stack_maps: Box<[StackMapInformation]>, -} - /// Description of where a function is located in the text section of a /// compiled image. #[derive(Copy, Clone, Debug, Serialize, Deserialize)] @@ -42,18 +31,6 @@ pub struct FunctionLoc { pub length: u32, } -/// The offset within a function of a GC safepoint, and its associated stack -/// map. -#[derive(Serialize, Deserialize, Debug)] -pub struct StackMapInformation { - /// The offset of the GC safepoint within the function's native code. It is - /// relative to the beginning of the function. - pub code_offset: u32, - - /// The stack map for identifying live GC refs at the GC safepoint. - pub stack_map: StackMap, -} - /// Secondary in-memory results of module compilation. /// /// This opaque structure can be optionally passed back to diff --git a/crates/environ/src/obj.rs b/crates/environ/src/obj.rs index 9d29e5e6cd14..2bf0c85e6866 100644 --- a/crates/environ/src/obj.rs +++ b/crates/environ/src/obj.rs @@ -52,6 +52,17 @@ pub const SH_WASMTIME_NOT_EXECUTED: u64 = 1 << 0; /// mean that >=4gb text sections are not supported. pub const ELF_WASMTIME_ADDRMAP: &str = ".wasmtime.addrmap"; +/// A custom Wasmtime-specific section of compilation which store information +/// about live gc references at various locations in the text section (stack +/// maps). 
+/// +/// This section has a custom binary encoding described in `stack_maps.rs` which +/// is used to implement the single query we want to satisfy: where are the +/// live GC references at this pc? Like the addrmap section this has an +/// alignment of 1 with unaligned reads, and it additionally doesn't support +/// >=4gb text sections. +pub const ELF_WASMTIME_STACK_MAP: &str = ".wasmtime.stackmap"; + /// A custom binary-encoded section of wasmtime compilation artifacts which /// encodes the ability to map an offset in the text section to the trap code /// that it corresponds to. diff --git a/crates/environ/src/stack_map.rs b/crates/environ/src/stack_map.rs index 2737e2642655..b998be1e3b5c 100644 --- a/crates/environ/src/stack_map.rs +++ b/crates/environ/src/stack_map.rs @@ -1,22 +1,93 @@ -use cranelift_bitset::CompoundBitSet; -use serde_derive::{Deserialize, Serialize}; +use cranelift_bitset::ScalarBitSet; +use object::{Bytes, LittleEndian, U32Bytes}; + +struct StackMapSection<'a> { + pcs: &'a [U32Bytes], + pointers_to_stack_map: &'a [U32Bytes], + stack_map_data: &'a [U32Bytes], +} + +impl<'a> StackMapSection<'a> { + fn parse(section: &'a [u8]) -> Option> { + let mut section = Bytes(section); + // NB: this matches the encoding written by `append_to` in the + // `compile::stack_maps` module. 
+ let pc_count = section.read::>().ok()?; + let pc_count = usize::try_from(pc_count.get(LittleEndian)).ok()?; + let (pcs, section) = + object::slice_from_bytes::>(section.0, pc_count).ok()?; + let (pointers_to_stack_map, section) = + object::slice_from_bytes::>(section, pc_count).ok()?; + let stack_map_data = + object::slice_from_all_bytes::>(section).ok()?; + Some(StackMapSection { + pcs, + pointers_to_stack_map, + stack_map_data, + }) + } + + fn lookup(&self, pc: u32) -> Option> { + let pc_index = self + .pcs + .binary_search_by_key(&pc, |v| v.get(LittleEndian)) + .ok()?; + self.get(pc_index) + } + + fn into_iter(self) -> impl Iterator)> + 'a { + self.pcs + .iter() + .enumerate() + .map(move |(i, pc)| (pc.get(LittleEndian), self.get(i).unwrap())) + } + + /// Returns the stack map corresponding to the `i`th pc. + fn get(&self, i: usize) -> Option> { + let pointer_to_stack_map = self.pointers_to_stack_map[i].get(LittleEndian) as usize; + let data = self.stack_map_data.get(pointer_to_stack_map..)?; + + let (frame_size, data) = data.split_first()?; + let (count, data) = data.split_first()?; + let data = data.get(..count.get(LittleEndian) as usize)?; + + Some(StackMap { + frame_size: frame_size.get(LittleEndian), + data, + }) + } +} /// A map for determining where live GC references live in a stack frame. /// /// Note that this is currently primarily documented as cranelift's /// `binemit::StackMap`, so for detailed documentation about this please read /// the docs over there. -#[derive(Debug, Serialize, Deserialize)] -pub struct StackMap { - bits: CompoundBitSet, +pub struct StackMap<'a> { frame_size: u32, + data: &'a [U32Bytes], } -impl StackMap { - /// Creates a new `StackMap`, typically from a preexisting - /// `binemit::StackMap`. - pub fn new(frame_size: u32, bits: CompoundBitSet) -> StackMap { - StackMap { bits, frame_size } +impl<'a> StackMap<'a> { + /// Looks up a stack map for `pc` within the `section` provided. 
+ /// + /// The `section` should be produced by `StackMapSection` in the + /// `compile::stack_maps` module. The `pc` should be relative to the start + /// of the `.text` section in the final executable. + pub fn lookup(pc: u32, section: &'a [u8]) -> Option> { + StackMapSection::parse(section)?.lookup(pc) + } + + /// Iterate over the stack maps contained in the given stack map section. + /// + /// This function takes a `section` as its first argument which must have + /// been created with the `StackMapSection` builder. This is intended to be the + /// raw `ELF_WASMTIME_STACK_MAP` section from the compilation artifact. + /// + /// The yielded offsets are relative to the start of the text section for + /// this map's code object. + pub fn iter(section: &'a [u8]) -> Option)> + 'a> { + Some(StackMapSection::parse(section)?.into_iter()) } /// Returns the byte size of this stack map's frame. @@ -43,8 +114,9 @@ impl StackMap { /// The `sp` must be the stack pointer at the code offset that this stack /// map is associated with. pub unsafe fn live_gc_refs(&self, sp: *mut usize) -> impl Iterator + '_ { - self.bits.iter().map(move |i| { + self.offsets().map(move |i| { log::trace!("Live GC ref in frame at frame offset {:#x}", i); + let i = usize::try_from(i).unwrap(); let ptr_to_gc_ref = sp.byte_add(i); // Assert that the pointer is inside this stack map's frame. @@ -57,4 +129,17 @@ impl StackMap { ptr_to_gc_ref.cast::() }) } + + /// Returns the offsets that this stack map registers GC references at. + pub fn offsets(&self) -> impl Iterator + '_ { + // Here `self.data` is a bit set of offsets divided by 4, so iterate + // over all the bits in `self.data` and multiply their position by 4. 
+ let bit_positions = self.data.iter().enumerate().flat_map(|(i, word)| { + ScalarBitSet(word.get(LittleEndian)) + .iter() + .map(move |bit| (i as u32) * 32 + u32::from(bit)) + }); + + bit_positions.map(|pos| pos * 4) + } } diff --git a/crates/wasmtime/src/compile.rs b/crates/wasmtime/src/compile.rs index 0012823fa52a..783d64fba1b2 100644 --- a/crates/wasmtime/src/compile.rs +++ b/crates/wasmtime/src/compile.rs @@ -37,9 +37,9 @@ use std::{ use wasmtime_environ::component::Translator; use wasmtime_environ::{ BuiltinFunctionIndex, CompiledFunctionInfo, CompiledModuleInfo, Compiler, DefinedFuncIndex, - FinishedObject, FunctionBodyData, ModuleEnvironment, ModuleInternedTypeIndex, + FilePos, FinishedObject, FunctionBodyData, ModuleEnvironment, ModuleInternedTypeIndex, ModuleTranslation, ModuleTypes, ModuleTypesBuilder, ObjectKind, PrimaryMap, RelocationTarget, - StaticModuleIndex, WasmFunctionInfo, + StaticModuleIndex, }; mod code_builder; @@ -319,7 +319,7 @@ struct CompileOutput { key: CompileKey, symbol: String, function: CompiledFunction>, - info: Option, + start_srcloc: FilePos, } /// The collection of things we need to compile for a Wasm module or component. @@ -376,7 +376,7 @@ impl<'a> CompileInputs<'a> { .compile_trampoline(component, types, idx, tunables) .with_context(|| format!("failed to compile {}", trampoline.symbol_name()))? 
.into(), - info: None, + start_srcloc: FilePos::default(), }) }); } @@ -398,7 +398,7 @@ impl<'a> CompileInputs<'a> { key: CompileKey::resource_drop_wasm_to_array_trampoline(), function: CompiledFunction::Function(trampoline), symbol, - info: None, + start_srcloc: FilePos::default(), }) }); } @@ -470,7 +470,10 @@ impl<'a> CompileInputs<'a> { func_index.as_u32() ), }; - let (info, function) = compiler + let data = func_body.body.get_binary_reader(); + let offset = data.original_position(); + let start_srcloc = FilePos::new(u32::try_from(offset).unwrap()); + let function = compiler .compile_function(translation, def_func_index, func_body, types) .with_context(|| format!("failed to compile: {symbol}"))?; @@ -478,7 +481,7 @@ impl<'a> CompileInputs<'a> { key: CompileKey::wasm_function(module, def_func_index), symbol, function: CompiledFunction::Function(function), - info: Some(info), + start_srcloc, }) }); @@ -498,7 +501,7 @@ impl<'a> CompileInputs<'a> { key: CompileKey::array_to_wasm_trampoline(module, def_func_index), symbol, function: CompiledFunction::Function(trampoline), - info: None, + start_srcloc: FilePos::default(), }) }); } @@ -524,7 +527,7 @@ impl<'a> CompileInputs<'a> { key: CompileKey::wasm_to_array_trampoline(trampoline_type_index), function: CompiledFunction::Function(trampoline), symbol, - info: None, + start_srcloc: FilePos::default(), }) }); } @@ -570,7 +573,7 @@ fn compile_required_builtins(engine: &Engine, raw_outputs: &mut Vec, - // A map from Wasm functions' compile keys to their infos. - wasm_function_infos: HashMap, + // A map of wasm functions and where they're located in the original file. + start_srclocs: HashMap, // The index of each compiled function, bucketed by compile key kind. 
indices: BTreeMap>>, @@ -804,7 +805,7 @@ impl FunctionIndices { .map(|(key, wasm_func_index)| { let wasm_func_index = wasm_func_index.unwrap_function(); let wasm_func_loc = symbol_ids_and_locs[wasm_func_index].1; - let wasm_func_info = self.wasm_function_infos.remove(&key).unwrap(); + let start_srcloc = self.start_srclocs.remove(&key).unwrap(); let array_to_wasm_trampoline = array_to_wasm_trampolines .remove(&CompileKey::array_to_wasm_trampoline( @@ -814,7 +815,7 @@ impl FunctionIndices { .map(|x| symbol_ids_and_locs[x.unwrap_function()].1); CompiledFunctionInfo { - wasm_func_info, + start_srcloc, wasm_func_loc, array_to_wasm_trampoline, } diff --git a/crates/wasmtime/src/runtime/code_memory.rs b/crates/wasmtime/src/runtime/code_memory.rs index 06e51b7c3563..a185c3f7a7c8 100644 --- a/crates/wasmtime/src/runtime/code_memory.rs +++ b/crates/wasmtime/src/runtime/code_memory.rs @@ -35,6 +35,7 @@ pub struct CodeMemory { trap_data: Range, wasm_data: Range, address_map_data: Range, + stack_map_data: Range, func_name_data: Range, info_data: Range, wasm_dwarf: Range, @@ -123,6 +124,7 @@ impl CodeMemory { let mut trap_data = 0..0; let mut wasm_data = 0..0; let mut address_map_data = 0..0; + let mut stack_map_data = 0..0; let mut func_name_data = 0..0; let mut info_data = 0..0; let mut wasm_dwarf = 0..0; @@ -183,6 +185,7 @@ impl CodeMemory { crate::runtime::vm::UnwindRegistration::SECTION_NAME => unwind = range, obj::ELF_WASM_DATA => wasm_data = range, obj::ELF_WASMTIME_ADDRMAP => address_map_data = range, + obj::ELF_WASMTIME_STACK_MAP => stack_map_data = range, obj::ELF_WASMTIME_TRAPS => trap_data = range, obj::ELF_NAME_DATA => func_name_data = range, obj::ELF_WASMTIME_INFO => info_data = range, @@ -215,6 +218,7 @@ impl CodeMemory { unwind, trap_data, address_map_data, + stack_map_data, func_name_data, wasm_dwarf, info_data, @@ -265,6 +269,12 @@ impl CodeMemory { &self.mmap[self.address_map_data.clone()] } + /// Returns the encoded stack map section used to pass to + /// 
`wasmtime_environ::StackMap::lookup`. + pub fn stack_map_data(&self) -> &[u8] { + &self.mmap[self.stack_map_data.clone()] + } + /// Returns the contents of the `ELF_WASMTIME_INFO` section, or an empty /// slice if it wasn't found. #[inline] diff --git a/crates/wasmtime/src/runtime/instantiate.rs b/crates/wasmtime/src/runtime/instantiate.rs index c36ac3194985..201b28b56642 100644 --- a/crates/wasmtime/src/runtime/instantiate.rs +++ b/crates/wasmtime/src/runtime/instantiate.rs @@ -9,9 +9,8 @@ use crate::{code_memory::CodeMemory, profiling_agent::ProfilingAgent}; use alloc::sync::Arc; use core::str; use wasmtime_environ::{ - CompiledFunctionInfo, CompiledModuleInfo, DefinedFuncIndex, FuncIndex, FunctionLoc, - FunctionName, Metadata, Module, ModuleInternedTypeIndex, PrimaryMap, StackMapInformation, - WasmFunctionInfo, + CompiledFunctionInfo, CompiledModuleInfo, DefinedFuncIndex, FilePos, FuncIndex, FunctionLoc, + FunctionName, Metadata, Module, ModuleInternedTypeIndex, PrimaryMap, }; /// A compiled wasm module, ready to be instantiated. @@ -174,19 +173,6 @@ impl CompiledModule { &self.text()[loc.start as usize..][..loc.length as usize] } - /// Returns the stack map information for all functions defined in this - /// module. - /// - /// The iterator returned iterates over the span of the compiled function in - /// memory with the stack maps associated with those bytes. - pub fn stack_maps(&self) -> impl Iterator { - self.finished_functions().map(|(_, f)| f).zip( - self.funcs - .values() - .map(|f| &f.wasm_func_info.stack_maps[..]), - ) - } - /// Lookups a defined function by a program counter value. /// /// Returns the defined function index and the relative address of @@ -231,13 +217,10 @@ impl CompiledModule { .wasm_func_loc } - /// Gets the function information for a given function index. 
- pub fn wasm_func_info(&self, index: DefinedFuncIndex) -> &WasmFunctionInfo { - &self - .funcs - .get(index) - .expect("defined function should be present") - .wasm_func_info + /// Returns the original binary offset in the file that `index` was defined + /// at. + pub fn func_start_srcloc(&self, index: DefinedFuncIndex) -> FilePos { + self.funcs[index].start_srcloc } /// Creates a new symbolication context which can be used to further diff --git a/crates/wasmtime/src/runtime/module.rs b/crates/wasmtime/src/runtime/module.rs index 92641732d899..b254925572ce 100644 --- a/crates/wasmtime/src/runtime/module.rs +++ b/crates/wasmtime/src/runtime/module.rs @@ -1117,28 +1117,10 @@ impl Module { /// Lookup the stack map at a program counter value. #[cfg(feature = "gc")] - pub(crate) fn lookup_stack_map(&self, pc: usize) -> Option<&wasmtime_environ::StackMap> { - let text_offset = pc - self.inner.module.text().as_ptr() as usize; - let (index, func_offset) = self.inner.module.func_by_text_offset(text_offset)?; - let info = self.inner.module.wasm_func_info(index); - - // Do a binary search to find the stack map for the given offset. - let index = match info - .stack_maps - .binary_search_by_key(&func_offset, |i| i.code_offset) - { - // Found it. - Ok(i) => i, - - // No stack map associated with this PC. - // - // Because we know we are in Wasm code, and we must be at some kind - // of call/safepoint, then the Cranelift backend must have avoided - // emitting a stack map for this location because no refs were live. 
- Err(_) => return None, - }; - - Some(&info.stack_maps[index].stack_map) + pub(crate) fn lookup_stack_map(&self, pc: usize) -> Option> { + let text_offset = u32::try_from(pc - self.inner.module.text().as_ptr() as usize).unwrap(); + let info = self.inner.code.code_memory().stack_map_data(); + wasmtime_environ::StackMap::lookup(text_offset, info) } } diff --git a/crates/wasmtime/src/runtime/trap.rs b/crates/wasmtime/src/runtime/trap.rs index 413cb33f8dfc..0a05f3d8d474 100644 --- a/crates/wasmtime/src/runtime/trap.rs +++ b/crates/wasmtime/src/runtime/trap.rs @@ -423,8 +423,7 @@ impl FrameInfo { pub(crate) fn new(module: Module, text_offset: usize) -> Option { let compiled_module = module.compiled_module(); let (index, _func_offset) = compiled_module.func_by_text_offset(text_offset)?; - let info = compiled_module.wasm_func_info(index); - let func_start = info.start_srcloc; + let func_start = compiled_module.func_start_srcloc(index); let instr = wasmtime_environ::lookup_file_pos( compiled_module.code_memory().address_map_data(), text_offset, diff --git a/crates/winch/src/compiler.rs b/crates/winch/src/compiler.rs index 8ee12653aad7..7d4fae8992c9 100644 --- a/crates/winch/src/compiler.rs +++ b/crates/winch/src/compiler.rs @@ -9,7 +9,7 @@ use wasmtime_cranelift::{CompiledFunction, ModuleTextBuilder}; use wasmtime_environ::{ AddressMapSection, BuiltinFunctionIndex, CompileError, DefinedFuncIndex, FunctionBodyData, FunctionLoc, ModuleTranslation, ModuleTypesBuilder, PrimaryMap, RelocationTarget, - StaticModuleIndex, TrapEncodingBuilder, Tunables, VMOffsets, WasmFunctionInfo, + StaticModuleIndex, TrapEncodingBuilder, Tunables, VMOffsets, }; use winch_codegen::{BuiltinFunctions, CallingConvention, TargetIsa}; @@ -95,7 +95,7 @@ impl wasmtime_environ::Compiler for Compiler { index: DefinedFuncIndex, data: FunctionBodyData<'_>, types: &ModuleTypesBuilder, - ) -> Result<(WasmFunctionInfo, Box), CompileError> { + ) -> Result, CompileError> { let index = 
translation.module.func_index(index); let sig = translation.module.functions[index] .signature @@ -132,13 +132,7 @@ impl wasmtime_environ::Compiler for Compiler { self.emit_unwind_info(&mut func)?; } - Ok(( - WasmFunctionInfo { - start_srcloc: func.metadata().address_map.start_srcloc, - stack_maps: Box::new([]), - }, - Box::new(func), - )) + Ok(Box::new(func)) } fn compile_array_to_wasm_trampoline(