Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 66 additions & 20 deletions cranelift/bitset/src/compound.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Compound bit sets.

use crate::scalar::{self, ScalarBitSet};
use crate::scalar::{self, ScalarBitSet, ScalarBitSetStorage};
use alloc::boxed::Box;
use core::{cmp, iter, mem};

Expand Down Expand Up @@ -45,8 +45,8 @@ use core::{cmp, iter, mem};
feature = "enable-serde",
derive(serde_derive::Serialize, serde_derive::Deserialize)
)]
pub struct CompoundBitSet {
elems: Box<[ScalarBitSet<usize>]>,
pub struct CompoundBitSet<T = usize> {
elems: Box<[ScalarBitSet<T>]>,
max: Option<u32>,
}
Comment on lines +48 to 51
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea here is that you want to use something of fixed size, rather than usize, for the bitsets serialized into the stack map section?

It shouldn't actually matter, since we can only ever run .cwasms that have the same size usize as the host (whether native or pulley) right? Given that, is it worth the complications that become necessary here to support non-usize scalars inside a compound bit set?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or alternatively, should we just fix the compound bit set's internal scalar bit set to u64 or u32 scalars, instead of usize?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an attempt to multiplex the (IMO correct) default behavior of pointer-sized-by-default with the cross-compilation behavior of "always use fixed-size things as it's easier". I don't want to use usize in the compiled ELF as that makes cross-compilation and debugging cross-compiled artifacts a bit harder.


Expand All @@ -57,8 +57,6 @@ impl core::fmt::Debug for CompoundBitSet {
}
}

const BITS_PER_WORD: usize = mem::size_of::<usize>() * 8;

impl CompoundBitSet {
/// Construct a new, empty bit set.
///
Expand All @@ -75,6 +73,10 @@ impl CompoundBitSet {
pub fn new() -> Self {
CompoundBitSet::default()
}
}

impl<T: ScalarBitSetStorage> CompoundBitSet<T> {
const BITS_PER_SCALAR: usize = mem::size_of::<T>() * 8;

/// Construct a new, empty bit set with space reserved to store any element
/// `x` such that `x < capacity`.
Expand All @@ -86,14 +88,14 @@ impl CompoundBitSet {
/// ```
/// use cranelift_bitset::CompoundBitSet;
///
/// let bitset = CompoundBitSet::with_capacity(4096);
/// let bitset = CompoundBitSet::<u32>::with_capacity(4096);
///
/// assert!(bitset.is_empty());
/// assert!(bitset.capacity() >= 4096);
/// ```
#[inline]
pub fn with_capacity(capacity: usize) -> Self {
let mut bitset = Self::new();
let mut bitset = Self::default();
bitset.ensure_capacity(capacity);
bitset
}
Expand Down Expand Up @@ -144,7 +146,7 @@ impl CompoundBitSet {
/// assert!(bitset.capacity() >= 999);
///```
pub fn capacity(&self) -> usize {
self.elems.len() * BITS_PER_WORD
self.elems.len() * Self::BITS_PER_SCALAR
}

/// Is this bitset empty?
Expand Down Expand Up @@ -172,8 +174,8 @@ impl CompoundBitSet {
/// `ScalarBitSet<usize>` at `self.elems[word]`.
#[inline]
fn word_and_bit(i: usize) -> (usize, u8) {
let word = i / BITS_PER_WORD;
let bit = i % BITS_PER_WORD;
let word = i / Self::BITS_PER_SCALAR;
let bit = i % Self::BITS_PER_SCALAR;
let bit = u8::try_from(bit).unwrap();
(word, bit)
}
Expand All @@ -183,8 +185,8 @@ impl CompoundBitSet {
#[inline]
fn elem(word: usize, bit: u8) -> usize {
let bit = usize::from(bit);
debug_assert!(bit < BITS_PER_WORD);
word * BITS_PER_WORD + bit
debug_assert!(bit < Self::BITS_PER_SCALAR);
word * Self::BITS_PER_SCALAR + bit
}

/// Is `i` contained in this bitset?
Expand Down Expand Up @@ -461,19 +463,63 @@ impl CompoundBitSet {
/// );
/// ```
#[inline]
pub fn iter(&self) -> Iter<'_> {
pub fn iter(&self) -> Iter<'_, T> {
Iter {
bitset: self,
word: 0,
sub: None,
}
}

/// Returns an iterator over the words of this bit-set or the in-memory
/// representation of the bit set.
///
/// # Example
///
/// ```
/// use cranelift_bitset::{CompoundBitSet, ScalarBitSet};
///
/// let mut bitset = CompoundBitSet::<u32>::default();
///
/// assert_eq!(
/// bitset.iter_scalars().collect::<Vec<_>>(),
/// [],
/// );
///
/// bitset.insert(0);
///
/// assert_eq!(
/// bitset.iter_scalars().collect::<Vec<_>>(),
/// [ScalarBitSet(0x1)],
/// );
///
/// bitset.insert(1);
///
/// assert_eq!(
/// bitset.iter_scalars().collect::<Vec<_>>(),
/// [ScalarBitSet(0x3)],
/// );
///
/// bitset.insert(32);
///
/// assert_eq!(
/// bitset.iter_scalars().collect::<Vec<_>>(),
/// [ScalarBitSet(0x3), ScalarBitSet(0x1)],
/// );
/// ```
pub fn iter_scalars(&self) -> impl Iterator<Item = ScalarBitSet<T>> + '_ {
let nwords = match self.max {
Some(n) => 1 + (n as usize / Self::BITS_PER_SCALAR),
None => 0,
};
self.elems.iter().copied().take(nwords)
}
}

impl<'a> IntoIterator for &'a CompoundBitSet {
impl<'a, T: ScalarBitSetStorage> IntoIterator for &'a CompoundBitSet<T> {
type Item = usize;

type IntoIter = Iter<'a>;
type IntoIter = Iter<'a, T>;

#[inline]
fn into_iter(self) -> Self::IntoIter {
Expand All @@ -482,21 +528,21 @@ impl<'a> IntoIterator for &'a CompoundBitSet {
}

/// An iterator over the elements in a [`CompoundBitSet`].
pub struct Iter<'a> {
bitset: &'a CompoundBitSet,
pub struct Iter<'a, T = usize> {
bitset: &'a CompoundBitSet<T>,
word: usize,
sub: Option<scalar::Iter<usize>>,
sub: Option<scalar::Iter<T>>,
}

impl Iterator for Iter<'_> {
impl<T: ScalarBitSetStorage> Iterator for Iter<'_, T> {
type Item = usize;

#[inline]
fn next(&mut self) -> Option<usize> {
loop {
if let Some(sub) = &mut self.sub {
if let Some(bit) = sub.next() {
return Some(CompoundBitSet::elem(self.word, bit));
return Some(CompoundBitSet::<T>::elem(self.word, bit));
} else {
self.word += 1;
}
Expand Down
69 changes: 32 additions & 37 deletions crates/cranelift/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ use crate::{array_call_signature, CompiledFunction, ModuleTextBuilder};
use crate::{builder::LinkOptions, wasm_call_signature, BuiltinFunctionSignatures};
use anyhow::{Context as _, Result};
use cranelift_codegen::binemit::CodeOffset;
use cranelift_codegen::bitset::CompoundBitSet;
use cranelift_codegen::ir::condcodes::IntCC;
use cranelift_codegen::ir::{self, InstBuilder, MemFlags, UserExternalName, UserFuncName, Value};
use cranelift_codegen::isa::{
Expand All @@ -23,14 +22,15 @@ use std::any::Any;
use std::cmp;
use std::collections::HashMap;
use std::mem;
use std::ops::Range;
use std::path;
use std::sync::{Arc, Mutex};
use wasmparser::{FuncValidatorAllocations, FunctionBody};
use wasmtime_environ::{
AddressMapSection, BuiltinFunctionIndex, CacheStore, CompileError, DefinedFuncIndex, FlagValue,
FunctionBodyData, FunctionLoc, HostCall, ModuleTranslation, ModuleTypesBuilder, PtrSize,
RelocationTarget, StackMapInformation, StaticModuleIndex, TrapEncodingBuilder, TrapSentinel,
TripleExt, Tunables, VMOffsets, WasmFuncType, WasmFunctionInfo, WasmValType,
RelocationTarget, StackMapSection, StaticModuleIndex, TrapEncodingBuilder, TrapSentinel,
TripleExt, Tunables, VMOffsets, WasmFuncType, WasmValType,
};

#[cfg(feature = "component-model")]
Expand Down Expand Up @@ -187,7 +187,7 @@ impl wasmtime_environ::Compiler for Compiler {
func_index: DefinedFuncIndex,
input: FunctionBodyData<'_>,
types: &ModuleTypesBuilder,
) -> Result<(WasmFunctionInfo, Box<dyn Any + Send>), CompileError> {
) -> Result<Box<dyn Any + Send>, CompileError> {
let isa = &*self.isa;
let module = &translation.module;
let func_index = module.func_index(func_index);
Expand Down Expand Up @@ -275,7 +275,7 @@ impl wasmtime_environ::Compiler for Compiler {
&mut func_env,
)?;

let (info, func) = compiler.finish_with_info(
let func = compiler.finish_with_info(
Some((&body, &self.tunables)),
&format!("wasm_func_{}", func_index.as_u32()),
)?;
Expand All @@ -284,7 +284,7 @@ impl wasmtime_environ::Compiler for Compiler {
log::debug!("{:?} translated in {:?}", func_index, timing.total());
log::trace!("{:?} timing info\n{}", func_index, timing);

Ok((info, Box::new(func)))
Ok(Box::new(func))
}

fn compile_array_to_wasm_trampoline(
Expand Down Expand Up @@ -450,6 +450,7 @@ impl wasmtime_environ::Compiler for Compiler {
}
let mut addrs = AddressMapSection::default();
let mut traps = TrapEncodingBuilder::default();
let mut stack_maps = StackMapSection::default();

let mut ret = Vec::with_capacity(funcs.len());
for (i, (sym, func)) in funcs.iter().enumerate() {
Expand All @@ -459,6 +460,11 @@ impl wasmtime_environ::Compiler for Compiler {
let addr = func.address_map();
addrs.push(range.clone(), &addr.instructions);
}
clif_to_env_stack_maps(
&mut stack_maps,
range.clone(),
func.buffer.user_stack_maps(),
);
traps.push(range.clone(), &func.traps().collect::<Vec<_>>());
builder.append_padding(self.linkopts.padding_between_functions);
let info = FunctionLoc {
Expand All @@ -473,6 +479,7 @@ impl wasmtime_environ::Compiler for Compiler {
if self.tunables.generate_address_map {
addrs.append_to(obj);
}
stack_maps.append_to(obj);
traps.append_to(obj);

Ok(ret)
Expand Down Expand Up @@ -963,16 +970,14 @@ impl FunctionCompiler<'_> {
}

fn finish(self, clif_filename: &str) -> Result<CompiledFunction, CompileError> {
let (info, func) = self.finish_with_info(None, clif_filename)?;
assert!(info.stack_maps.is_empty());
Ok(func)
self.finish_with_info(None, clif_filename)
}

fn finish_with_info(
mut self,
body_and_tunables: Option<(&FunctionBody<'_>, &Tunables)>,
clif_filename: &str,
) -> Result<(WasmFunctionInfo, CompiledFunction), CompileError> {
) -> Result<CompiledFunction, CompileError> {
let context = &mut self.cx.codegen_context;
let isa = &*self.compiler.isa;

Expand All @@ -994,7 +999,7 @@ impl FunctionCompiler<'_> {
write!(output, "{}", context.func.display()).unwrap();
}

let mut compiled_code = compilation_result?;
let compiled_code = compilation_result?;

// Give wasm functions, user defined code, a "preferred" alignment
// instead of the minimum alignment as this can help perf in niche
Expand Down Expand Up @@ -1054,45 +1059,35 @@ impl FunctionCompiler<'_> {
}
}

let stack_maps =
clif_to_env_stack_maps(compiled_code.buffer.take_user_stack_maps().into_iter());
compiled_function
.set_sized_stack_slots(std::mem::take(&mut context.func.sized_stack_slots));
self.compiler.contexts.lock().unwrap().push(self.cx);

Ok((
WasmFunctionInfo {
start_srcloc: compiled_function.metadata().address_map.start_srcloc,
stack_maps: stack_maps.into(),
},
compiled_function,
))
Ok(compiled_function)
}
}

/// Convert from Cranelift's representation of a stack map to Wasmtime's
/// compiler-agnostic representation.
///
/// Here `section` is the wasmtime data section being created and `range` is the
/// range of the function being added. The `clif_stack_maps` entry is the raw
/// listing of stack maps from Cranelift.
fn clif_to_env_stack_maps(
clif_stack_maps: impl ExactSizeIterator<Item = (CodeOffset, u32, ir::UserStackMap)>,
) -> Vec<StackMapInformation> {
let mut stack_maps = Vec::with_capacity(clif_stack_maps.len());
for (code_offset, mapped_bytes, stack_map) in clif_stack_maps {
let mut bitset = CompoundBitSet::new();
for (ty, offset) in stack_map.entries() {
section: &mut StackMapSection,
range: Range<u64>,
clif_stack_maps: &[(CodeOffset, u32, ir::UserStackMap)],
) {
for (offset, frame_size, stack_map) in clif_stack_maps {
let mut frame_offsets = Vec::new();
for (ty, frame_offset) in stack_map.entries() {
assert_eq!(ty, ir::types::I32);
bitset.insert(usize::try_from(offset).unwrap());
}
if bitset.is_empty() {
continue;
frame_offsets.push(frame_offset);
}
let stack_map = wasmtime_environ::StackMap::new(mapped_bytes, bitset);
stack_maps.push(StackMapInformation {
code_offset,
stack_map,
});
let code_offset = range.start + u64::from(*offset);
assert!(code_offset < range.end);
section.push(code_offset, *frame_size, frame_offsets.into_iter());
}
stack_maps.sort_unstable_by_key(|info| info.code_offset);
stack_maps
}

fn declare_and_call(
Expand Down
5 changes: 5 additions & 0 deletions crates/environ/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ smallvec = { workspace = true, features = ['serde'] }
clap = { workspace = true, features = ['default'] }
env_logger = { workspace = true }
wat = { workspace = true }
# Fix a test parsing ELF files internally where the bytes themselves reside in a
# `Vec<u8>` with no alignment requirements on it. By enabling the `unaligned`
# feature we don't require anything to be aligned so it doesn't matter the
# alignment of the bytes that we're reading.
object = { workspace = true, features = ['unaligned'] }

[[example]]
name = "factc"
Expand Down
3 changes: 2 additions & 1 deletion crates/environ/src/address_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ fn parse_address_map(
section: &[u8],
) -> Option<(&[U32Bytes<LittleEndian>], &[U32Bytes<LittleEndian>])> {
let mut section = Bytes(section);
// NB: this matches the encoding written by `append_to` above.
// NB: this matches the encoding written by `append_to` in the
// `compile::address_map` module.
let count = section.read::<U32Bytes<LittleEndian>>().ok()?;
let count = usize::try_from(count.get(LittleEndian)).ok()?;
let (offsets, section) =
Expand Down
Loading