From 2a608b1c901aa14fd0cc8e57e750776eb1994024 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Mon, 18 Mar 2024 00:29:32 -0700 Subject: [PATCH] Cranelift: shrink ABIArgSlot ABIArgSlot is currently 16 bytes with 4 bytes of padding. Shrinking the offset field in its Stack variant from i64 to i32 therefore reduces the enum to 8 bytes. It also reduces its alignment to 4 bytes, which might reduce padding in containing structures. Some targets already limit stack frame sizes to 2GB or less, and in practice they must be much smaller than that, so an i32 is plenty. By making this type only one word in size, we can put two of them in ABIArgSlotVec for free, which guarantees that such SmallVecs will never spill to the heap. In principle we could use arrayvec or something that doesn't support spilling to the heap at all. There are many other places where we use i64 for stack frame offsets which would probably benefit from switching to i32, but I didn't want to change everything at once. --- cranelift/codegen/src/isa/aarch64/abi.rs | 4 ++-- cranelift/codegen/src/isa/riscv64/abi.rs | 2 +- cranelift/codegen/src/isa/s390x/abi.rs | 2 +- cranelift/codegen/src/isa/x64/abi.rs | 4 ++-- cranelift/codegen/src/machinst/abi.rs | 25 +++++++++++++----------- cranelift/codegen/src/prelude_lower.isle | 2 +- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 440840c4d44e..3a842b6daa00 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -320,7 +320,7 @@ impl ABIMachineSpec for AArch64MachineDeps { Some((ty, slot_offset)) }) .map(|(ty, offset)| ABIArgSlot::Stack { - offset, + offset: i32::try_from(offset).unwrap(), ty, extension: param.extension, }) @@ -1325,7 +1325,7 @@ where let offset = i64::from(*next_stack); *next_stack += ty.bytes(); ABIArgSlot::Stack { - offset, + offset: i32::try_from(offset).unwrap(), ty, extension: ir::ArgumentExtension::None, } diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index 0f0d32e8fce0..c3d5d0ff4d3c 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -160,7 +160,7 @@ impl ABIMachineSpec for Riscv64MachineDeps { debug_assert!(size.is_power_of_two()); next_stack = align_to(next_stack, size); slots.push(ABIArgSlot::Stack { - offset: next_stack as i64, + offset: i32::try_from(next_stack).unwrap(), ty: *reg_ty, extension: param.extension, }); diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index af34bb5a4f94..5f414f2f60bb 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -323,7 +323,7 @@ impl ABIMachineSpec for S390xMachineDeps { let offset = (next_stack + offset) as i64; next_stack += slot_size; ABIArgSlot::Stack { - offset, + offset: i32::try_from(offset).unwrap(), ty: param.value_type, extension: param.extension, } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index eeda5b6cdeb7..dbe681258ac7 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -196,7 +196,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { None => { next_stack = align_to(next_stack, 8) + 8; ABIArgSlot::Stack { - offset: (next_stack - 8) as i64, + offset: i32::try_from(next_stack - 8).unwrap(), ty: ir::types::I64, extension: param.extension, } @@ -250,7 +250,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { debug_assert!(size.is_power_of_two()); next_stack = align_to(next_stack, size); slots.push(ABIArgSlot::Stack { - offset: next_stack as i64, + offset: i32::try_from(next_stack).unwrap(), ty: *reg_ty, extension: param.extension, }); diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index 4700bbaa2fff..ef5091eec208 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -158,7 +158,7 @@ pub enum ABIArgSlot { /// Arguments only: on stack, at given offset from SP at entry. Stack { /// Offset of this arg relative to the base of stack args. - offset: i64, + offset: i32, /// Value type of this arg. ty: ir::Type, /// Should this arg be zero- or sign-extended? @@ -176,10 +176,12 @@ impl ABIArgSlot { } } -/// A vector of `ABIArgSlot`s. Inline capacity for one element because basically -/// 100% of values use one slot. Only `i128`s need multiple slots, and they are -/// super rare (and never happen with Wasm). -pub type ABIArgSlotVec = SmallVec<[ABIArgSlot; 1]>; +/// A vector of `ABIArgSlot`s. Inline capacity for two elements because that +/// many fit inline in a SmallVec anyway. Basically 100% of values use one slot. +/// Only `i128`s need two slots, and they are super rare (and never happen with +/// Wasm). On 64-bit targets, nothing needs more than two slots, so this should +/// never spill to the heap. +pub type ABIArgSlotVec = SmallVec<[ABIArgSlot; 2]>; /// An ABIArg is composed of one or more parts. This allows for a CLIF-level /// Value to be passed with its parts in more than one location at the ABI @@ -257,7 +259,7 @@ impl ABIArg { ) -> ABIArg { ABIArg::Slots { slots: smallvec![ABIArgSlot::Stack { - offset, + offset: i32::try_from(offset).unwrap(), ty, extension, }], @@ -1523,7 +1525,7 @@ impl Callee { }; insts.push(M::gen_load_stack( StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + M::fp_to_arg_offset(self.call_conv, &self.flags) + i64::from(offset), ty, ), *into_reg, @@ -1577,7 +1579,8 @@ impl Callee { let addr_reg = self.arg_temp_reg[idx].unwrap(); insts.push(M::gen_load_stack( StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, + M::fp_to_arg_offset(self.call_conv, &self.flags) + + i64::from(offset), ty, ), addr_reg, @@ -2433,7 +2436,7 @@ impl CallSite { }; locs.push(( data.into(), - ArgLoc::Stack(StackAMode::SPOffset(offset, ty)), + ArgLoc::Stack(StackAMode::SPOffset(i64::from(offset), ty)), )); } } @@ -2459,7 +2462,7 @@ impl CallSite { ABIArgSlot::Reg { reg, .. } => ArgLoc::Reg(reg.into()), ABIArgSlot::Stack { offset, .. } => { let ty = M::word_type(); - ArgLoc::Stack(StackAMode::SPOffset(offset, ty)) + ArgLoc::Stack(StackAMode::SPOffset(i64::from(offset), ty)) } }; locs.push((tmp.into(), loc)); @@ -2567,7 +2570,7 @@ impl CallSite { sig_data.sized_stack_arg_space() }; insts.push(M::gen_load_stack( - StackAMode::SPOffset(offset + ret_area_base, ty), + StackAMode::SPOffset(i64::from(offset) + ret_area_base, ty), *into_reg, ty, )); diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle index 54851de654ad..01ad2515f8c1 100644 --- a/cranelift/codegen/src/prelude_lower.isle +++ b/cranelift/codegen/src/prelude_lower.isle @@ -957,7 +957,7 @@ (ty Type) (extension ArgumentExtension)) (Stack - (offset i64) + (offset i32) (ty Type) (extension ArgumentExtension))))