From 21e9ff86c7f00f679da98874389ff682cedb25e5 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Tue, 2 Apr 2024 14:13:49 -0700 Subject: [PATCH 1/2] cranelift: Specialize StackAMode::FPOffset The StackAMode::FPOffset address mode was always used together with fp_to_arg_offset, to compute addresses within the current stack frame's argument area. Instead, introduce a new StackAMode::ArgOffset variant specifically for stack addresses within the current frame's argument area. The details of how to find the argument area are folded into the conversion from the target-independent StackAMode into target-dependent address-mode types. Currently, fp_to_arg_offset returns a target-specific constant, so I've preserved that constant in each backend's address-mode conversion. However, in general the location of the argument area may depend on calling convention, flags, or other concerns. Also, it may not always be desirable to use a frame pointer register as the base to find the argument area. I expect some backends will eventually need to introduce new synthetic addressing modes to resolve argument-area offsets after register allocation, when the full frame layout is known. I also cleaned up a couple minor things while I was in the area: - Determining argument extension type was written in a confusing way and also had a typo in the comment describing it. - riscv64's AMode::offset was only used in one place and is clearer when inlined. --- cranelift/codegen/src/isa/aarch64/abi.rs | 7 +-- cranelift/codegen/src/isa/riscv64/abi.rs | 5 -- .../codegen/src/isa/riscv64/inst/args.rs | 10 +--- cranelift/codegen/src/isa/s390x/abi.rs | 7 +-- cranelift/codegen/src/isa/x64/abi.rs | 8 +--- cranelift/codegen/src/machinst/abi.rs | 48 ++++++------------- 6 files changed, 23 insertions(+), 62 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index c973869aad74..fded724480bc 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -35,7 +35,8 @@ static STACK_ARG_RET_SIZE_LIMIT: u32 = 128 * 1024 * 1024; impl Into for StackAMode { fn into(self) -> AMode { match self { - StackAMode::FPOffset(off, ty) => AMode::FPOffset { off, ty }, + // Argument area begins after saved frame pointer + return address. + StackAMode::ArgOffset(off, ty) => AMode::FPOffset { off: off + 16, ty }, StackAMode::NominalSPOffset(off, ty) => AMode::NominalSPOffset { off, ty }, StackAMode::SPOffset(off, ty) => AMode::SPOffset { off, ty }, } @@ -365,10 +366,6 @@ impl ABIMachineSpec for AArch64MachineDeps { Ok((next_stack, extra_arg)) } - fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { - 16 // frame pointer + return address. - } - fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) } diff --git a/cranelift/codegen/src/isa/riscv64/abi.rs b/cranelift/codegen/src/isa/riscv64/abi.rs index a0ebd3e7f532..abda85b0bf26 100644 --- a/cranelift/codegen/src/isa/riscv64/abi.rs +++ b/cranelift/codegen/src/isa/riscv64/abi.rs @@ -205,11 +205,6 @@ impl ABIMachineSpec for Riscv64MachineDeps { Ok((next_stack, pos)) } - fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { - // lr fp. - 16 - } - fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { Inst::gen_load(into_reg, mem.into(), ty, MemFlags::trusted()) } diff --git a/cranelift/codegen/src/isa/riscv64/inst/args.rs b/cranelift/codegen/src/isa/riscv64/inst/args.rs index 5802f594ac76..f90168f54dba 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/args.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/args.rs @@ -147,16 +147,9 @@ impl AMode { pub(crate) fn get_offset_with_state(&self, state: &EmitState) -> i64 { match self { &AMode::NominalSPOffset(offset, _) => offset + state.virtual_sp_offset, - _ => self.get_offset(), - } - } - - fn get_offset(&self) -> i64 { - match self { &AMode::RegOffset(_, offset, ..) => offset, &AMode::SPOffset(offset, _) => offset, &AMode::FPOffset(offset, _) => offset, - &AMode::NominalSPOffset(offset, _) => offset, &AMode::Const(_) | &AMode::Label(_) => 0, } } @@ -206,7 +199,8 @@ impl Display for AMode { impl Into for StackAMode { fn into(self) -> AMode { match self { - StackAMode::FPOffset(offset, ty) => AMode::FPOffset(offset, ty), + // Argument area begins after saved lr + fp. + StackAMode::ArgOffset(offset, ty) => AMode::FPOffset(offset + 16, ty), StackAMode::SPOffset(offset, ty) => AMode::SPOffset(offset, ty), StackAMode::NominalSPOffset(offset, ty) => AMode::NominalSPOffset(offset, ty), } diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 1a1e82422b98..b5fb077bf6ff 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -191,7 +191,8 @@ pub static REG_SAVE_AREA_SIZE: u32 = 160; impl Into for StackAMode { fn into(self) -> MemArg { match self { - StackAMode::FPOffset(off, _ty) => MemArg::InitialSPOffset { off }, + // Argument area always begins at the initial SP. + StackAMode::ArgOffset(off, _ty) => MemArg::InitialSPOffset { off }, StackAMode::NominalSPOffset(off, _ty) => MemArg::NominalSPOffset { off }, StackAMode::SPOffset(off, _ty) => { MemArg::reg_plus_off(stack_reg(), off, MemFlags::trusted()) @@ -404,10 +405,6 @@ impl ABIMachineSpec for S390xMachineDeps { Ok((next_stack, extra_arg)) } - fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { - 0 - } - fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Inst { Inst::gen_load(into_reg, mem.into(), ty) } diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index c7c5a4a823a1..3eab5813452c 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -405,10 +405,6 @@ impl ABIMachineSpec for X64ABIMachineSpec { Ok((next_stack, extra_arg)) } - fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 { - 16 // frame pointer + return address. - } - fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I { // For integer-typed values, we always load a full 64 bits (and we always spill a full 64 // bits as well -- see `Inst::store()`). @@ -1023,11 +1019,11 @@ impl From for SyntheticAmode { // We enforce a 128 MB stack-frame size limit above, so these // `expect()`s should never fail. match amode { - StackAMode::FPOffset(off, _ty) => { + StackAMode::ArgOffset(off, _ty) => { let off = i32::try_from(off) .expect("Offset in FPOffset is greater than 2GB; should hit impl limit first"); SyntheticAmode::Real(Amode::ImmReg { - simm32: off, + simm32: off + 16, // frame pointer + return address base: regs::rbp(), flags: MemFlags::trusted(), }) diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index b1ececcce55c..0f4c45ce8065 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -281,9 +281,9 @@ pub enum ArgsOrRets { /// appropriate addressing mode. #[derive(Clone, Copy, Debug)] pub enum StackAMode { - /// Offset from the frame pointer, possibly making use of a specific type - /// for a scaled indexing operation. - FPOffset(i64, ir::Type), + /// Offset into the current frame's argument area, possibly making use of a + /// specific type for a scaled indexing operation. + ArgOffset(i64, ir::Type), /// Offset from the nominal stack pointer, possibly making use of a specific /// type for a scaled indexing operation. NominalSPOffset(i64, ir::Type), @@ -296,7 +296,7 @@ impl StackAMode { /// Offset by an addend. pub fn offset(self, addend: i64) -> Self { match self { - StackAMode::FPOffset(off, ty) => StackAMode::FPOffset(off + addend, ty), + StackAMode::ArgOffset(off, ty) => StackAMode::ArgOffset(off + addend, ty), StackAMode::NominalSPOffset(off, ty) => StackAMode::NominalSPOffset(off + addend, ty), StackAMode::SPOffset(off, ty) => StackAMode::SPOffset(off + addend, ty), } @@ -304,7 +304,7 @@ impl StackAMode { pub fn get_type(&self) -> ir::Type { match self { - &StackAMode::FPOffset(_, ty) => ty, + &StackAMode::ArgOffset(_, ty) => ty, &StackAMode::NominalSPOffset(_, ty) => ty, &StackAMode::SPOffset(_, ty) => ty, } @@ -417,10 +417,6 @@ pub trait ABIMachineSpec { args: ArgsAccumulator, ) -> CodegenResult<(u32, Option)>; - /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return - /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg). - fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64; - /// Generate a load from the stack. fn gen_load_stack(mem: StackAMode, into_reg: Writable, ty: Type) -> Self::I; @@ -1520,22 +1516,17 @@ impl Callee { extension, .. } => { - // However, we have to respect the extention mode for stack + // However, we have to respect the extension mode for stack // slots, or else we grab the wrong bytes on big-endian. let ext = M::get_ext_mode(sigs[self.sig].call_conv, extension); - let ty = match (ext, ty_bits(ty) as u32) { - (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) - if n < M::word_bits() => - { + let ty = + if ext != ArgumentExtension::None && M::word_bits() > ty_bits(ty) as u32 { M::word_type() - } - _ => ty, - }; + } else { + ty + }; insts.push(M::gen_load_stack( - StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, - ty, - ), + StackAMode::ArgOffset(offset, ty), *into_reg, ty, )); @@ -1560,10 +1551,7 @@ impl Callee { } else { // Buffer address is implicitly defined by the ABI. insts.push(M::gen_get_stack_addr( - StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, - I8, - ), + StackAMode::ArgOffset(offset, I8), into_reg, I8, )); @@ -1586,10 +1574,7 @@ impl Callee { // This was allocated in the `init` routine. let addr_reg = self.arg_temp_reg[idx].unwrap(); insts.push(M::gen_load_stack( - StackAMode::FPOffset( - M::fp_to_arg_offset(self.call_conv, &self.flags) + offset, - ty, - ), + StackAMode::ArgOffset(offset, ty), addr_reg, ty, )); @@ -2357,10 +2342,7 @@ impl CallSite { "tail calls require frame pointers to be enabled" ); - StackAMode::FPOffset( - offset + M::fp_to_arg_offset(self.caller_conv, &self.flags), - ty, - ) + StackAMode::ArgOffset(offset, ty) } else { StackAMode::SPOffset(offset, ty) }; From 6a2f03cab821d0bee36131f455bf1df17229cdcf Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Wed, 3 Apr 2024 09:30:22 -0700 Subject: [PATCH 2/2] Review comments @bjorn3 correctly pointed out that I had changed the overflow behavior of this address computation. The existing code always added the result of `fp_to_arg_offset` using `i64` addition. It used Rust's default overflow behavior for addition, which panics in debug builds and wraps in release builds. In this commit I'm preserving that behavior: - s390x doesn't add anything, so can't overflow. - aarch64 and riscv64 use `i64` offsets in `FPOffset` address modes, so the addition is still using `i64` addition. - x64 does a checked narrowing to `i32`, so it's important to do the addition before that, on the wider `i64` offset. --- cranelift/codegen/src/isa/x64/abi.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 3eab5813452c..ab0859a46143 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -1020,10 +1020,13 @@ impl From for SyntheticAmode { // `expect()`s should never fail. match amode { StackAMode::ArgOffset(off, _ty) => { - let off = i32::try_from(off) - .expect("Offset in FPOffset is greater than 2GB; should hit impl limit first"); + let off = + i32::try_from(off + 16) // frame pointer + return address + .expect( + "Offset in ArgOffset is greater than 2GB; should hit impl limit first", + ); SyntheticAmode::Real(Amode::ImmReg { - simm32: off + 16, // frame pointer + return address + simm32: off, base: regs::rbp(), flags: MemFlags::trusted(), })