From 2788550b7e133333f8c2d92db6897f0b69abbddc Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 28 Feb 2022 15:21:58 -0800 Subject: [PATCH 1/2] Unwind: separate the "set frame reg" and "define new frame" operations to fix metadata bug related to stack checks. In #3859 we discovered that the unwind metadata info on Windows is placed slightly incorrectly in the presence of explicit stack checks in function prologues. In particular, if the stack check fails, then we start the process of backtracing from a state where we have actually updated rbp (the frame pointer) but we have not emitted the metadata saying we have done so. The fix is to emit the `SetFPReg` Win32 unwind op exactly at the offset of the `mov rbp, rsp` instruction, not after the stack check. However, actually achieving this output given our current unwind design (which abstracts over SysV and Win32) is slightly tricky, because: - We previously had a single `DefineNewFrame` op that both indicated where the FP was set, and gave the size of the clobbers; - We don't know the size of the clobbers until we generate the clobber-save sequence; and - The clobber-save sequence is generated after any explicit stack check. To resolve the problem, this PR splits the `DefineNewFrame` op into two pieces: `SetFrameReg` and `DefineNewFrame`. On x86-64 and aarch64 this is emitted in the appropriate place. (`s390x` does not use a frame register, so has no need for this op.) This should resolve the issue we were seeing with Windows-2022 and unwinding. (Thanks to @alexcrichton, @iximeow and @peterhuene for help debugging this!) --- .github/workflows/main.yml | 3 ++- cranelift/codegen/src/isa/aarch64/abi.rs | 7 ++++++ cranelift/codegen/src/isa/unwind.rs | 23 +++++++++++-------- cranelift/codegen/src/isa/unwind/systemv.rs | 17 ++++++++------ cranelift/codegen/src/isa/unwind/winx64.rs | 4 +++- cranelift/codegen/src/isa/x64/abi.rs | 7 ++++++ .../filetests/filetests/isa/x64/fastcall.clif | 10 ++++++++ 7 files changed, 53 insertions(+), 18 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bf16a384594d..46e598af88d3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -219,7 +219,8 @@ jobs: # defaults to x86_64-apple-darwin - os: macos-latest - os: windows-2019 - - os: windows-2019 + - os: windows-2022 + - os: windows-2022 target: x86_64-pc-windows-gnu - os: ubuntu-latest target: aarch64-unknown-linux-gnu diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index bc75ce85f398..c8f6075064e6 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -671,6 +671,13 @@ impl ABIMachineSpec for AArch64MachineDeps { shift12: false, }, }); + + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::SetFrameReg, + }); + } + insts } diff --git a/cranelift/codegen/src/isa/unwind.rs b/cranelift/codegen/src/isa/unwind.rs index bf4561e840df..97b7cf7f25fe 100644 --- a/cranelift/codegen/src/isa/unwind.rs +++ b/cranelift/codegen/src/isa/unwind.rs @@ -118,8 +118,8 @@ pub enum UnwindInfo { /// push rbp /// unwind UnwindInst::PushFrameRegs { offset_upward_to_caller_sp: 16 } /// mov rbp, rsp -/// unwind UnwindInst::DefineNewFrame { offset_upward_to_caller_sp: 16, -/// offset_downward_to_clobbers: 16 } +/// unwind UnwindInst::SetFrameReg { offset_upward_to_caller_sp: 16 } +/// unwind UnwindInst::DefineNewFrame { offset_downward_to_clobbers: 16 } /// sub rsp, 32 /// mov [rsp+16], r12 /// unwind UnwindInst::SaveReg { reg: R12, clobber_offset: 0 } @@ -143,15 +143,20 @@ pub enum UnwindInst { }, /// The frame-pointer register for this architecture has just been /// set to the current stack location. We wish to define a new - /// frame that is anchored on this new FP value. Offsets are provided - /// upward to the caller's stack frame and downward toward the clobber - /// area. We expect this pseudo-op to come after `PushFrameRegs`. + /// frame that is anchored on this new FP value. We expect this + /// pseudo-op to come after `PushFrameRegs`. + SetFrameReg, + /// We now know the size of clobbers, if any, and can define a + /// frame in which we will give their offsets. Offsets are + /// provided upward to the caller's stack frame and downward + /// toward the clobber area. We expect this pseudo op to come + /// after `SetFrameReg`. DefineNewFrame { - /// The offset from the current SP and FP value upward to the value of - /// SP at the callsite that invoked us. + /// The offset from the current SP and FP value upward to the + /// value of SP at the callsite that invoked us. offset_upward_to_caller_sp: u32, - /// The offset from the current SP and FP value downward to the start of - /// the clobber area. + /// The offset from the current SP and FP value downward to + /// the start of the clobber area. offset_downward_to_clobbers: u32, }, /// The stack pointer was adjusted to allocate the stack. diff --git a/cranelift/codegen/src/isa/unwind/systemv.rs b/cranelift/codegen/src/isa/unwind/systemv.rs index e2c2a381a30d..a4b8cc7f0ac5 100644 --- a/cranelift/codegen/src/isa/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/unwind/systemv.rs @@ -203,18 +203,21 @@ pub(crate) fn create_unwind_info_from_insts>( )); } } - &UnwindInst::DefineNewFrame { - offset_upward_to_caller_sp, - offset_downward_to_clobbers, - } => { + &UnwindInst::SetFrameReg => { // Define CFA in terms of FP. Note that we assume it was already // defined correctly in terms of the current SP, and FP has just // been set to the current SP, so we do not need to change the // offset, only the register. (This is done only if the target // defines a frame pointer register.) - if let Some(fp) = mr.fp() { - instructions.push((instruction_offset, CallFrameInstruction::CfaRegister(fp))); - } + instructions.push(( + instruction_offset, + CallFrameInstruction::CfaRegister(mr.fp().unwrap()), + )); + } + &UnwindInst::DefineNewFrame { + offset_upward_to_caller_sp, + offset_downward_to_clobbers, + } => { // Record initial CFA offset. This will be used with later // StackAlloc calls if we do not have a frame pointer. cfa_offset = offset_upward_to_caller_sp; diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 349b94cfe6f8..bc377c3531a1 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -277,12 +277,14 @@ pub(crate) fn create_unwind_info_from_insts { + unwind_codes.push(UnwindCode::SetFPReg { instruction_offset }); + } &UnwindInst::DefineNewFrame { offset_downward_to_clobbers, .. } => { frame_register_offset = ensure_unwind_offset(offset_downward_to_clobbers)?; - unwind_codes.push(UnwindCode::SetFPReg { instruction_offset }); } &UnwindInst::StackAlloc { size } => { unwind_codes.push(UnwindCode::StackAlloc { diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 620dfec16851..c1a4081fed7a 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -463,6 +463,13 @@ impl ABIMachineSpec for X64ABIMachineSpec { // `mov %rsp, %rbp` // RSP is now 0 % 16 insts.push(Inst::mov_r_r(OperandSize::Size64, r_rsp, w_rbp)); + + if flags.unwind_info() { + insts.push(Inst::Unwind { + inst: UnwindInst::SetFrameReg, + }); + } + insts } diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index 247af5ac3847..c37a6660944c 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -11,6 +11,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movq %rcx, %rax @@ -26,6 +27,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movq %rdx, %rax @@ -41,6 +43,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movq %r8, %rax @@ -56,6 +59,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movq %r9, %rax @@ -71,6 +75,7 @@ block0(v0: i64, v1: i64, v2: f64, v3: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movdqa %xmm2, %xmm0 @@ -86,6 +91,7 @@ block0(v0: i64, v1: i64, v2: f64, v3: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movq %r9, %rax @@ -111,6 +117,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movq 48(%rbp), %r11 @@ -127,6 +134,7 @@ block0(v0: i128, v1: i64, v2: i128, v3: i128): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; movq 48(%rbp), %r11 @@ -149,6 +157,7 @@ block0(v0: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: ; cvtsi2sd %rcx, %xmm2 @@ -220,6 +229,7 @@ block0(v0: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp +; unwind SetFrameReg ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 144 } ; subq %rsp, $240, %rsp ; movdqu %xmm6, 96(%rsp) From d4f2b25a4a8060f225d48a543fdbbb03c4abba98 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 9 May 2022 16:59:18 -0700 Subject: [PATCH 2/2] Add fastcall compile-test with stack limit check. --- cranelift/codegen/src/isa/x64/abi.rs | 1 + .../filetests/filetests/isa/x64/fastcall.clif | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index c1a4081fed7a..04c25f2247f8 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -537,6 +537,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { Writable::from_reg(regs::rsp()), )); } + // Store each clobbered register in order at offsets from RSP, // placing them above the fixed frame slots. let mut cur_offset = fixed_frame_storage_size; diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index c37a6660944c..24ab95b2169d 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -315,3 +315,30 @@ block0(v0: i64): ; popq %rbp ; ret +function %f10(i64 vmctx, i64, i64, i64) -> i64 windows_fastcall { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + stack_limit = gv1 + ss0 = explicit_slot 20 + +block0(v0: i64, v1: i64, v2: i64, v3: i64): + return v0 +} + +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind SetFrameReg +; movq 0(%rcx), %r10 +; addq %r10, $32, %r10 +; cmpq %rsp, %r10 +; jbe ; ud2 stk_ovf ; +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; subq %rsp, $32, %rsp +; block0: +; movq %rcx, %rax +; addq %rsp, $32, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret +