From 1d19faebb537e7e5049efcba41d20f0d739db3e9 Mon Sep 17 00:00:00 2001
From: Anton Kirilov <anton.kirilov@arm.com>
Date: Thu, 29 Apr 2021 15:53:01 +0100
Subject: [PATCH 1/3] Cranelift AArch64: Simplify leaf functions that do not
 use the stack

Leaf functions that do not use the stack (e.g. do not clobber any
callee-saved registers) do not need a frame record.

Copyright (c) 2021, Arm Limited.
---
 cranelift/codegen/src/isa/aarch64/abi.rs      |  52 +++-
 .../src/isa/aarch64/inst/unwind/systemv.rs    | 104 +++++++
 cranelift/codegen/src/isa/aarch64/mod.rs      |  16 +-
 cranelift/codegen/src/isa/arm32/abi.rs        |  48 +--
 cranelift/codegen/src/isa/s390x/abi.rs        |  43 ++-
 cranelift/codegen/src/isa/x64/abi.rs          |  89 +++---
 cranelift/codegen/src/machinst/abi_impl.rs    |  66 +++--
 .../filetests/isa/aarch64/amodes.clif         | 160 ++--------
 .../filetests/isa/aarch64/arithmetic.clif     | 165 +++--------
 .../filetests/isa/aarch64/atomic_load.clif    |   9 -
 .../filetests/isa/aarch64/atomic_store.clif   |   9 -
 .../filetests/isa/aarch64/basic1.clif         |   3 -
 .../filetests/isa/aarch64/bitops.clif         | 200 +++----------
 .../filetests/filetests/isa/aarch64/call.clif |  26 +-
 .../filetests/isa/aarch64/condbr.clif         | 174 +++--------
 .../filetests/isa/aarch64/constants.clif      |  95 ++----
 .../filetests/isa/aarch64/extend-op.clif      |  45 +--
 .../filetests/isa/aarch64/fcvt-small.clif     |  24 --
 .../filetests/isa/aarch64/floating-point.clif | 280 ++++--------------
 .../filetests/isa/aarch64/heap_addr.clif      |  10 +-
 .../isa/aarch64/iconst-icmp-small.clif        |  19 +-
 .../filetests/isa/aarch64/multivalue-ret.clif |   5 +-
 .../isa/aarch64/narrow-arithmetic.clif        |  25 +-
 .../filetests/isa/aarch64/reduce.clif         |  20 +-
 .../filetests/isa/aarch64/reftypes.clif       |  20 +-
 .../filetests/isa/aarch64/shift-op.clif       |  10 +-
 .../filetests/isa/aarch64/shift-rotate.clif   | 126 ++------
 .../filetests/isa/aarch64/simd-extmul.clif    |  12 -
 .../isa/aarch64/simd-pairwise-add.clif        |  40 +--
 .../filetests/filetests/isa/aarch64/simd.clif |  55 +---
 .../filetests/isa/aarch64/simd_load_zero.clif |  10 +-
 .../filetests/isa/aarch64/stack-limit.clif    |  10 +-
 .../filetests/isa/aarch64/symbol-value.clif   |   5 +-
 .../isa/aarch64/uextend-sextend.clif          |  60 +---
 34 files changed, 650 insertions(+), 1385 deletions(-)
diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index 586d88cab3a4..46eabe5de5ec 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -693,20 +693,30 @@ impl ABIMachineSpec for AArch64MachineDeps {
     // nominal SP offset; abi_impl generic code will do that.
     fn gen_clobber_save(
         call_conv: isa::CallConv,
+        setup_frame: bool,
         flags: &settings::Flags,
-        clobbers: &Set<Writable<RealReg>>,
+        clobbered_callee_saves: &Vec<Writable<RealReg>>,
         fixed_frame_storage_size: u32,
         _outgoing_args_size: u32,
     ) -> (u64, SmallVec<[Inst; 16]>) {
-        let mut insts = SmallVec::new();
-        let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
+        let mut clobbered_int = vec![];
+        let mut clobbered_vec = vec![];
+
+        for &reg in clobbered_callee_saves.iter() {
+            match reg.to_reg().get_class() {
+                RegClass::I64 => clobbered_int.push(reg),
+                RegClass::V128 => clobbered_vec.push(reg),
+                class => panic!("Unexpected RegClass: {:?}", class),
+            }
+        }
 
         let (int_save_bytes, vec_save_bytes) =
             saved_reg_stack_size(call_conv, &clobbered_int, &clobbered_vec);
         let total_save_bytes = int_save_bytes + vec_save_bytes;
         let clobber_size = total_save_bytes as i32;
+        let mut insts = SmallVec::new();
 
-        if flags.unwind_info() {
+        if flags.unwind_info() && setup_frame {
             // The *unwind* frame (but not the actual frame) starts at the
             // clobbers, just below the saved FP/LR pair.
             insts.push(Inst::Unwind {
@@ -916,7 +926,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
         _outgoing_args_size: u32,
     ) -> SmallVec<[Inst; 16]> {
         let mut insts = SmallVec::new();
-        let (clobbered_int, clobbered_vec) = get_regs_saved_in_prologue(call_conv, clobbers);
+        let (clobbered_int, clobbered_vec) = get_regs_restored_in_epilogue(call_conv, clobbers);
 
         // Free the fixed frame if necessary.
         if fixed_frame_storage_size > 0 {
@@ -1180,6 +1190,36 @@ impl ABIMachineSpec for AArch64MachineDeps {
             ir::ArgumentExtension::None
         }
     }
+
+    fn get_clobbered_callee_saves(
+        call_conv: isa::CallConv,
+        regs: &Set<Writable<RealReg>>,
+    ) -> Vec<Writable<RealReg>> {
+        let mut regs: Vec<Writable<RealReg>> = regs
+            .iter()
+            .cloned()
+            .filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()))
+            .collect();
+
+        // Sort registers for deterministic code output. We can do an unstable
+        // sort because the registers will be unique (there are no dups).
+        regs.sort_unstable_by_key(|r| r.to_reg().get_index());
+        regs
+    }
+
+    fn is_frame_setup_needed(
+        is_leaf: bool,
+        stack_args_size: u32,
+        num_clobbered_callee_saves: usize,
+        fixed_frame_storage_size: u32,
+    ) -> bool {
+        !is_leaf
+            // The function arguments that are passed on the stack are addressed
+            // relative to the Frame Pointer.
+            || stack_args_size > 0
+            || num_clobbered_callee_saves > 0
+            || fixed_frame_storage_size > 0
+    }
 }
 
 /// Is this type supposed to be seen on this machine? E.g. references of the
@@ -1224,7 +1264,7 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool {
 /// Return the set of all integer and vector registers that must be saved in the
 /// prologue and restored in the epilogue, given the set of all registers
 /// written by the function's body.
-fn get_regs_saved_in_prologue(
+fn get_regs_restored_in_epilogue(
     call_conv: isa::CallConv,
     regs: &Set<Writable<RealReg>>,
 ) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
index b514dc20b820..8f7edda09c90 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
@@ -66,3 +66,107 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
         Some(8)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::cursor::{Cursor, FuncCursor};
+    use crate::ir::{
+        types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData,
+        StackSlotKind,
+    };
+    use crate::isa::{lookup, CallConv};
+    use crate::settings::{builder, Flags};
+    use crate::Context;
+    use gimli::write::Address;
+    use std::str::FromStr;
+    use target_lexicon::triple;
+
+    #[test]
+    fn test_simple_func() {
+        let isa = lookup(triple!("aarch64"))
+            .expect("expect aarch64 ISA")
+            .finish(Flags::new(builder()));
+
+        let mut context = Context::for_function(create_function(
+            CallConv::SystemV,
+            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
+        ));
+
+        context.compile(&*isa).expect("expected compilation");
+
+        let fde = match context
+            .create_unwind_info(isa.as_ref())
+            .expect("can create unwind info")
+        {
+            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+                info.to_fde(Address::Constant(1234))
+            }
+            _ => panic!("expected unwind information"),
+        };
+
+        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 24, lsda: None, instructions: [(0, ValExpression(Register(34), Expression { operations: [Simple(DwOp(48))] })), (4, CfaOffset(16)), (4, Offset(Register(29), -16)), (4, Offset(Register(30), -8)), (8, CfaRegister(Register(29)))] }");
+    }
+
+    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
+        let mut func =
+            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
+
+        let block0 = func.dfg.make_block();
+        let mut pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+        pos.ins().return_(&[]);
+
+        if let Some(stack_slot) = stack_slot {
+            func.stack_slots.push(stack_slot);
+        }
+
+        func
+    }
+
+    #[test]
+    fn test_multi_return_func() {
+        let isa = lookup(triple!("aarch64"))
+            .expect("expect aarch64 ISA")
+            .finish(Flags::new(builder()));
+
+        let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
+
+        context.compile(&*isa).expect("expected compilation");
+
+        let fde = match context
+            .create_unwind_info(isa.as_ref())
+            .expect("can create unwind info")
+        {
+            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
+                info.to_fde(Address::Constant(4321))
+            }
+            _ => panic!("expected unwind information"),
+        };
+
+        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [] }");
+    }
+
+    fn create_multi_return_function(call_conv: CallConv) -> Function {
+        let mut sig = Signature::new(call_conv);
+        sig.params.push(AbiParam::new(types::I32));
+        let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
+
+        let block0 = func.dfg.make_block();
+        let v0 = func.dfg.append_block_param(block0, types::I32);
+        let block1 = func.dfg.make_block();
+        let block2 = func.dfg.make_block();
+
+        let mut pos = FuncCursor::new(&mut func);
+        pos.insert_block(block0);
+        pos.ins().brnz(v0, block2, &[]);
+        pos.ins().jump(block1, &[]);
+
+        pos.insert_block(block1);
+        pos.ins().return_(&[]);
+
+        pos.insert_block(block2);
+        pos.ins().return_(&[]);
+
+        func
+    }
+}
diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs
index a2161645bc62..3243d0f7b462 100644
--- a/cranelift/codegen/src/isa/aarch64/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/mod.rs
@@ -218,15 +218,11 @@ mod test {
         let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
         let code = &buffer.data[..];
 
-        // stp x29, x30, [sp, #-16]!
-        // mov x29, sp
         // mov x1, #0x1234
         // add w0, w0, w1
-        // ldp x29, x30, [sp], #16
         // ret
         let golden = vec![
-            0xfd, 0x7b, 0xbf, 0xa9, 0xfd, 0x03, 0x00, 0x91, 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00,
-            0x01, 0x0b, 0xfd, 0x7b, 0xc1, 0xa8, 0xc0, 0x03, 0x5f, 0xd6,
+            0x81, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x01, 0x0b, 0xc0, 0x03, 0x5f, 0xd6,
         ];
 
         assert_eq!(code, &golden[..]);
@@ -277,8 +273,6 @@ mod test {
             .unwrap();
         let code = &result.buffer.data[..];
 
-        // stp	x29, x30, [sp, #-16]!
-        // mov	x29, sp
         // mov	x1, #0x1234                	// #4660
         // add	w0, w0, w1
         // mov	w1, w0
@@ -291,13 +285,11 @@ mod test {
         // cbnz	x1, 0x18
         // mov	x1, #0x1234                	// #4660
         // sub	w0, w0, w1
-        // ldp	x29, x30, [sp], #16
         // ret
         let golden = vec![
-            253, 123, 191, 169, 253, 3, 0, 145, 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161,
-            0, 0, 181, 129, 70, 130, 210, 1, 0, 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3,
-            0, 42, 97, 255, 255, 181, 129, 70, 130, 210, 0, 0, 1, 75, 253, 123, 193, 168, 192, 3,
-            95, 214,
+            129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161, 0, 0, 181, 129, 70, 130, 210, 1, 0,
+            1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3, 0, 42, 97, 255, 255, 181, 129, 70,
+            130, 210, 0, 0, 1, 75, 192, 3, 95, 214,
         ];
 
         assert_eq!(code, &golden[..]);
diff --git a/cranelift/codegen/src/isa/arm32/abi.rs b/cranelift/codegen/src/isa/arm32/abi.rs
index 5a4145d8b7d5..aba6e548d854 100644
--- a/cranelift/codegen/src/isa/arm32/abi.rs
+++ b/cranelift/codegen/src/isa/arm32/abi.rs
@@ -316,8 +316,9 @@ impl ABIMachineSpec for Arm32MachineDeps {
     /// nominal SP offset; caller will do that.
     fn gen_clobber_save(
         _call_conv: isa::CallConv,
+        _setup_frame: bool,
         _flags: &settings::Flags,
-        clobbers: &Set<Writable<RealReg>>,
+        clobbered_callee_saves: &Vec<Writable<RealReg>>,
         fixed_frame_storage_size: u32,
         _outgoing_args_size: u32,
     ) -> (u64, SmallVec<[Inst; 16]>) {
@@ -325,8 +326,7 @@ impl ABIMachineSpec for Arm32MachineDeps {
         if fixed_frame_storage_size > 0 {
             insts.extend(Self::gen_sp_reg_adjust(-(fixed_frame_storage_size as i32)).into_iter());
         }
-        let clobbered_vec = get_callee_saves(clobbers);
-        let mut clobbered_vec: Vec<_> = clobbered_vec
+        let mut clobbered_vec: Vec<_> = clobbered_callee_saves
             .into_iter()
             .map(|r| r.to_reg().to_reg())
             .collect();
@@ -345,14 +345,14 @@ impl ABIMachineSpec for Arm32MachineDeps {
     }
 
     fn gen_clobber_restore(
-        _call_conv: isa::CallConv,
+        call_conv: isa::CallConv,
         _flags: &settings::Flags,
         clobbers: &Set<Writable<RealReg>>,
         _fixed_frame_storage_size: u32,
         _outgoing_args_size: u32,
     ) -> SmallVec<[Inst; 16]> {
         let mut insts = SmallVec::new();
-        let clobbered_vec = get_callee_saves(clobbers);
+        let clobbered_vec = Self::get_clobbered_callee_saves(call_conv, clobbers);
         let mut clobbered_vec: Vec<_> = clobbered_vec
             .into_iter()
             .map(|r| Writable::from_reg(r.to_reg().to_reg()))
@@ -468,6 +468,31 @@ impl ABIMachineSpec for Arm32MachineDeps {
     ) -> ir::ArgumentExtension {
         specified
     }
+
+    fn get_clobbered_callee_saves(
+        _call_conv: isa::CallConv,
+        regs: &Set<Writable<RealReg>>,
+    ) -> Vec<Writable<RealReg>> {
+        let mut ret = Vec::new();
+        for &reg in regs.iter() {
+            if is_callee_save(reg.to_reg()) {
+                ret.push(reg);
+            }
+        }
+
+        // Sort registers for deterministic code output.
+        ret.sort_by_key(|r| r.to_reg().get_index());
+        ret
+    }
+
+    fn is_frame_setup_needed(
+        _is_leaf: bool,
+        _stack_args_size: u32,
+        _num_clobbered_callee_saves: usize,
+        _fixed_frame_storage_size: u32,
+    ) -> bool {
+        true
+    }
 }
 
 fn is_callee_save(r: RealReg) -> bool {
@@ -475,19 +500,6 @@ fn is_callee_save(r: RealReg) -> bool {
     4 <= enc && enc <= 10
 }
 
-fn get_callee_saves(regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
-    let mut ret = Vec::new();
-    for &reg in regs.iter() {
-        if is_callee_save(reg.to_reg()) {
-            ret.push(reg);
-        }
-    }
-
-    // Sort registers for deterministic code output.
-    ret.sort_by_key(|r| r.to_reg().get_index());
-    ret
-}
-
 fn is_reg_clobbered_by_call(r: RealReg) -> bool {
     let enc = r.get_hw_encoding();
     enc <= 3
diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs
index 14344e58663c..d0dd517e65c7 100644
--- a/cranelift/codegen/src/isa/s390x/abi.rs
+++ b/cranelift/codegen/src/isa/s390x/abi.rs
@@ -459,16 +459,25 @@ impl ABIMachineSpec for S390xMachineDeps {
     // Returns stack bytes used as well as instructions. Does not adjust
     // nominal SP offset; abi_impl generic code will do that.
     fn gen_clobber_save(
-        call_conv: isa::CallConv,
+        _call_conv: isa::CallConv,
+        _setup_frame: bool,
         flags: &settings::Flags,
-        clobbers: &Set<Writable<RealReg>>,
+        clobbered_callee_saves: &Vec<Writable<RealReg>>,
         fixed_frame_storage_size: u32,
         outgoing_args_size: u32,
     ) -> (u64, SmallVec<[Inst; 16]>) {
         let mut insts = SmallVec::new();
+        let mut clobbered_fpr = vec![];
+        let mut clobbered_gpr = vec![];
+
+        for &reg in clobbered_callee_saves.iter() {
+            match reg.to_reg().get_class() {
+                RegClass::I64 => clobbered_gpr.push(reg),
+                RegClass::F64 => clobbered_fpr.push(reg),
+                class => panic!("Unexpected RegClass: {:?}", class),
+            }
+        }
 
-        // Collect clobbered registers.
-        let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
         let mut first_clobbered_gpr = 16;
         for reg in clobbered_gpr {
             let enc = reg.to_reg().get_hw_encoding();
@@ -718,6 +727,32 @@ impl ABIMachineSpec for S390xMachineDeps {
     ) -> ir::ArgumentExtension {
         specified
     }
+
+    fn get_clobbered_callee_saves(
+        call_conv: isa::CallConv,
+        regs: &Set<Writable<RealReg>>,
+    ) -> Vec<Writable<RealReg>> {
+        let mut regs: Vec<Writable<RealReg>> = regs
+            .iter()
+            .cloned()
+            .filter(|r| is_reg_saved_in_prologue(call_conv, r.to_reg()))
+            .collect();
+
+        // Sort registers for deterministic code output. We can do an unstable
+        // sort because the registers will be unique (there are no dups).
+        regs.sort_unstable_by_key(|r| r.to_reg().get_index());
+        regs
+    }
+
+    fn is_frame_setup_needed(
+        _is_leaf: bool,
+        _stack_args_size: u32,
+        _num_clobbered_callee_saves: usize,
+        _fixed_frame_storage_size: u32,
+    ) -> bool {
+        // The call frame set-up is handled by gen_clobber_save().
+        false
+    }
 }
 
 fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool {
diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index 9194c2a9c42c..f0c36dd6d784 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -496,18 +496,17 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     }
 
     fn gen_clobber_save(
-        call_conv: isa::CallConv,
+        _call_conv: isa::CallConv,
+        setup_frame: bool,
         flags: &settings::Flags,
-        clobbers: &Set<Writable<RealReg>>,
+        clobbered_callee_saves: &Vec<Writable<RealReg>>,
         fixed_frame_storage_size: u32,
         _outgoing_args_size: u32,
     ) -> (u64, SmallVec<[Self::I; 16]>) {
         let mut insts = SmallVec::new();
-        // Find all clobbered registers that are callee-save.
-        let clobbered = get_callee_saves(&call_conv, clobbers);
-        let clobbered_size = compute_clobber_size(&clobbered);
+        let clobbered_size = compute_clobber_size(&clobbered_callee_saves);
 
-        if flags.unwind_info() {
+        if flags.unwind_info() && setup_frame {
             // Emit unwind info: start the frame. The frame (from unwind
             // consumers' point of view) starts at clobbbers, just below
             // the FP and return address. Spill slots and stack slots are
@@ -534,7 +533,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         // Store each clobbered register in order at offsets from RSP,
         // placing them above the fixed frame slots.
         let mut cur_offset = fixed_frame_storage_size;
-        for reg in &clobbered {
+        for reg in clobbered_callee_saves {
             let r_reg = reg.to_reg();
             let off = cur_offset;
             match r_reg.get_class() {
@@ -579,14 +578,14 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     ) -> SmallVec<[Self::I; 16]> {
         let mut insts = SmallVec::new();
 
-        let clobbered = get_callee_saves(&call_conv, clobbers);
-        let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered);
+        let clobbered_callee_saves = Self::get_clobbered_callee_saves(call_conv, clobbers);
+        let stack_size = fixed_frame_storage_size + compute_clobber_size(&clobbered_callee_saves);
 
         // Restore regs by loading from offsets of RSP. RSP will be
         // returned to nominal-RSP at this point, so we can use the
         // same offsets that we used when saving clobbers above.
         let mut cur_offset = fixed_frame_storage_size;
-        for reg in &clobbered {
+        for reg in &clobbered_callee_saves {
             let rreg = reg.to_reg();
             match rreg.get_class() {
                 RegClass::I64 => {
@@ -797,6 +796,47 @@ impl ABIMachineSpec for X64ABIMachineSpec {
             ir::ArgumentExtension::None
         }
     }
+
+    fn get_clobbered_callee_saves(
+        call_conv: CallConv,
+        regs: &Set<Writable<RealReg>>,
+    ) -> Vec<Writable<RealReg>> {
+        let mut regs: Vec<Writable<RealReg>> = match call_conv {
+            CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
+                .iter()
+                .cloned()
+                .filter(|r| is_callee_save_baldrdash(r.to_reg()))
+                .collect(),
+            CallConv::BaldrdashWindows => {
+                todo!("baldrdash windows");
+            }
+            CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
+                .iter()
+                .cloned()
+                .filter(|r| is_callee_save_systemv(r.to_reg()))
+                .collect(),
+            CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
+                .iter()
+                .cloned()
+                .filter(|r| is_callee_save_fastcall(r.to_reg()))
+                .collect(),
+            CallConv::Probestack => todo!("probestack?"),
+            CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
+        };
+        // Sort registers for deterministic code output. We can do an unstable sort because the
+        // registers will be unique (there are no dups).
+        regs.sort_unstable_by_key(|r| r.to_reg().get_index());
+        regs
+    }
+
+    fn is_frame_setup_needed(
+        _is_leaf: bool,
+        _stack_args_size: u32,
+        _num_clobbered_callee_saves: usize,
+        _fixed_frame_storage_size: u32,
+    ) -> bool {
+        true
+    }
 }
 
 impl From<StackAMode> for SyntheticAmode {
@@ -984,35 +1024,6 @@ fn is_callee_save_fastcall(r: RealReg) -> bool {
     }
 }
 
-fn get_callee_saves(call_conv: &CallConv, regs: &Set<Writable<RealReg>>) -> Vec<Writable<RealReg>> {
-    let mut regs: Vec<Writable<RealReg>> = match call_conv {
-        CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs
-            .iter()
-            .cloned()
-            .filter(|r| is_callee_save_baldrdash(r.to_reg()))
-            .collect(),
-        CallConv::BaldrdashWindows => {
-            todo!("baldrdash windows");
-        }
-        CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => regs
-            .iter()
-            .cloned()
-            .filter(|r| is_callee_save_systemv(r.to_reg()))
-            .collect(),
-        CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => regs
-            .iter()
-            .cloned()
-            .filter(|r| is_callee_save_fastcall(r.to_reg()))
-            .collect(),
-        CallConv::Probestack => todo!("probestack?"),
-        CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
-    };
-    // Sort registers for deterministic code output. We can do an unstable sort because the
-    // registers will be unique (there are no dups).
-    regs.sort_unstable_by_key(|r| r.to_reg().get_index());
-    regs
-}
-
 fn compute_clobber_size(clobbers: &Vec<Writable<RealReg>>) -> u32 {
     let mut clobbered_size = 0;
     for reg in clobbers {
diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs
index 75935ee90526..db37a5f81bff 100644
--- a/cranelift/codegen/src/machinst/abi_impl.rs
+++ b/cranelift/codegen/src/machinst/abi_impl.rs
@@ -428,20 +428,35 @@ pub trait ABIMachineSpec {
     /// Generate a probestack call.
     fn gen_probestack(_frame_size: u32) -> SmallInstVec<Self::I>;
 
-    /// Generate a clobber-save sequence. This takes the list of *all* registers
-    /// written/modified by the function body. The implementation here is
-    /// responsible for determining which of these are callee-saved according to
-    /// the ABI. It should return a sequence of instructions that "push" or
-    /// otherwise save these values to the stack. The sequence of instructions
-    /// should adjust the stack pointer downward, and should align as necessary
-    /// according to ABI requirements.
+    /// Get all clobbered registers that are callee-saved according to the ABI; the result
+    /// is sorted by register index, which keeps the generated code deterministic.
+    fn get_clobbered_callee_saves(
+        call_conv: isa::CallConv,
+        regs: &Set<Writable<RealReg>>,
+    ) -> Vec<Writable<RealReg>>;
+
+    /// Determine whether it is necessary to generate the usual frame-setup
+    /// sequence (refer to gen_prologue_frame_setup()).
+    fn is_frame_setup_needed(
+        is_leaf: bool,
+        stack_args_size: u32,
+        num_clobbered_callee_saves: usize,
+        fixed_frame_storage_size: u32,
+    ) -> bool;
+
+    /// Generate a clobber-save sequence. The implementation here should return
+    /// a sequence of instructions that "push" or otherwise save to the stack all
+    /// registers written/modified by the function body that are callee-saved.
+    /// The sequence of instructions should adjust the stack pointer downward,
+    /// and should align as necessary according to ABI requirements.
     ///
     /// Returns stack bytes used as well as instructions. Does not adjust
     /// nominal SP offset; caller will do that.
     fn gen_clobber_save(
         call_conv: isa::CallConv,
+        setup_frame: bool,
         flags: &settings::Flags,
-        clobbers: &Set<Writable<RealReg>>,
+        clobbered_callee_saves: &Vec<Writable<RealReg>>,
         fixed_frame_storage_size: u32,
         outgoing_args_size: u32,
     ) -> (u64, SmallVec<[Self::I; 16]>);
@@ -615,6 +630,8 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
     /// Are we to invoke the probestack function in the prologue? If so,
     /// what is the minimum size at which we must invoke it?
     probestack_min_frame: Option<u32>,
+    /// Whether it is necessary to generate the usual frame-setup sequence.
+    setup_frame: bool,
 
     _mach: PhantomData<M>,
 }
@@ -706,6 +723,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
             is_leaf: f.is_leaf(),
             stack_limit,
             probestack_min_frame,
+            setup_frame: true,
             _mach: PhantomData,
         })
     }
@@ -1248,12 +1266,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
     }
 
     fn gen_prologue(&mut self) -> SmallInstVec<Self::I> {
-        let mut insts = smallvec![];
-        if !self.call_conv.extends_baldrdash() {
-            // set up frame
-            insts.extend(M::gen_prologue_frame_setup(&self.flags).into_iter());
-        }
-
         let bytes = M::word_bytes();
         let mut total_stacksize = self.stackslots_size + bytes * self.spillslots.unwrap() as u32;
         if self.call_conv.extends_baldrdash() {
@@ -1265,8 +1277,23 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
         }
         let mask = M::stack_align(self.call_conv) - 1;
         let total_stacksize = (total_stacksize + mask) & !mask; // 16-align the stack.
+        let clobbered_callee_saves = M::get_clobbered_callee_saves(self.call_conv, &self.clobbered);
+        let mut insts = smallvec![];
 
         if !self.call_conv.extends_baldrdash() {
+            self.fixed_frame_storage_size += total_stacksize;
+            self.setup_frame = M::is_frame_setup_needed(
+                self.is_leaf,
+                self.stack_args_size(),
+                clobbered_callee_saves.len(),
+                self.fixed_frame_storage_size,
+            );
+
+            if self.setup_frame {
+                // set up frame
+                insts.extend(M::gen_prologue_frame_setup(&self.flags).into_iter());
+            }
+
             // Leaf functions with zero stack don't need a stack check if one's
             // specified, otherwise always insert the stack check.
             if total_stacksize > 0 || !self.is_leaf {
@@ -1280,16 +1307,14 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
                     }
                 }
             }
-            if total_stacksize > 0 {
-                self.fixed_frame_storage_size += total_stacksize;
-            }
         }
 
         // Save clobbered registers.
         let (clobber_size, clobber_insts) = M::gen_clobber_save(
             self.call_conv,
+            self.setup_frame,
             &self.flags,
-            &self.clobbered,
+            &clobbered_callee_saves,
             self.fixed_frame_storage_size,
             self.outgoing_args_size,
         );
@@ -1329,7 +1354,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
         // offset for the rest of the body.
 
         if !self.call_conv.extends_baldrdash() {
-            insts.extend(M::gen_epilogue_frame_restore(&self.flags));
+            if self.setup_frame {
+                insts.extend(M::gen_epilogue_frame_restore(&self.flags));
+            }
+
             insts.push(M::gen_ret());
         }
 
diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
index fbab91d7f72e..6a1ce64689b2 100644
--- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -9,10 +9,7 @@ block0(v0: i64, v1: i32):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, UXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, UXTW]
 ; nextln: ret
 
 function %f2(i64, i32) -> i32 {
@@ -22,10 +19,7 @@ block0(v0: i64, v1: i32):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, UXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, UXTW]
 ; nextln: ret
 
 function %f3(i64, i32) -> i32 {
@@ -35,10 +29,7 @@ block0(v0: i64, v1: i32):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f4(i64, i32) -> i32 {
@@ -48,10 +39,7 @@ block0(v0: i64, v1: i32):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f5(i64, i32) -> i32 {
@@ -62,10 +50,7 @@ block0(v0: i64, v1: i32):
   return v4
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f6(i64, i32) -> i32 {
@@ -76,10 +61,7 @@ block0(v0: i64, v1: i32):
   return v4
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0, w1, SXTW]
 ; nextln: ret
 
 function %f7(i32, i32) -> i32 {
@@ -91,11 +73,8 @@ block0(v0: i32, v1: i32):
   return v5
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov w0, w0
+; check: mov w0, w0
 ; nextln: ldr w0, [x0, w1, UXTW]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f8(i64, i32) -> i32 {
@@ -112,13 +91,10 @@ block0(v0: i64, v1: i32):
 ; v6+4 = 2*v5 = 2*v4 + 2*v0 + 4 = 2*v2 + 2*v3 + 2*v0 + 4
 ;      = 2*sextend($x1) + 2*$x0 + 68
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x2, x0, #68
+; check: add x2, x0, #68
 ; nextln: add x0, x2, x0
 ; nextln: add x0, x0, x1, SXTW
 ; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f9(i64, i64, i64) -> i32 {
@@ -133,12 +109,9 @@ block0(v0: i64, v1: i64, v2: i64):
 
 ; v6 = $x0 + $x1 + $x2 + 48
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, x2
+; check: add x0, x0, x2
 ; nextln: add x0, x0, x1
 ; nextln: ldur w0, [x0, #48]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f10(i64, i64, i64) -> i32 {
@@ -153,13 +126,10 @@ block0(v0: i64, v1: i64, v2: i64):
 
 ; v6 = $x0 + $x1 + $x2 + 4100
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x3, #4100
+; check: movz x3, #4100
 ; nextln: add x1, x3, x1
 ; nextln: add x1, x1, x2
 ; nextln: ldr w0, [x1, x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f10() -> i32 {
@@ -171,11 +141,8 @@ block0:
 
 ; v6 = $x0 + $x1 + $x2 + 48
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #1234
+; check: movz x0, #1234
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f11(i64) -> i32 {
@@ -186,11 +153,8 @@ block0(v0: i64):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, #8388608
+; check: add x0, x0, #8388608
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f12(i64) -> i32 {
@@ -201,11 +165,8 @@ block0(v0: i64):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sub x0, x0, #4
+; check: sub x0, x0, #4
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f13(i64) -> i32 {
@@ -216,13 +177,10 @@ block0(v0: i64):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz w1, #51712
+; check: movz w1, #51712
 ; nextln: movk w1, #15258, LSL #16
 ; nextln: add x0, x1, x0
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f14(i32) -> i32 {
@@ -232,11 +190,8 @@ block0(v0: i32):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x0, w0
+; check: sxtw x0, w0
 ; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f15(i32, i32) -> i32 {
@@ -248,11 +203,8 @@ block0(v0: i32, v1: i32):
   return v5
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x0, w0
+; check: sxtw x0, w0
 ; nextln: ldr w0, [x0, w1, SXTW]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f16(i64) -> i32 {
@@ -263,10 +215,7 @@ block0(v0: i64):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr w0, [x0]
 ; nextln: ret
 
 function %f17(i64) -> i32 {
@@ -277,10 +226,7 @@ block0(v0: i64):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldur w0, [x0, #4]
-; nextln: ldp fp, lr, [sp], #16
+; check: ldur w0, [x0, #4]
 ; nextln: ret
 
 function %f18(i64, i32) -> i16x8 {
@@ -290,11 +236,8 @@ block0(v0: i64, v1: i32):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr d0, [x0, w1, UXTW]
+; check: ldr d0, [x0, w1, UXTW]
 ; nextln: sxtl v0.8h, v0.8b
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f19(i64, i64) -> i32x4 {
@@ -303,12 +246,9 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, x1
+; check: add x0, x0, x1
 ; nextln: ldr d0, [x0, #8]
 ; nextln: uxtl v0.4s, v0.4h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f20(i64, i32) -> i64x2 {
@@ -318,11 +258,8 @@ block0(v0: i64, v1: i32):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr d0, [x0, w1, SXTW]
+; check: ldr d0, [x0, w1, SXTW]
 ; nextln: uxtl v0.2d, v0.2s
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f18(i64, i64, i64) -> i32 {
@@ -333,11 +270,8 @@ block0(v0: i64, v1: i64, v2: i64):
   return v5
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn w0, #4097
+; check: movn w0, #4097
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f19(i64, i64, i64) -> i32 {
@@ -348,11 +282,8 @@ block0(v0: i64, v1: i64, v2: i64):
   return v5
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #4098
+; check: movz x0, #4098
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f20(i64, i64, i64) -> i32 {
@@ -363,12 +294,9 @@ block0(v0: i64, v1: i64, v2: i64):
   return v5
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn w0, #4097
+; check: movn w0, #4097
 ; nextln: sxtw x0, w0
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f21(i64, i64, i64) -> i32 {
@@ -379,12 +307,9 @@ block0(v0: i64, v1: i64, v2: i64):
   return v5
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #4098
+; check: movz x0, #4098
 ; nextln: sxtw x0, w0
 ; nextln: ldrsh x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -395,13 +320,10 @@ block0(v0: i64):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1]
 ; nextln: stp x2, x1, [x0]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -412,13 +334,10 @@ block0(v0: i64):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #16]
 ; nextln: stp x2, x1, [x0, #16]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %i128_imm_offset_large(i64) -> i128 {
@@ -428,13 +347,10 @@ block0(v0: i64):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #504]
 ; nextln: stp x2, x1, [x0, #504]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %i128_imm_offset_negative_large(i64) -> i128 {
@@ -444,13 +360,10 @@ block0(v0: i64):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #-512]
 ; nextln: stp x2, x1, [x0, #-512]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -462,13 +375,10 @@ block0(v0: i64):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: ldp x2, x1, [x1, #32]
 ; nextln: stp x2, x1, [x0, #32]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -481,14 +391,11 @@ block0(v0: i32):
 }
 
 ; TODO: We should be able to deduplicate the sxtw instruction
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x1, w0
+; check: sxtw x1, w0
 ; nextln: ldp x2, x1, [x1]
 ; nextln: sxtw x0, w0
 ; nextln: stp x2, x1, [x0]
 ; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -502,14 +409,11 @@ block0(v0: i64, v1: i32):
   return v5
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x2, x0
+; check: mov x2, x0
 ; nextln: add x2, x2, x1, SXTW
 ; nextln: ldp x3, x2, [x2, #24]
 ; nextln: add x0, x0, x1, SXTW
 ; nextln: stp x3, x2, [x0, #24]
 ; nextln: mov x0, x3
 ; nextln: mov x1, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
index 41d65ee2934e..5f27d9f034a1 100644
--- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif
@@ -8,10 +8,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  add x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  add x0, x0, x1
 ; nextln:  ret
 
 
@@ -21,10 +18,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sub x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  sub x0, x0, x1
 ; nextln:  ret
 
 function %f3(i64, i64) -> i64 {
@@ -33,10 +27,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  madd x0, x0, x1, xzr
-; nextln:  ldp fp, lr, [sp], #16
+; check:  madd x0, x0, x1, xzr
 ; nextln:  ret
 
 function %f4(i64, i64) -> i64 {
@@ -45,10 +36,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  umulh x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  umulh x0, x0, x1
 ; nextln:  ret
 
 function %f5(i64, i64) -> i64 {
@@ -57,10 +45,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  smulh x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  smulh x0, x0, x1
 ; nextln:  ret
 
 function %f6(i64, i64) -> i64 {
@@ -69,15 +54,12 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sdiv x2, x0, x1
+; check:  sdiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  adds xzr, x1, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
 ; nextln:  mov x0, x2
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f7(i64) -> i64 {
@@ -87,16 +69,13 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x2, #2
+; check:  movz x2, #2
 ; nextln:  sdiv x1, x0, x2
 ; nextln:  cbnz x2, 8 ; udf
 ; nextln:  adds xzr, x2, #1
 ; nextln:  ccmp x0, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
 ; nextln:  mov x0, x1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f8(i64, i64) -> i64 {
@@ -105,11 +84,8 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  udiv x0, x0, x1
+; check:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f9(i64) -> i64 {
@@ -119,12 +95,9 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x1, #2
+; check:  movz x1, #2
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f10(i64, i64) -> i64 {
@@ -133,12 +106,9 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sdiv x2, x0, x1
+; check:  sdiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  msub x0, x2, x1, x0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f11(i64, i64) -> i64 {
@@ -147,12 +117,9 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  udiv x2, x0, x1
+; check:  udiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  msub x0, x2, x1, x0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 
@@ -162,16 +129,13 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sxtw x3, w0
+; check:  sxtw x3, w0
 ; nextln:  sxtw x2, w1
 ; nextln:  sdiv x0, x3, x2
 ; nextln:  cbnz x2, 8 ; udf
 ; nextln:  adds wzr, w2, #1
 ; nextln:  ccmp w3, #1, #nzcv, eq
 ; nextln:  b.vc 8 ; udf
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f13(i32) -> i32 {
@@ -181,9 +145,7 @@ block0(v0: i32):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x0, w0
+; check: sxtw x0, w0
 ; nextln: movz x1, #2
 ; nextln: sxtw x2, w1
 ; nextln: sdiv x1, x0, x2
@@ -192,7 +154,6 @@ block0(v0: i32):
 ; nextln: ccmp w0, #1, #nzcv, eq
 ; nextln: b.vc 8 ; udf
 ; nextln: mov x0, x1
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f14(i32, i32) -> i32 {
@@ -201,13 +162,10 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov w0, w0
+; check: mov w0, w0
 ; nextln: mov w1, w1
 ; nextln: udiv x0, x0, x1
 ; nextln: cbnz x1, 8 ; udf
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -218,13 +176,10 @@ block0(v0: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  mov w0, w0
+; check:  mov w0, w0
 ; nextln:  movz x1, #2
 ; nextln:  udiv x0, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f16(i32, i32) -> i32 {
@@ -233,14 +188,11 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sxtw x0, w0
+; check:  sxtw x0, w0
 ; nextln:  sxtw x1, w1
 ; nextln:  sdiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  msub x0, x2, x1, x0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f17(i32, i32) -> i32 {
@@ -249,14 +201,11 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  mov w0, w0
+; check:  mov w0, w0
 ; nextln:  mov w1, w1
 ; nextln:  udiv x2, x0, x1
 ; nextln:  cbnz x1, 8 ; udf
 ; nextln:  msub x0, x2, x1, x0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f18(i64, i64) -> i64 {
@@ -265,10 +214,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  and x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  and x0, x0, x1
 ; nextln:  ret
 
 function %f19(i64, i64) -> i64 {
@@ -277,10 +223,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  orr x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  orr x0, x0, x1
 ; nextln:  ret
 
 function %f20(i64, i64) -> i64 {
@@ -289,10 +232,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  eor x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  eor x0, x0, x1
 ; nextln:  ret
 
 function %f21(i64, i64) -> i64 {
@@ -301,10 +241,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  bic x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  bic x0, x0, x1
 ; nextln:  ret
 
 function %f22(i64, i64) -> i64 {
@@ -313,10 +250,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  orn x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  orn x0, x0, x1
 ; nextln:  ret
 
 function %f23(i64, i64) -> i64 {
@@ -325,10 +259,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  eon x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  eon x0, x0, x1
 ; nextln:  ret
 
 function %f24(i64, i64) -> i64 {
@@ -337,10 +268,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  orn x0, xzr, x0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  orn x0, xzr, x0
 ; nextln:  ret
 
 function %f25(i32, i32) -> i32 {
@@ -351,10 +279,7 @@ block0(v0: i32, v1: i32):
   return v4
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sub w0, w1, w0, LSL 21
-; nextln:  ldp fp, lr, [sp], #16
+; check:  sub w0, w1, w0, LSL 21
 ; nextln:  ret
 
 function %f26(i32) -> i32 {
@@ -364,10 +289,7 @@ block0(v0: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sub w0, w0, #1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  sub w0, w0, #1
 ; nextln:  ret
 
 function %f27(i32) -> i32 {
@@ -377,10 +299,7 @@ block0(v0: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  add w0, w0, #1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  add w0, w0, #1
 ; nextln:  ret
 
 function %f28(i64) -> i64 {
@@ -390,10 +309,7 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  add x0, x0, #1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  add x0, x0, #1
 ; nextln:  ret
 
 function %f29(i64) -> i64 {
@@ -403,11 +319,8 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x0, #1
+; check:  movz x0, #1
 ; nextln:  sub x0, xzr, x0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f30(i8x16) -> i8x16 {
@@ -417,13 +330,10 @@ block0(v0: i8x16):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #1
+; check:  movz x0, #1
 ; nextln: sub w0, wzr, w0
 ; nextln: dup v1.16b, w0
 ; nextln: ushl v0.16b, v0.16b, v1.16b
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -433,11 +343,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: adds x0, x0, x2
+; check: adds x0, x0, x2
 ; nextln: adc x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %sub_i128(i128, i128) -> i128 {
@@ -446,11 +353,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs x0, x0, x2
+; check:  subs x0, x0, x2
 ; nextln: sbc x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %mul_i128(i128, i128) -> i128 {
@@ -459,12 +363,9 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: umulh x4, x0, x2
+; check:  umulh x4, x0, x2
 ; nextln: madd x3, x0, x3, x4
 ; nextln: madd x1, x1, x2, x3
 ; nextln: madd x0, x0, x2, xzr
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
index cc69eef05c5c..7f47a7db1820 100644
--- a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
@@ -8,7 +8,6 @@ block0(v0: i64):
 }
 
 ; check: ldar x0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i32(i64) -> i32 {
@@ -18,7 +17,6 @@ block0(v0: i64):
 }
 
 ; check: ldar w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i16(i64) -> i16 {
@@ -28,7 +26,6 @@ block0(v0: i64):
 }
 
 ; check: ldarh w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i8(i64) -> i8 {
@@ -38,7 +35,6 @@ block0(v0: i64):
 }
 
 ; check: ldarb w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i32_i64(i64) -> i64 {
@@ -49,7 +45,6 @@ block0(v0: i64):
 }
 
 ; check: ldar w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i16_i64(i64) -> i64 {
@@ -60,7 +55,6 @@ block0(v0: i64):
 }
 
 ; check: ldarh w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i8_i64(i64) -> i64 {
@@ -71,7 +65,6 @@ block0(v0: i64):
 }
 
 ; check: ldarb w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i16_i32(i64) -> i32 {
@@ -82,7 +75,6 @@ block0(v0: i64):
 }
 
 ; check: ldarh w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_load_i8_i32(i64) -> i32 {
@@ -93,5 +85,4 @@ block0(v0: i64):
 }
 
 ; check: ldarb w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
index 89d37452787d..60b331da4c1a 100644
--- a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
@@ -8,7 +8,6 @@ block0(v0: i64, v1: i64):
 }
 
 ; check: stlr x0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i32(i32, i64) {
@@ -18,7 +17,6 @@ block0(v0: i32, v1: i64):
 }
 
 ; check: stlr w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i16(i16, i64) {
@@ -28,7 +26,6 @@ block0(v0: i16, v1: i64):
 }
 
 ; check: stlrh w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i8(i8, i64) {
@@ -38,7 +35,6 @@ block0(v0: i8, v1: i64):
 }
 
 ; check: stlrb w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i64_i32(i64, i64) {
@@ -50,7 +46,6 @@ block0(v0: i64, v1: i64):
 
 ; check-not: uxt
 ; check: stlr w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i64_i16(i64, i64) {
@@ -62,7 +57,6 @@ block0(v0: i64, v1: i64):
 
 ; check-not: uxt
 ; check: stlrh w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i64_i8(i64, i64) {
@@ -74,7 +68,6 @@ block0(v0: i64, v1: i64):
 
 ; check-not: uxt
 ; check: stlrb w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i32_i16(i32, i64) {
@@ -86,7 +79,6 @@ block0(v0: i32, v1: i64):
 
 ; check-not: uxt
 ; check: stlrh w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %atomic_store_i32_i8(i32, i64) {
@@ -98,5 +90,4 @@ block0(v0: i32, v1: i64):
 
 ; check-not: uxt
 ; check: stlrb w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/basic1.clif b/cranelift/filetests/filetests/isa/aarch64/basic1.clif
index df9a3c0dc1a3..12cfc3c44ced 100644
--- a/cranelift/filetests/filetests/isa/aarch64/basic1.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/basic1.clif
@@ -4,11 +4,8 @@ target aarch64
 
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v2 = iadd v0, v1
     ; check: add w0, w0, w1
     return v2
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
index 18c77e62ae3b..f3bc8cb5d0c3 100644
--- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
@@ -8,11 +8,8 @@ block0(v0: i8):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit w0, w0
+; check: rbit w0, w0
 ; nextln: lsr w0, w0, #24
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %a(i16) -> i16 {
@@ -21,11 +18,8 @@ block0(v0: i16):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit w0, w0
+; check: rbit w0, w0
 ; nextln: lsr w0, w0, #16
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %a(i32) -> i32 {
@@ -34,10 +28,7 @@ block0(v0: i32):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: rbit w0, w0
 ; nextln: ret
 
 function %a(i64) -> i64 {
@@ -46,10 +37,7 @@ block0(v0: i64):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit x0, x0
-; nextln: ldp fp, lr, [sp], #16
+; check: rbit x0, x0
 ; nextln: ret
 
 function %a(i128) -> i128 {
@@ -58,12 +46,9 @@ block0(v0: i128):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit x2, x0
+; check: rbit x2, x0
 ; nextln: rbit x0, x1
 ; nextln: mov x1, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %b(i8) -> i8 {
@@ -72,11 +57,8 @@ block0(v0: i8):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxtb w0, w0
+; check: uxtb w0, w0
 ; nextln: clz w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %b(i16) -> i16 {
@@ -85,11 +67,8 @@ block0(v0: i16):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxth w0, w0
+; check: uxth w0, w0
 ; nextln: clz w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %b(i32) -> i32 {
@@ -98,10 +77,7 @@ block0(v0: i32):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: clz w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: clz w0, w0
 ; nextln: ret
 
 function %b(i64) -> i64 {
@@ -110,10 +86,7 @@ block0(v0: i64):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: clz x0, x0
-; nextln: ldp fp, lr, [sp], #16
+; check: clz x0, x0
 ; nextln: ret
 
 function %b(i128) -> i128 {
@@ -122,14 +95,11 @@ block0(v0: i128):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: clz x1, x1
+; check: clz x1, x1
 ; nextln: clz x0, x0
 ; nextln: lsr x2, x1, #6
 ; nextln: madd x0, x0, x2, x1
 ; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %c(i8) -> i8 {
@@ -138,11 +108,8 @@ block0(v0: i8):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxtb w0, w0
+; check: uxtb w0, w0
 ; nextln: cls w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %c(i16) -> i16 {
@@ -151,11 +118,8 @@ block0(v0: i16):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxth w0, w0
+; check: uxth w0, w0
 ; nextln: cls w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %c(i32) -> i32 {
@@ -164,10 +128,7 @@ block0(v0: i32):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: cls w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: cls w0, w0
 ; nextln: ret
 
 function %c(i64) -> i64 {
@@ -176,10 +137,7 @@ block0(v0: i64):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: cls x0, x0
-; nextln: ldp fp, lr, [sp], #16
+; check: cls x0, x0
 ; nextln: ret
 
 function %c(i128) -> i128 {
@@ -188,9 +146,7 @@ block0(v0: i128):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: cls x2, x0
+; check: cls x2, x0
 ; nextln: cls x3, x1
 ; nextln: eon x0, x1, x0
 ; nextln: lsr x0, x0, #63
@@ -199,7 +155,6 @@ block0(v0: i128):
 ; nextln: csel x0, x0, xzr, eq
 ; nextln: add x0, x0, x3
 ; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i8) -> i8 {
@@ -208,12 +163,9 @@ block0(v0: i8):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit w0, w0
+; check: rbit w0, w0
 ; nextln: lsr w0, w0, #24
 ; nextln: clz w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i16) -> i16 {
@@ -222,12 +174,9 @@ block0(v0: i16):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit w0, w0
+; check: rbit w0, w0
 ; nextln: lsr w0, w0, #16
 ; nextln: clz w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i32) -> i32 {
@@ -236,11 +185,8 @@ block0(v0: i32):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit w0, w0
+; check: rbit w0, w0
 ; nextln: clz w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i64) -> i64 {
@@ -249,11 +195,8 @@ block0(v0: i64):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit x0, x0
+; check: rbit x0, x0
 ; nextln: clz x0, x0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i128) -> i128 {
@@ -262,16 +205,13 @@ block0(v0: i128):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: rbit x0, x0
+; check: rbit x0, x0
 ; nextln: rbit x1, x1
 ; nextln: clz x0, x0
 ; nextln: clz x1, x1
 ; nextln: lsr x2, x0, #6
 ; nextln: madd x0, x1, x2, x0
 ; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i128) -> i128 {
@@ -280,15 +220,12 @@ block0(v0: i128):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: fmov d0, x0
+; check: fmov d0, x0
 ; nextln: mov v0.d[1], x1
 ; nextln: cnt v0.16b, v0.16b
 ; nextln: addv b0, v0.16b
 ; nextln: umov w0, v0.b[0]
 ; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -298,13 +235,10 @@ block0(v0: i64):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: fmov d0, x0
+; check: fmov d0, x0
 ; nextln: cnt v0.8b, v0.8b
 ; nextln: addv b0, v0.8b
 ; nextln: umov w0, v0.b[0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i32) -> i32 {
@@ -313,13 +247,10 @@ block0(v0: i32):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: fmov s0, w0
+; check: fmov s0, w0
 ; nextln: cnt v0.8b, v0.8b
 ; nextln: addv b0, v0.8b
 ; nextln: umov w0, v0.b[0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i16) -> i16 {
@@ -328,13 +259,10 @@ block0(v0: i16):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: fmov s0, w0
+; check: fmov s0, w0
 ; nextln: cnt v0.8b, v0.8b
 ; nextln: addp v0.8b, v0.8b, v0.8b
 ; nextln: umov w0, v0.b[0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %d(i8) -> i8 {
@@ -343,12 +271,9 @@ block0(v0: i8):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: fmov s0, w0
+; check: fmov s0, w0
 ; nextln: cnt v0.8b, v0.8b
 ; nextln: umov w0, v0.b[0]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %bextend_b8() -> b32 {
@@ -358,11 +283,8 @@ block0:
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #255
+; check: movz x0, #255
 ; nextln: sxtb w0, w0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %bextend_b1() -> b32 {
@@ -372,11 +294,8 @@ block0:
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #1
+; check: movz x0, #1
 ; nextln: sbfx w0, w0, #0, #1
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %bnot_i128(i128) -> i128 {
@@ -385,11 +304,8 @@ block0(v0: i128):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn x0, xzr, x0
+; check: orn x0, xzr, x0
 ; nextln: orn x1, xzr, x1
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %band_i128(i128, i128) -> i128 {
@@ -398,11 +314,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: and x0, x0, x2
+; check: and x0, x0, x2
 ; nextln: and x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %bor_i128(i128, i128) -> i128 {
@@ -411,11 +324,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orr x0, x0, x2
+; check: orr x0, x0, x2
 ; nextln: orr x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %bxor_i128(i128, i128) -> i128 {
@@ -424,11 +334,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: eor x0, x0, x2
+; check: eor x0, x0, x2
 ; nextln: eor x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %band_not_i128(i128, i128) -> i128 {
@@ -437,11 +344,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: bic x0, x0, x2
+; check: bic x0, x0, x2
 ; nextln: bic x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %bor_not_i128(i128, i128) -> i128 {
@@ -450,11 +354,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn x0, x0, x2
+; check: orn x0, x0, x2
 ; nextln: orn x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %bxor_not_i128(i128, i128) -> i128 {
@@ -463,11 +364,8 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: eon x0, x0, x2
+; check: eon x0, x0, x2
 ; nextln: eon x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -477,9 +375,7 @@ block0(v0: i128, v1: i8):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn w3, wzr, w2
+; check: orn w3, wzr, w2
 ; nextln: lsr x4, x0, #1
 ; nextln: lsl x1, x1, x2
 ; nextln: lsr x3, x4, x3
@@ -488,7 +384,6 @@ block0(v0: i128, v1: i8):
 ; nextln: orr x1, x1, x3
 ; nextln: csel x1, x0, x1, ne
 ; nextln: csel x0, xzr, x0, ne
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -498,9 +393,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn w3, wzr, w2
+; check: orn w3, wzr, w2
 ; nextln: lsr x4, x0, #1
 ; nextln: lsl x1, x1, x2
 ; nextln: lsr x3, x4, x3
@@ -509,7 +402,6 @@ block0(v0: i128, v1: i128):
 ; nextln: orr x1, x1, x3
 ; nextln: csel x1, x0, x1, ne
 ; nextln: csel x0, xzr, x0, ne
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -519,9 +411,7 @@ block0(v0: i128, v1: i8):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn w3, wzr, w2
+; check: orn w3, wzr, w2
 ; nextln: lsl x4, x1, #1
 ; nextln: lsr x0, x0, x2
 ; nextln: lsl x3, x4, x3
@@ -531,7 +421,6 @@ block0(v0: i128, v1: i8):
 ; nextln: csel x2, xzr, x1, ne
 ; nextln: csel x0, x1, x0, ne
 ; nextln: mov x1, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -541,9 +430,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn w3, wzr, w2
+; check: orn w3, wzr, w2
 ; nextln: lsl x4, x1, #1
 ; nextln: lsr x0, x0, x2
 ; nextln: lsl x3, x4, x3
@@ -553,7 +440,6 @@ block0(v0: i128, v1: i128):
 ; nextln: csel x2, xzr, x1, ne
 ; nextln: csel x0, x1, x0, ne
 ; nextln: mov x1, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -563,9 +449,7 @@ block0(v0: i128, v1: i8):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn w3, wzr, w2
+; check: orn w3, wzr, w2
 ; nextln: lsl x4, x1, #1
 ; nextln: lsr x0, x0, x2
 ; nextln: lsl x4, x4, x3
@@ -575,7 +459,6 @@ block0(v0: i128, v1: i8):
 ; nextln: orr x0, x0, x4
 ; nextln: csel x1, x1, x3, ne
 ; nextln: csel x0, x3, x0, ne
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -585,9 +468,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orn w3, wzr, w2
+; check: orn w3, wzr, w2
 ; nextln: lsl x4, x1, #1
 ; nextln: lsr x0, x0, x2
 ; nextln: lsl x4, x4, x3
@@ -597,5 +478,4 @@ block0(v0: i128, v1: i128):
 ; nextln: orr x0, x0, x4
 ; nextln: csel x1, x1, x3, ne
 ; nextln: csel x0, x3, x0, ne
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif
index 9491c73b75e6..dcbf1edbc443 100644
--- a/cranelift/filetests/filetests/isa/aarch64/call.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/call.clif
@@ -100,9 +100,7 @@ block0(v0: i8):
     return v1, v1, v1, v1, v1, v1, v1, v1, v0
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  mov x9, x0
+; check:  mov x9, x0
 ; nextln:  mov x8, x1
 ; nextln:  movz x0, #42
 ; nextln:  movz x1, #42
@@ -113,7 +111,6 @@ block0(v0: i8):
 ; nextln:  movz x6, #42
 ; nextln:  movz x7, #42
 ; nextln:  sturb w9, [x8]
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f8() {
@@ -259,10 +256,7 @@ block0(v0: i128, v1: i64):
     return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x0, x1
-; nextln: ldp fp, lr, [sp], #16
+; check: mov x0, x1
 ; nextln: ret
 
 
@@ -295,14 +289,10 @@ block0(v0: i64, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x0, x2
-; nextln: ldp fp, lr, [sp], #16
+; check: mov x0, x2
 ; nextln: ret
 
 
-
 function %f12_call(i64) -> i64 {
     fn0 = %f12(i64, i128) -> i64
 
@@ -333,10 +323,7 @@ block0(v0: i64, v1: i128):
     return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x0, x1
-; nextln: ldp fp, lr, [sp], #16
+; check: mov x0, x1
 ; nextln: ret
 
 
@@ -470,12 +457,9 @@ block0:
     return v0, v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov x1, x0
+; check: mov x1, x0
 ; nextln: movz x0, #0
 ; nextln: movz x2, #1
 ; nextln: stur w2, [x1]
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif
index 19fd4fb33e38..72ea9c63fa0f 100644
--- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif
@@ -8,11 +8,8 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x1
+; check: subs xzr, x0, x1
 ; nextln: cset x0, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_eq_i128(i128, i128) -> b1 {
@@ -21,13 +18,10 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: eor x0, x0, x2
+; check: eor x0, x0, x2
 ; nextln: eor x1, x1, x3
 ; nextln: adds xzr, x0, x1
 ; nextln: cset x0, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -37,13 +31,10 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: eor x0, x0, x2
+; check: eor x0, x0, x2
 ; nextln: eor x1, x1, x3
 ; nextln: adds xzr, x0, x1
 ; nextln: cset x0, ne
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -53,14 +44,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, lo
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, lt
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -70,14 +58,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, lo
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, lo
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_sle_i128(i128, i128) -> b1 {
@@ -86,14 +71,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, ls
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, le
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_ule_i128(i128, i128) -> b1 {
@@ -102,14 +84,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, ls
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, ls
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_sgt_i128(i128, i128) -> b1 {
@@ -118,14 +97,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hi
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, gt
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_ugt_i128(i128, i128) -> b1 {
@@ -134,14 +110,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hi
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, hi
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -151,14 +124,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hs
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, ge
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_uge_i128(i128, i128) -> b1 {
@@ -167,14 +137,11 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hs
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, hs
 ; nextln: csel x0, x0, x1, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_of_i128(i128, i128) -> b1 {
@@ -183,12 +150,9 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: adds xzr, x0, x2
+; check: adds xzr, x0, x2
 ; nextln: adcs xzr, x1, x3
 ; nextln: cset x0, vs
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %icmp_nof_i128(i128, i128) -> b1 {
@@ -197,12 +161,9 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: adds xzr, x0, x2
+; check: adds xzr, x0, x2
 ; nextln: adcs xzr, x1, x3
 ; nextln: cset x0, vc
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -222,17 +183,13 @@ block2:
 }
 
 ; check: Block 0:
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x1
+; check: subs xzr, x0, x1
 ; nextln: b.eq label1 ; b label2
 ; check: Block 1:
 ; check: movz x0, #1
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 ; check: Block 2:
 ; check: movz x0, #2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f(i64, i64) -> i64 {
@@ -246,12 +203,9 @@ block1:
   return v4
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x1
+; check: subs xzr, x0, x1
 ; check: Block 1:
 ; check: movz x0, #1
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -265,17 +219,14 @@ block1:
     return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orr x0, x0, x1
+; check: orr x0, x0, x1
 ; nextln: cbz x0, label1 ; b label2
 ; check: Block 1:
 ; check: b label3
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 
 function %i128_brnz(i128){
@@ -288,17 +239,14 @@ block1:
     return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orr x0, x0, x1
+; check: orr x0, x0, x1
 ; nextln: cbnz x0, label1 ; b label2
 ; check: Block 1:
 ; check: b label3
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 
 
@@ -311,9 +259,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: eor x0, x0, x2
+; check: eor x0, x0, x2
 ; nextln: eor x1, x1, x3
 ; nextln: adds xzr, x0, x1
 ; nextln: b.eq label1 ; b label2
@@ -322,8 +268,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 
 function %i128_bricmp_ne(i128, i128) {
@@ -335,9 +280,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: eor x0, x0, x2
+; check: eor x0, x0, x2
 ; nextln: eor x1, x1, x3
 ; nextln: adds xzr, x0, x1
 ; nextln: b.ne label1 ; b label2
@@ -346,8 +289,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 
 function %i128_bricmp_slt(i128, i128) {
@@ -359,9 +301,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, lo
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, lt
@@ -373,8 +313,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 
 function %i128_bricmp_ult(i128, i128) {
@@ -386,9 +325,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, lo
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, lo
@@ -400,8 +337,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %i128_bricmp_sle(i128, i128) {
 block0(v0: i128, v1: i128):
@@ -412,9 +348,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, ls
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, le
@@ -427,8 +361,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %i128_bricmp_ule(i128, i128) {
 block0(v0: i128, v1: i128):
@@ -439,9 +372,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, ls
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, ls
@@ -454,8 +385,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %i128_bricmp_sgt(i128, i128) {
 block0(v0: i128, v1: i128):
@@ -466,9 +396,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hi
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, gt
@@ -480,8 +408,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %i128_bricmp_ugt(i128, i128) {
 block0(v0: i128, v1: i128):
@@ -492,9 +419,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hi
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, hi
@@ -506,8 +431,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 
 function %i128_bricmp_sge(i128, i128) {
@@ -519,9 +443,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hs
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, ge
@@ -534,8 +456,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %i128_bricmp_uge(i128, i128) {
 block0(v0: i128, v1: i128):
@@ -546,9 +467,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, x2
+; check: subs xzr, x0, x2
 ; nextln: cset x0, hs
 ; nextln: subs xzr, x1, x3
 ; nextln: cset x1, hs
@@ -561,8 +480,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %i128_bricmp_of(i128, i128) {
 block0(v0: i128, v1: i128):
@@ -573,9 +491,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: adds xzr, x0, x2
+; check: adds xzr, x0, x2
 ; nextln: adcs xzr, x1, x3
 ; nextln: b.vs label1 ; b label2
 ; check: Block 1:
@@ -583,8 +499,7 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %i128_bricmp_nof(i128, i128) {
 block0(v0: i128, v1: i128):
@@ -595,9 +510,7 @@ block1:
   return
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: adds xzr, x0, x2
+; check: adds xzr, x0, x2
 ; nextln: adcs xzr, x1, x3
 ; nextln: b.vc label1 ; b label2
 ; check: Block 1:
@@ -605,5 +518,4 @@ block1:
 ; check: Block 2:
 ; check: b label3
 ; check: Block 3:
-; check: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/constants.clif b/cranelift/filetests/filetests/isa/aarch64/constants.clif
index 25d459482ceb..a8c940acd111 100644
--- a/cranelift/filetests/filetests/isa/aarch64/constants.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif
@@ -8,10 +8,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #255
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #255
 ; nextln: ret
 
 function %f() -> b16 {
@@ -20,10 +17,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #0
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #0
 ; nextln: ret
 
 function %f() -> i64 {
@@ -32,10 +26,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #0
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #0
 ; nextln: ret
 
 function %f() -> i64 {
@@ -44,10 +35,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #65535
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #65535
 ; nextln: ret
 
 function %f() -> i64 {
@@ -56,10 +44,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #65535, LSL #16
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #65535, LSL #16
 ; nextln: ret
 
 function %f() -> i64 {
@@ -68,10 +53,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #65535, LSL #32
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #65535, LSL #32
 ; nextln: ret
 
 function %f() -> i64 {
@@ -80,10 +62,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #65535, LSL #48
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #65535, LSL #48
 ; nextln: ret
 
 function %f() -> i64 {
@@ -92,10 +71,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn x0, #0
-; nextln: ldp fp, lr, [sp], #16
+; check: movn x0, #0
 ; nextln: ret
 
 function %f() -> i64 {
@@ -104,10 +80,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn x0, #65535
-; nextln: ldp fp, lr, [sp], #16
+; check: movn x0, #65535
 ; nextln: ret
 
 function %f() -> i64 {
@@ -116,10 +89,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn x0, #65535, LSL #16
-; nextln: ldp fp, lr, [sp], #16
+; check: movn x0, #65535, LSL #16
 ; nextln: ret
 
 function %f() -> i64 {
@@ -128,10 +98,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn x0, #65535, LSL #32
-; nextln: ldp fp, lr, [sp], #16
+; check: movn x0, #65535, LSL #32
 ; nextln: ret
 
 function %f() -> i64 {
@@ -140,10 +107,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn x0, #65535, LSL #48
-; nextln: ldp fp, lr, [sp], #16
+; check: movn x0, #65535, LSL #48
 ; nextln: ret
 
 function %f() -> i64 {
@@ -152,13 +116,10 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #58
+; check: movz x0, #58
 ; nextln: movk x0, #4626, LSL #16
 ; nextln: movk x0, #61603, LSL #32
 ; nextln: movk x0, #62283, LSL #48
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f() -> i64 {
@@ -167,11 +128,8 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #7924, LSL #16
+; check: movz x0, #7924, LSL #16
 ; nextln: movk x0, #4841, LSL #48
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f() -> i64 {
@@ -180,11 +138,8 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn x0, #57611, LSL #16
+; check: movn x0, #57611, LSL #16
 ; nextln: movk x0, #4841, LSL #48
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f() -> i32 {
@@ -193,10 +148,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: orr x0, xzr, #4294967295
-; nextln: ldp fp, lr, [sp], #16
+; check: orr x0, xzr, #4294967295
 ; nextln: ret
 
 function %f() -> i32 {
@@ -205,10 +157,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn w0, #8
-; nextln: ldp fp, lr, [sp], #16
+; check: movn w0, #8
 ; nextln: ret
 
 function %f() -> i64 {
@@ -217,10 +166,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn w0, #8
-; nextln: ldp fp, lr, [sp], #16
+; check: movn w0, #8
 ; nextln: ret
 
 function %f() -> i64 {
@@ -229,8 +175,5 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movn x0, #8
-; nextln: ldp fp, lr, [sp], #16
+; check: movn x0, #8
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
index c64f64af64ce..58735d5bf8d1 100644
--- a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif
@@ -10,11 +10,8 @@ block0(v0: i8):
   return v3
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x1, #42
+; check:  movz x1, #42
 ; nextln:  add x0, x1, x0, SXTB
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 
@@ -24,10 +21,7 @@ block0(v0: i64):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x1, #0
 ; nextln: ret
 
 function %i128_sextend_i64(i64) -> i128 {
@@ -36,10 +30,7 @@ block0(v0: i64):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: asr x1, x0, #63
-; nextln: ldp fp, lr, [sp], #16
+; check: asr x1, x0, #63
 ; nextln: ret
 
 
@@ -49,11 +40,8 @@ block0(v0: i32):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov w0, w0
+; check: mov w0, w0
 ; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %i128_sextend_i32(i32) -> i128 {
@@ -62,11 +50,8 @@ block0(v0: i32):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x0, w0
+; check: sxtw x0, w0
 ; nextln: asr x1, x0, #63
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -76,11 +61,8 @@ block0(v0: i16):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxth w0, w0
+; check: uxth w0, w0
 ; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %i128_sextend_i16(i16) -> i128 {
@@ -89,11 +71,8 @@ block0(v0: i16):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxth x0, w0
+; check: sxth x0, w0
 ; nextln: asr x1, x0, #63
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 
@@ -103,11 +82,8 @@ block0(v0: i8):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxtb w0, w0
+; check: uxtb w0, w0
 ; nextln: movz x1, #0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %i128_sextend_i8(i8) -> i128 {
@@ -116,9 +92,6 @@ block0(v0: i8):
     return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtb x0, w0
+; check: sxtb x0, w0
 ; nextln: asr x1, x0, #63
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif
index e8f0b0ee355f..2ce248a79664 100644
--- a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif
@@ -4,56 +4,42 @@ target aarch64
 
 function u0:0(i8) -> f32 {
 block0(v0: i8):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_from_uint.f32 v0
     ; check: uxtb w0, w0
     ; check: ucvtf s0, w0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
 
 function u0:0(i8) -> f64 {
 block0(v0: i8):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_from_uint.f64 v0
     ; check: uxtb w0, w0
     ; check: ucvtf d0, w0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
 
 function u0:0(i16) -> f32 {
 block0(v0: i16):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_from_uint.f32 v0
     ; check: uxth w0, w0
     ; check: ucvtf s0, w0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
 
 function u0:0(i16) -> f64 {
 block0(v0: i16):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_from_uint.f64 v0
     ; check: uxth w0, w0
     ; check: ucvtf d0, w0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
 
 function u0:0(f32) -> i8 {
 block0(v0: f32):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_to_uint.i8 v0
     ; check: fcmp s0, s0
     ; check: b.vc 8 ; udf
@@ -67,14 +53,11 @@ block0(v0: f32):
     ; check: b.mi 8 ; udf
     ; check: fcvtzu w0, s0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
 
 function u0:0(f64) -> i8 {
 block0(v0: f64):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_to_uint.i8 v0
     ; check: fcmp d0, d0
     ; check: b.vc 8 ; udf
@@ -88,14 +71,11 @@ block0(v0: f64):
     ; check: b.mi 8 ; udf
     ; check: fcvtzu w0, d0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
 
 function u0:0(f32) -> i16 {
 block0(v0: f32):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_to_uint.i16 v0
     ; check: fcmp s0, s0
     ; check: b.vc 8 ; udf
@@ -109,14 +89,11 @@ block0(v0: f32):
     ; check: b.mi 8 ; udf
     ; check: fcvtzu w0, s0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
 
 function u0:0(f64) -> i16 {
 block0(v0: f64):
-    ; check: stp fp, lr, [sp, #-16]!
-    ; check: mov fp, sp
     v1 = fcvt_to_uint.i16 v0
     ; check: fcmp d0, d0
     ; check: b.vc 8 ; udf
@@ -130,6 +107,5 @@ block0(v0: f64):
     ; check: b.mi 8 ; udf
     ; check: fcvtzu w0, d0
     return v1
-    ; check: ldp fp, lr, [sp], #16
     ; check: ret
 }
diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
index 67851347b3ec..8c57deb6ec38 100644
--- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
@@ -8,10 +8,7 @@ block0(v0: f32, v1: f32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fadd s0, s0, s1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fadd s0, s0, s1
 ; nextln:  ret
 
 function %f2(f64, f64) -> f64 {
@@ -20,10 +17,7 @@ block0(v0: f64, v1: f64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fadd d0, d0, d1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fadd d0, d0, d1
 ; nextln:  ret
 
 function %f3(f32, f32) -> f32 {
@@ -32,10 +26,7 @@ block0(v0: f32, v1: f32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fsub s0, s0, s1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fsub s0, s0, s1
 ; nextln:  ret
 
 function %f4(f64, f64) -> f64 {
@@ -44,10 +35,7 @@ block0(v0: f64, v1: f64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fsub d0, d0, d1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fsub d0, d0, d1
 ; nextln:  ret
 
 function %f5(f32, f32) -> f32 {
@@ -56,10 +44,7 @@ block0(v0: f32, v1: f32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmul s0, s0, s1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmul s0, s0, s1
 ; nextln:  ret
 
 function %f6(f64, f64) -> f64 {
@@ -68,10 +53,7 @@ block0(v0: f64, v1: f64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmul d0, d0, d1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmul d0, d0, d1
 ; nextln:  ret
 
 function %f7(f32, f32) -> f32 {
@@ -80,10 +62,7 @@ block0(v0: f32, v1: f32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fdiv s0, s0, s1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fdiv s0, s0, s1
 ; nextln:  ret
 
 function %f8(f64, f64) -> f64 {
@@ -92,10 +71,7 @@ block0(v0: f64, v1: f64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fdiv d0, d0, d1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fdiv d0, d0, d1
 ; nextln:  ret
 
 function %f9(f32, f32) -> f32 {
@@ -104,10 +80,7 @@ block0(v0: f32, v1: f32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmin s0, s0, s1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmin s0, s0, s1
 ; nextln:  ret
 
 function %f10(f64, f64) -> f64 {
@@ -116,10 +89,7 @@ block0(v0: f64, v1: f64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmin d0, d0, d1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmin d0, d0, d1
 ; nextln:  ret
 
 function %f11(f32, f32) -> f32 {
@@ -128,10 +98,7 @@ block0(v0: f32, v1: f32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmax s0, s0, s1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmax s0, s0, s1
 ; nextln:  ret
 
 function %f12(f64, f64) -> f64 {
@@ -140,10 +107,7 @@ block0(v0: f64, v1: f64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmax d0, d0, d1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmax d0, d0, d1
 ; nextln:  ret
 
 function %f13(f32) -> f32 {
@@ -152,10 +116,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fsqrt s0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fsqrt s0, s0
 ; nextln:  ret
 
 function %f15(f64) -> f64 {
@@ -164,10 +125,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fsqrt d0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fsqrt d0, d0
 ; nextln:  ret
 
 function %f16(f32) -> f32 {
@@ -176,10 +134,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fabs s0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fabs s0, s0
 ; nextln:  ret
 
 function %f17(f64) -> f64 {
@@ -188,10 +143,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fabs d0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fabs d0, d0
 ; nextln:  ret
 
 function %f18(f32) -> f32 {
@@ -200,10 +152,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fneg s0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fneg s0, s0
 ; nextln:  ret
 
 function %f19(f64) -> f64 {
@@ -212,10 +161,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fneg d0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fneg d0, d0
 ; nextln:  ret
 
 function %f20(f32) -> f64 {
@@ -224,10 +170,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcvt d0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fcvt d0, s0
 ; nextln:  ret
 
 function %f21(f64) -> f32 {
@@ -236,10 +179,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcvt s0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fcvt s0, d0
 ; nextln:  ret
 
 function %f22(f32) -> f32 {
@@ -248,10 +188,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintp s0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintp s0, s0
 ; nextln:  ret
 
 function %f22(f64) -> f64 {
@@ -260,10 +197,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintp d0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintp d0, d0
 ; nextln:  ret
 
 function %f23(f32) -> f32 {
@@ -272,10 +206,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintm s0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintm s0, s0
 ; nextln:  ret
 
 function %f24(f64) -> f64 {
@@ -284,10 +215,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintm d0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintm d0, d0
 ; nextln:  ret
 
 function %f25(f32) -> f32 {
@@ -296,10 +224,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintz s0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintz s0, s0
 ; nextln:  ret
 
 function %f26(f64) -> f64 {
@@ -308,10 +233,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintz d0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintz d0, d0
 ; nextln:  ret
 
 function %f27(f32) -> f32 {
@@ -320,10 +242,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintn s0, s0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintn s0, s0
 ; nextln:  ret
 
 function %f28(f64) -> f64 {
@@ -332,10 +251,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  frintn d0, d0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  frintn d0, d0
 ; nextln:  ret
 
 function %f29(f32, f32, f32) -> f32 {
@@ -344,10 +260,7 @@ block0(v0: f32, v1: f32, v2: f32):
   return v3
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmadd s0, s0, s1, s2
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmadd s0, s0, s1, s2
 ; nextln:  ret
 
 function %f30(f64, f64, f64) -> f64 {
@@ -356,9 +269,8 @@ block0(v0: f64, v1: f64, v2: f64):
   return v3
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmadd d0, d0, d1, d2
+; check:  fmadd d0, d0, d1, d2
+; nextln:  ret
 
 function %f31(f32, f32) -> f32 {
 block0(v0: f32, v1: f32):
@@ -366,11 +278,8 @@ block0(v0: f32, v1: f32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ushr v1.2s, v1.2s, #31
+; check:  ushr v1.2s, v1.2s, #31
 ; nextln:  sli v0.2s, v1.2s, #31
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f32(f64, f64) -> f64 {
@@ -379,11 +288,8 @@ block0(v0: f64, v1: f64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ushr d1, d1, #63
+; check:  ushr d1, d1, #63
 ; nextln:  sli d0, d1, #63
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f33(f32) -> i32 {
@@ -392,9 +298,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp s0, s0
+; check:  fcmp s0, s0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  movz x0, #49024, LSL #16
 ; nextln:  fmov d1, x0
@@ -405,7 +309,6 @@ block0(v0: f32):
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzu w0, s0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f34(f32) -> i32 {
@@ -414,9 +317,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp s0, s0
+; check:  fcmp s0, s0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  movz x0, #52992, LSL #16
 ; nextln:  fmov d1, x0
@@ -427,7 +328,6 @@ block0(v0: f32):
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzs w0, s0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f35(f32) -> i64 {
@@ -436,9 +336,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp s0, s0
+; check:  fcmp s0, s0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  movz x0, #49024, LSL #16
 ; nextln:  fmov d1, x0
@@ -449,7 +347,6 @@ block0(v0: f32):
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzu x0, s0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f36(f32) -> i64 {
@@ -458,9 +355,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp s0, s0
+; check:  fcmp s0, s0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  movz x0, #57088, LSL #16
 ; nextln:  fmov d1, x0
@@ -471,7 +366,6 @@ block0(v0: f32):
 ; nextln:  fcmp s0, s1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzs x0, s0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f37(f64) -> i32 {
@@ -480,9 +374,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp d0, d0
+; check:  fcmp d0, d0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  movz x0, #49136, LSL #48
 ; nextln:  fmov d1, x0
@@ -493,7 +385,6 @@ block0(v0: f64):
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzu w0, d0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f38(f64) -> i32 {
@@ -502,9 +393,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp d0, d0
+; check:  fcmp d0, d0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  ldr d1, pc+8 ; b 12 ; data.f64 -2147483649
 ; nextln:  fcmp d0, d1
@@ -514,7 +403,6 @@ block0(v0: f64):
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzs w0, d0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f39(f64) -> i64 {
@@ -523,9 +411,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp d0, d0
+; check:  fcmp d0, d0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  movz x0, #49136, LSL #48
 ; nextln:  fmov d1, x0
@@ -536,7 +422,6 @@ block0(v0: f64):
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzu x0, d0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f40(f64) -> i64 {
@@ -545,9 +430,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fcmp d0, d0
+; check:  fcmp d0, d0
 ; nextln:  b.vc 8 ; udf
 ; nextln:  movz x0, #50144, LSL #48
 ; nextln:  fmov d1, x0
@@ -558,7 +441,6 @@ block0(v0: f64):
 ; nextln:  fcmp d0, d1
 ; nextln:  b.mi 8 ; udf
 ; nextln:  fcvtzs x0, d0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f41(i32) -> f32 {
@@ -567,10 +449,7 @@ block0(v0: i32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ucvtf s0, w0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ucvtf s0, w0
 ; nextln:  ret
 
 function %f42(i32) -> f32 {
@@ -579,10 +458,7 @@ block0(v0: i32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  scvtf s0, w0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  scvtf s0, w0
 ; nextln:  ret
 
 function %f43(i64) -> f32 {
@@ -591,10 +467,7 @@ block0(v0: i64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ucvtf s0, x0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ucvtf s0, x0
 ; nextln:  ret
 
 function %f44(i64) -> f32 {
@@ -603,10 +476,7 @@ block0(v0: i64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  scvtf s0, x0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  scvtf s0, x0
 ; nextln:  ret
 
 function %f45(i32) -> f64 {
@@ -615,10 +485,7 @@ block0(v0: i32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ucvtf d0, w0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ucvtf d0, w0
 ; nextln:  ret
 
 function %f46(i32) -> f64 {
@@ -627,10 +494,7 @@ block0(v0: i32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  scvtf d0, w0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  scvtf d0, w0
 ; nextln:  ret
 
 function %f47(i64) -> f64 {
@@ -639,10 +503,7 @@ block0(v0: i64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ucvtf d0, x0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ucvtf d0, x0
 ; nextln:  ret
 
 function %f48(i64) -> f64 {
@@ -651,10 +512,7 @@ block0(v0: i64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  scvtf d0, x0
-; nextln:  ldp fp, lr, [sp], #16
+; check:  scvtf d0, x0
 ; nextln:  ret
 
 function %f49(f32) -> i32 {
@@ -663,9 +521,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: movz x0, #20352, LSL #16
+; check:  movz x0, #20352, LSL #16
 ; nextln: fmov d1, x0
 ; nextln: fmin s2, s0, s1
 ; nextln: movi v1.2s, #0
@@ -673,7 +529,6 @@ block0(v0: f32):
 ; nextln: fcmp s0, s0
 ; nextln: fcsel s0, s1, s2, ne
 ; nextln: fcvtzu w0, s0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f50(f32) -> i32 {
@@ -682,9 +537,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: movz x0, #20224, LSL #16
+; check:  movz x0, #20224, LSL #16
 ; nextln: fmov d1, x0
 ; nextln: fmin s1, s0, s1
 ; nextln: movz x0, #52992, LSL #16
@@ -694,7 +547,6 @@ block0(v0: f32):
 ; nextln: fcmp s0, s0
 ; nextln: fcsel s0, s2, s1, ne
 ; nextln: fcvtzs w0, s0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f51(f32) -> i64 {
@@ -703,9 +555,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: movz x0, #24448, LSL #16
+; check:  movz x0, #24448, LSL #16
 ; nextln: fmov d1, x0
 ; nextln: fmin s2, s0, s1
 ; nextln: movi v1.2s, #0
@@ -713,7 +563,6 @@ block0(v0: f32):
 ; nextln: fcmp s0, s0
 ; nextln: fcsel s0, s1, s2, ne
 ; nextln: fcvtzu x0, s0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f52(f32) -> i64 {
@@ -722,9 +571,7 @@ block0(v0: f32):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: movz x0, #24320, LSL #16
+; check:  movz x0, #24320, LSL #16
 ; nextln: fmov d1, x0
 ; nextln: fmin s1, s0, s1
 ; nextln: movz x0, #57088, LSL #16
@@ -734,7 +581,6 @@ block0(v0: f32):
 ; nextln: fcmp s0, s0
 ; nextln: fcsel s0, s2, s1, ne
 ; nextln: fcvtzs x0, s0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f53(f64) -> i32 {
@@ -743,16 +589,13 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: ldr d1, pc+8 ; b 12 ; data.f64 4294967295
+; check:  ldr d1, pc+8 ; b 12 ; data.f64 4294967295
 ; nextln: fmin d2, d0, d1
 ; nextln: movi v1.2s, #0
 ; nextln: fmax d2, d2, d1
 ; nextln: fcmp d0, d0
 ; nextln: fcsel d0, d1, d2, ne
 ; nextln: fcvtzu w0, d0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f54(f64) -> i32 {
@@ -761,9 +604,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: ldr d1, pc+8 ; b 12 ; data.f64 2147483647
+; check:  ldr d1, pc+8 ; b 12 ; data.f64 2147483647
 ; nextln: fmin d1, d0, d1
 ; nextln: movz x0, #49632, LSL #48
 ; nextln: fmov d2, x0
@@ -772,7 +613,6 @@ block0(v0: f64):
 ; nextln: fcmp d0, d0
 ; nextln: fcsel d0, d2, d1, ne
 ; nextln: fcvtzs w0, d0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f55(f64) -> i64 {
@@ -781,9 +621,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: movz x0, #17392, LSL #48
+; check:  movz x0, #17392, LSL #48
 ; nextln: fmov d1, x0
 ; nextln: fmin d2, d0, d1
 ; nextln: movi v1.2s, #0
@@ -791,7 +629,6 @@ block0(v0: f64):
 ; nextln: fcmp d0, d0
 ; nextln: fcsel d0, d1, d2, ne
 ; nextln: fcvtzu x0, d0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f56(f64) -> i64 {
@@ -800,9 +637,7 @@ block0(v0: f64):
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln: movz x0, #17376, LSL #48
+; check:  movz x0, #17376, LSL #48
 ; nextln: fmov d1, x0
 ; nextln: fmin d1, d0, d1
 ; nextln: movz x0, #50144, LSL #48
@@ -812,5 +647,4 @@ block0(v0: f64):
 ; nextln: fcmp d0, d0
 ; nextln: fcsel d0, d2, d1, ne
 ; nextln: fcvtzs x0, d0
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
index f78ec80b42c3..e037f0984af2 100644
--- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif
@@ -14,9 +14,7 @@ block0(v0: i64, v1: i32):
 }
 
 ; check: Block 0:
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov w2, w1
+; check: mov w2, w1
 ; nextln: ldr x3, [x0]
 ; nextln: mov x3, x3
 ; nextln: subs xzr, x2, x3
@@ -26,7 +24,6 @@ block0(v0: i64, v1: i32):
 ; nextln: subs xzr, x2, x3
 ; nextln: movz x1, #0
 ; nextln: csel x0, x1, x0, hi
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 ; check: Block 2:
 ; check: udf
@@ -41,9 +38,7 @@ block0(v0: i64, v1: i32):
 }
 
 ; check: Block 0:
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov w2, w1
+; check: mov w2, w1
 ; nextln: subs xzr, x2, #65536
 ; nextln: b.ls label1 ; b label2
 ; check: Block 1:
@@ -51,7 +46,6 @@ block0(v0: i64, v1: i32):
 ; nextln: subs xzr, x2, #65536
 ; nextln: movz x1, #0
 ; nextln: csel x0, x1, x0, hi
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 ; check: Block 2:
 ; check: udf
diff --git a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif
index e55e3d74e012..2d2f317fb3f3 100644
--- a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif
@@ -18,15 +18,12 @@ block0:
 ; nextln: Entry block: 0
 ; nextln: Block 0:
 ; nextln:   (original IR block: block0)
-; nextln:   (instruction range: 0 .. 10)
-; nextln:   Inst 0:   stp fp, lr, [sp, #-16]!
-; nextln:   Inst 1:   mov fp, sp
-; nextln:   Inst 2:   movz x0, #56780
-; nextln:   Inst 3:   uxth w0, w0
-; nextln:   Inst 4:   movz x1, #56780
-; nextln:   Inst 5:   subs wzr, w0, w1, UXTH
-; nextln:   Inst 6:   cset x0, ne
-; nextln:   Inst 7:   and w0, w0, #1
-; nextln:   Inst 8:   ldp fp, lr, [sp], #16
-; nextln:   Inst 9:   ret
+; nextln:   (instruction range: 0 .. 7)
+; nextln:   Inst 0:   movz x0, #56780
+; nextln:   Inst 1:   uxth w0, w0
+; nextln:   Inst 2:   movz x1, #56780
+; nextln:   Inst 3:   subs wzr, w0, w1, UXTH
+; nextln:   Inst 4:   cset x0, ne
+; nextln:   Inst 5:   and w0, w0, #1
+; nextln:   Inst 6:   ret
 ; nextln: }}
diff --git a/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif b/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif
index 3b143969e966..38c512a5647d 100644
--- a/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif
@@ -10,9 +10,6 @@ block1:
   return v0, v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x0, #1
+; check:  movz x0, #1
 ; nextln:  movz x1, #2
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif
index 7286c4249ec4..d7ff72c1982a 100644
--- a/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif
@@ -8,10 +8,7 @@ block0(v0: i8, v1: i8):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add w0, w0, w1
-; nextln: ldp fp, lr, [sp], #16
+; check: add w0, w0, w1
 ; nextln: ret
 
 function %add16(i16, i16) -> i16 {
@@ -20,10 +17,7 @@ block0(v0: i16, v1: i16):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add w0, w0, w1
-; nextln: ldp fp, lr, [sp], #16
+; check: add w0, w0, w1
 ; nextln: ret
 
 function %add32(i32, i32) -> i32 {
@@ -32,10 +26,7 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add w0, w0, w1
-; nextln: ldp fp, lr, [sp], #16
+; check: add w0, w0, w1
 ; nextln: ret
 
 function %add32_8(i32, i8) -> i32 {
@@ -45,10 +36,7 @@ block0(v0: i32, v1: i8):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add w0, w0, w1, SXTB
-; nextln: ldp fp, lr, [sp], #16
+; check: add w0, w0, w1, SXTB
 ; nextln: ret
 
 function %add64_32(i64, i32) -> i64 {
@@ -58,8 +46,5 @@ block0(v0: i64, v1: i32):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, x1, SXTW
-; nextln: ldp fp, lr, [sp], #16
+; check: add x0, x0, x1, SXTW
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/reduce.clif b/cranelift/filetests/filetests/isa/aarch64/reduce.clif
index 6beb77ff4f63..7efbb03fcc0a 100644
--- a/cranelift/filetests/filetests/isa/aarch64/reduce.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/reduce.clif
@@ -7,37 +7,25 @@ block0(v0: i128):
   v1 = ireduce.i64 v0
   return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %ireduce_128_32(i128) -> i32 {
 block0(v0: i128):
   v1 = ireduce.i32 v0
   return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %ireduce_128_16(i128) -> i16 {
 block0(v0: i128):
   v1 = ireduce.i16 v0
   return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %ireduce_128_8(i128) -> i8 {
 block0(v0: i128):
   v1 = ireduce.i8 v0
   return v1
 }
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif
index 5485a837f3d9..ade68f0a2b86 100644
--- a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif
@@ -7,10 +7,7 @@ block0(v0: r64):
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
+; check: ret
 
 function %f1(r64) -> b1 {
 block0(v0: r64):
@@ -18,11 +15,8 @@ block0(v0: r64):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: subs xzr, x0, #0
+; check: subs xzr, x0, #0
 ; nextln: cset x0, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f2(r64) -> b1 {
@@ -31,11 +25,8 @@ block0(v0: r64):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: adds xzr, x0, #1
+; check: adds xzr, x0, #1
 ; nextln: cset x0, eq
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f3() -> r64 {
@@ -44,10 +35,7 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x0, #0
-; nextln: ldp fp, lr, [sp], #16
+; check: movz x0, #0
 ; nextln: ret
 
 function %f4(r64, r64) -> r64, r64, r64 {
diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-op.clif b/cranelift/filetests/filetests/isa/aarch64/shift-op.clif
index adedf1b6d9fb..8fce954a6ff5 100644
--- a/cranelift/filetests/filetests/isa/aarch64/shift-op.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/shift-op.clif
@@ -10,10 +10,7 @@ block0(v0: i64):
   return v3
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: add x0, x0, x0, LSL 3
-; nextln: ldp fp, lr, [sp], #16
+; check: add x0, x0, x0, LSL 3
 ; nextln: ret
 
 function %f(i32) -> i32 {
@@ -23,8 +20,5 @@ block0(v0: i32):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: lsl w0, w0, #21
-; nextln: ldp fp, lr, [sp], #16
+; check: lsl w0, w0, #21
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif
index 4346713ac7fc..f46277a0e979 100644
--- a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif
@@ -12,9 +12,7 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x3, #128
+; check: movz x3, #128
 ; nextln: sub x5, x3, x2
 ; nextln: orn w4, wzr, w2
 ; nextln: lsl x6, x1, #1
@@ -36,7 +34,6 @@ block0(v0: i128, v1: i128):
 ; nextln: csel x0, xzr, x0, ne
 ; nextln: orr x0, x0, x4
 ; nextln: orr x1, x1, x3
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f0(i64, i64) -> i64 {
@@ -45,10 +42,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ror x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ror x0, x0, x1
 ; nextln:  ret
 
 function %f1(i32, i32) -> i32 {
@@ -57,10 +51,7 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ror w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ror w0, w0, w1
 ; nextln:  ret
 
 function %f2(i16, i16) -> i16 {
@@ -69,16 +60,13 @@ block0(v0: i16, v1: i16):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  uxth w0, w0
+; check:  uxth w0, w0
 ; nextln:  and w1, w1, #15
 ; nextln:  sub w2, w1, #16
 ; nextln:  sub w2, wzr, w2
 ; nextln:  lsr w1, w0, w1
 ; nextln:  lsl w0, w0, w2
 ; nextln:  orr w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f3(i8, i8) -> i8 {
@@ -87,16 +75,13 @@ block0(v0: i8, v1: i8):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  uxtb w0, w0
+; check:  uxtb w0, w0
 ; nextln:  and w1, w1, #7
 ; nextln:  sub w2, w1, #8
 ; nextln:  sub w2, wzr, w2
 ; nextln:  lsr w1, w0, w1
 ; nextln:  lsl w0, w0, w2
 ; nextln:  orr w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -109,9 +94,7 @@ block0(v0: i128, v1: i128):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: movz x3, #128
+; check: movz x3, #128
 ; nextln: sub x5, x3, x2
 ; nextln: orn w4, wzr, w2
 ; nextln: lsr x6, x0, #1
@@ -136,7 +119,6 @@ block0(v0: i128, v1: i128):
 ; nextln: mov x2, x0
 ; nextln: mov x0, x1
 ; nextln: mov x1, x2
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f4(i64, i64) -> i64 {
@@ -145,11 +127,8 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sub x1, xzr, x1
+; check:  sub x1, xzr, x1
 ; nextln:  ror x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f5(i32, i32) -> i32 {
@@ -158,11 +137,8 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  sub w1, wzr, w1
+; check:  sub w1, wzr, w1
 ; nextln:  ror w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f6(i16, i16) -> i16 {
@@ -171,9 +147,7 @@ block0(v0: i16, v1: i16):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  uxth w0, w0
+; check:  uxth w0, w0
 ; nextln:  sub w1, wzr, w1
 ; nextln:  and w1, w1, #15
 ; nextln:  sub w2, w1, #16
@@ -181,7 +155,6 @@ block0(v0: i16, v1: i16):
 ; nextln:  lsr w1, w0, w1
 ; nextln:  lsl w0, w0, w2
 ; nextln:  orr w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f7(i8, i8) -> i8 {
@@ -190,9 +163,7 @@ block0(v0: i8, v1: i8):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  uxtb w0, w0
+; check:  uxtb w0, w0
 ; nextln:  sub w1, wzr, w1
 ; nextln:  and w1, w1, #7
 ; nextln:  sub w2, w1, #8
@@ -200,7 +171,6 @@ block0(v0: i8, v1: i8):
 ; nextln:  lsr w1, w0, w1
 ; nextln:  lsl w0, w0, w2
 ; nextln:  orr w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -213,10 +183,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  lsr x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  lsr x0, x0, x1
 ; nextln:  ret
 
 function %f9(i32, i32) -> i32 {
@@ -225,10 +192,7 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  lsr w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  lsr w0, w0, w1
 ; nextln:  ret
 
 function %f10(i16, i16) -> i16 {
@@ -240,6 +204,7 @@ block0(v0: i16, v1: i16):
 ; check:  uxth w0, w0
 ; nextln:  and w1, w1, #15
 ; nextln:  lsr w0, w0, w1
+; nextln:  ret
 
 function %f11(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -250,6 +215,7 @@ block0(v0: i8, v1: i8):
 ; check:  uxtb w0, w0
 ; nextln:  and w1, w1, #7
 ; nextln:  lsr w0, w0, w1
+; nextln:  ret
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; LSL, variable
@@ -261,10 +227,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  lsl x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  lsl x0, x0, x1
 ; nextln:  ret
 
 function %f13(i32, i32) -> i32 {
@@ -273,10 +236,7 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  lsl w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  lsl w0, w0, w1
 ; nextln:  ret
 
 function %f14(i16, i16) -> i16 {
@@ -287,6 +247,7 @@ block0(v0: i16, v1: i16):
 
 ; check:  and w1, w1, #15
 ; nextln:  lsl w0, w0, w1
+; nextln:  ret
 
 function %f15(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -296,6 +257,7 @@ block0(v0: i8, v1: i8):
 
 ; check:  and w1, w1, #7
 ; nextln:  lsl w0, w0, w1
+; nextln:  ret
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; ASR, variable
@@ -307,10 +269,7 @@ block0(v0: i64, v1: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  asr x0, x0, x1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  asr x0, x0, x1
 ; nextln:  ret
 
 function %f17(i32, i32) -> i32 {
@@ -319,10 +278,7 @@ block0(v0: i32, v1: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  asr w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
+; check:  asr w0, w0, w1
 ; nextln:  ret
 
 function %f18(i16, i16) -> i16 {
@@ -333,6 +289,7 @@ block0(v0: i16, v1: i16):
 
 ; check:  and w1, w1, #15
 ; nextln:  asr w0, w0, w1
+; nextln:  ret
 
 function %f19(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -342,6 +299,7 @@ block0(v0: i8, v1: i8):
 
 ; check:  and w1, w1, #7
 ; nextln:  asr w0, w0, w1
+; nextln:  ret
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; immediate forms
@@ -354,10 +312,7 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ror x0, x0, #17
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ror x0, x0, #17
 ; nextln:  ret
 
 function %f21(i64) -> i64 {
@@ -367,10 +322,7 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ror x0, x0, #47
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ror x0, x0, #47
 ; nextln:  ret
 
 function %f22(i32) -> i32 {
@@ -380,10 +332,7 @@ block0(v0: i32):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ror w0, w0, #15
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ror w0, w0, #15
 ; nextln:  ret
 
 function %f23(i16) -> i16 {
@@ -393,13 +342,10 @@ block0(v0: i16):
   return v2
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxth w0, w0
+; check: uxth w0, w0
 ; nextln: lsr w1, w0, #6
 ; nextln: lsl w0, w0, #10
 ; nextln: orr w0, w0, w1
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %f24(i8) -> i8 {
@@ -409,13 +355,10 @@ block0(v0: i8):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  uxtb w0, w0
+; check:  uxtb w0, w0
 ; nextln:  lsr w1, w0, #5
 ; nextln:  lsl w0, w0, #3
 ; nextln:  orr w0, w0, w1
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f25(i64) -> i64 {
@@ -425,10 +368,7 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  lsr x0, x0, #17
-; nextln:  ldp fp, lr, [sp], #16
+; check:  lsr x0, x0, #17
 ; nextln:  ret
 
 function %f26(i64) -> i64 {
@@ -438,10 +378,7 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  asr x0, x0, #17
-; nextln:  ldp fp, lr, [sp], #16
+; check:  asr x0, x0, #17
 ; nextln:  ret
 
 function %f27(i64) -> i64 {
@@ -451,8 +388,5 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  lsl x0, x0, #17
-; nextln:  ldp fp, lr, [sp], #16
+; check:  lsl x0, x0, #17
 ; nextln:  ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif b/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif
index ca9b3e2fae21..cea61b270455 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif
@@ -12,7 +12,6 @@ block0(v0: i8x16, v1: i8x16):
 
 ; check-not: sxtl
 ; check: smull v0.8h, v0.8b, v1.8b
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn2(i8x16, i8x16) -> i16x8 {
@@ -25,7 +24,6 @@ block0(v0: i8x16, v1: i8x16):
 
 ; check-not: sxtl
 ; check: smull2 v0.8h, v0.16b, v1.16b
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn3(i16x8, i16x8) -> i32x4 {
@@ -38,7 +36,6 @@ block0(v0: i16x8, v1: i16x8):
 
 ; check-not: sxtl
 ; check: smull v0.4s, v0.4h, v1.4h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn4(i16x8, i16x8) -> i32x4 {
@@ -51,7 +48,6 @@ block0(v0: i16x8, v1: i16x8):
 
 ; check-not: sxtl
 ; check: smull2 v0.4s, v0.8h, v1.8h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn5(i32x4, i32x4) -> i64x2 {
@@ -64,7 +60,6 @@ block0(v0: i32x4, v1: i32x4):
 
 ; check-not: sxtl
 ; check: smull v0.2d, v0.2s, v1.2s
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn6(i32x4, i32x4) -> i64x2 {
@@ -77,7 +72,6 @@ block0(v0: i32x4, v1: i32x4):
 
 ; check-not: sxtl
 ; check: smull2 v0.2d, v0.4s, v1.4s
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn7(i8x16, i8x16) -> i16x8 {
@@ -90,7 +84,6 @@ block0(v0: i8x16, v1: i8x16):
 
 ; check-not: uxtl
 ; check: umull v0.8h, v0.8b, v1.8b
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn8(i8x16, i8x16) -> i16x8 {
@@ -103,7 +96,6 @@ block0(v0: i8x16, v1: i8x16):
 
 ; check-not: uxtl
 ; check: umull2 v0.8h, v0.16b, v1.16b
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn9(i16x8, i16x8) -> i32x4 {
@@ -116,7 +108,6 @@ block0(v0: i16x8, v1: i16x8):
 
 ; check-not: uxtl
 ; check: umull v0.4s, v0.4h, v1.4h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn10(i16x8, i16x8) -> i32x4 {
@@ -129,7 +120,6 @@ block0(v0: i16x8, v1: i16x8):
 
 ; check-not: uxtl
 ; check: umull2 v0.4s, v0.8h, v1.8h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn11(i32x4, i32x4) -> i64x2 {
@@ -142,7 +132,6 @@ block0(v0: i32x4, v1: i32x4):
 
 ; check-not: uxtl
 ; check: umull v0.2d, v0.2s, v1.2s
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn12(i32x4, i32x4) -> i64x2 {
@@ -155,5 +144,4 @@ block0(v0: i32x4, v1: i32x4):
 
 ; check-not: uxtl2
 ; check: umull2 v0.2d, v0.4s, v1.4s
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
index 42190619c613..0b91d1214573 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif
@@ -11,10 +11,7 @@ block0(v0: i8x16):
   return v3
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: saddlp v0.8h, v0.16b
-; nextln: ldp fp, lr, [sp], #16
+; check: saddlp v0.8h, v0.16b
 ; nextln: ret
 
 function %fn2(i8x16) -> i16x8 {
@@ -25,10 +22,7 @@ block0(v0: i8x16):
   return v3
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: uaddlp v0.8h, v0.16b
-; nextln: ldp fp, lr, [sp], #16
+; check: uaddlp v0.8h, v0.16b
 ; nextln: ret
 
 function %fn3(i16x8) -> i32x4 {
@@ -39,10 +33,7 @@ block0(v0: i16x8):
   return v3
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: saddlp v0.4s, v0.8h
-; nextln: ldp fp, lr, [sp], #16
+; check: saddlp v0.4s, v0.8h
 ; nextln: ret
 
 function %fn4(i16x8) -> i32x4 {
@@ -53,10 +44,7 @@ block0(v0: i16x8):
   return v3
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: uaddlp v0.4s, v0.8h
-; nextln: ldp fp, lr, [sp], #16
+; check: uaddlp v0.4s, v0.8h
 ; nextln: ret
 
 function %fn5(i8x16, i8x16) -> i16x8 {
@@ -67,12 +55,9 @@ block0(v0: i8x16, v1: i8x16):
   return v4
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: sxtl v0.8h, v0.8b
+; check: sxtl v0.8h, v0.8b
 ; nextln: sxtl2 v1.8h, v1.16b
 ; nextln: addp v0.8h, v0.8h, v1.8h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn6(i8x16, i8x16) -> i16x8 {
@@ -83,12 +68,9 @@ block0(v0: i8x16, v1: i8x16):
   return v4
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: uxtl v0.8h, v0.8b
+; check: uxtl v0.8h, v0.8b
 ; nextln: uxtl2 v1.8h, v1.16b
 ; nextln: addp v0.8h, v0.8h, v1.8h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn7(i8x16) -> i16x8 {
@@ -99,12 +81,9 @@ block0(v0: i8x16):
   return v3
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: uxtl v1.8h, v0.8b
+; check: uxtl v1.8h, v0.8b
 ; nextln: sxtl2 v0.8h, v0.16b
 ; nextln: addp v0.8h, v1.8h, v0.8h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
 function %fn8(i8x16) -> i16x8 {
@@ -115,10 +94,7 @@ block0(v0: i8x16):
   return v3
 }
 
-; check: stp fp
-; nextln: mov fp, sp
-; nextln: sxtl v1.8h, v0.8b
+; check: sxtl v1.8h, v0.8b
 ; nextln: uxtl2 v0.8h, v0.16b
 ; nextln: addp v0.8h, v1.8h, v0.8h
-; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif
index c43d9cb6f24e..b0e2c4dfbad9 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif
@@ -9,12 +9,9 @@ block0:
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x0, #1
+; check:  movz x0, #1
 ; nextln:  movk x0, #1, LSL #48
 ; nextln:  dup v0.2d, x0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f2() -> i16x8 {
@@ -25,11 +22,8 @@ block0:
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x0, #42679
+; check:  movz x0, #42679
 ; nextln:  dup v0.8h, w0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f3() -> b8x16 {
@@ -40,10 +34,7 @@ block0:
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movi v0.16b, #255
-; nextln:  ldp fp, lr, [sp], #16
+; check:  movi v0.16b, #255
 ; nextln:  ret
 
 function %f4(i32, i8x16, i8x16) -> i8x16 {
@@ -52,11 +43,8 @@ block0(v0: i32, v1: i8x16, v2: i8x16):
    return v3
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  subs wzr, w0, wzr
+; check:  subs wzr, w0, wzr
 ; nextln:  vcsel v0.16b, v0.16b, v1.16b, ne (if-then-else diamond)
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f5(i64) -> i8x16 {
@@ -66,10 +54,7 @@ block0(v0: i64):
   return v2
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ld1r { v0.16b }, [x0]
-; nextln:  ldp fp, lr, [sp], #16
+; check:  ld1r { v0.16b }, [x0]
 ; nextln:  ret
 
 function %f6(i64, i64) -> i8x16, i8x16 {
@@ -81,11 +66,8 @@ block0(v0: i64, v1: i64):
   return v4, v5
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ld1r { v0.16b }, [x0]
+; check:  ld1r { v0.16b }, [x0]
 ; nextln:  ld1r { v1.16b }, [x1]
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f7(i64, i64) -> i8x16, i8x16 {
@@ -97,12 +79,9 @@ block0(v0: i64, v1: i64):
   return v4, v5
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ldrb w0, [x0]
+; check:  ldrb w0, [x0]
 ; nextln:  ld1r { v0.16b }, [x1]
 ; nextln:  dup v1.16b, w0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f8(i64, i64) -> i8x16, i8x16 {
@@ -113,12 +92,9 @@ block0(v0: i64, v1: i64):
   return v3, v4
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  ldrb w0, [x0]
+; check:  ldrb w0, [x0]
 ; nextln:  dup v0.16b, w0
 ; nextln:  dup v1.16b, w0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f9() -> i32x2 {
@@ -128,11 +104,8 @@ block0:
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movi v0.2d, #18374687579166474495
+; check:  movi v0.2d, #18374687579166474495
 ; nextln:  fmov d0, d0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f10() -> i32x4 {
@@ -142,10 +115,7 @@ block0:
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  mvni v0.4s, #15, MSL #16
-; nextln:  ldp fp, lr, [sp], #16
+; check:  mvni v0.4s, #15, MSL #16
 ; nextln:  ret
 
 function %f11() -> f32x4 {
@@ -155,8 +125,5 @@ block0:
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  fmov v0.4s, #1.3125
-; nextln:  ldp fp, lr, [sp], #16
+; check:  fmov v0.4s, #1.3125
 ; nextln:  ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif
index b6b106c69894..e76521d26349 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif
@@ -9,12 +9,9 @@ block0:
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x0, #1
+; check:  movz x0, #1
 ; nextln:  movk x0, #1, LSL #48
 ; nextln:  fmov d0, x0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
 
 function %f2() -> i32x4 {
@@ -24,9 +21,6 @@ block0:
   return v1
 }
 
-; check:  stp fp, lr, [sp, #-16]!
-; nextln:  mov fp, sp
-; nextln:  movz x0, #42679
+; check:  movz x0, #42679
 ; nextln:  fmov s0, w0
-; nextln:  ldp fp, lr, [sp], #16
 ; nextln:  ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
index ccaa8ea47c9f..0b767127ea42 100644
--- a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
@@ -12,10 +12,7 @@ block0(v0: i64):
     return
 }
 
-; check:      stp fp, lr, [sp, #-16]!
-; nextln:     mov fp, sp
-; nextln:     ldp fp, lr, [sp], #16
-; nextln:     ret
+; check:      ret
 
 function %stack_limit_gv_leaf_zero(i64 vmctx) {
     gv0 = vmctx
@@ -26,10 +23,7 @@ block0(v0: i64):
     return
 }
 
-; check:      stp fp, lr, [sp, #-16]!
-; nextln:     mov fp, sp
-; nextln:     ldp fp, lr, [sp], #16
-; nextln:     ret
+; check:      ret
 
 
 function %stack_limit_call_zero(i64 stack_limit) {
diff --git a/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif b/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif
index 24306da6b337..9a40f4827c95 100644
--- a/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif
@@ -10,8 +10,5 @@ block0:
   return v0
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: ldr x0, 8 ; b 12 ; data
-; nextln: ldp fp, lr, [sp], #16
+; check: ldr x0, 8 ; b 12 ; data
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif
index 6a72e108f0c9..3a85a79ac0d3 100644
--- a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif
@@ -8,10 +8,7 @@ block0(v0: i8):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxtb w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: uxtb w0, w0
 ; nextln: ret
 
 function %f_u_8_32(i8) -> i32 {
@@ -20,10 +17,7 @@ block0(v0: i8):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxtb w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: uxtb w0, w0
 ; nextln: ret
 
 function %f_u_8_16(i8) -> i16 {
@@ -32,10 +26,7 @@ block0(v0: i8):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxtb w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: uxtb w0, w0
 ; nextln: ret
 
 function %f_s_8_64(i8) -> i64 {
@@ -44,10 +35,7 @@ block0(v0: i8):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtb x0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: sxtb x0, w0
 ; nextln: ret
 
 function %f_s_8_32(i8) -> i32 {
@@ -56,10 +44,7 @@ block0(v0: i8):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtb w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: sxtb w0, w0
 ; nextln: ret
 
 function %f_s_8_16(i8) -> i16 {
@@ -68,10 +53,7 @@ block0(v0: i8):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtb w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: sxtb w0, w0
 ; nextln: ret
 
 function %f_u_16_64(i16) -> i64 {
@@ -80,10 +62,7 @@ block0(v0: i16):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxth w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: uxth w0, w0
 ; nextln: ret
 
 function %f_u_16_32(i16) -> i32 {
@@ -92,10 +71,7 @@ block0(v0: i16):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: uxth w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: uxth w0, w0
 ; nextln: ret
 
 function %f_s_16_64(i16) -> i64 {
@@ -104,10 +80,7 @@ block0(v0: i16):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxth x0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: sxth x0, w0
 ; nextln: ret
 
 function %f_s_16_32(i16) -> i32 {
@@ -116,10 +89,7 @@ block0(v0: i16):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxth w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: sxth w0, w0
 ; nextln: ret
 
 function %f_u_32_64(i32) -> i64 {
@@ -128,10 +98,7 @@ block0(v0: i32):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: mov w0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: mov w0, w0
 ; nextln: ret
 
 function %f_s_32_64(i32) -> i64 {
@@ -140,8 +107,5 @@ block0(v0: i32):
   return v1
 }
 
-; check: stp fp, lr, [sp, #-16]!
-; nextln: mov fp, sp
-; nextln: sxtw x0, w0
-; nextln: ldp fp, lr, [sp], #16
+; check: sxtw x0, w0
 ; nextln: ret

From 73583904efb4529fabdd23cb731224def45b7544 Mon Sep 17 00:00:00 2001
From: Anton Kirilov <anton.kirilov@arm.com>
Date: Fri, 20 Aug 2021 20:05:26 +0100
Subject: [PATCH 2/3] Cranelift x64: Simplify leaf functions that do not use
 the stack

Copyright (c) 2021, Arm Limited.
---
 cranelift/codegen/src/isa/x64/abi.rs          |  13 +-
 .../src/isa/x64/inst/unwind/systemv.rs        |   2 +-
 cranelift/filetests/filetests/isa/x64/b1.clif |  22 +-
 .../filetests/filetests/isa/x64/basic.clif    |   4 -
 .../filetests/filetests/isa/x64/bextend.clif  |   6 +-
 .../filetests/isa/x64/clz-lzcnt.clif          |   8 -
 .../filetests/filetests/isa/x64/ctz-bmi1.clif |   8 -
 .../filetests/filetests/isa/x64/i128.clif     | 201 +++---------------
 .../filetests/isa/x64/move-elision.clif       |   6 +-
 .../filetests/isa/x64/popcnt-use-popcnt.clif  |   8 -
 .../filetests/filetests/isa/x64/popcnt.clif   |   4 -
 .../filetests/isa/x64/select-i128.clif        |   8 +-
 .../isa/x64/simd-lane-access-compile.clif     |   3 -
 .../filetests/isa/x64/struct-ret.clif         |   6 +-
 .../filetests/filetests/isa/x64/tls_elf.clif  |   6 +-
 .../filetests/isa/x64/uextend-elision.clif    |   4 -
 .../isa/x64/unused_jt_unreachable_block.clif  |   6 +-
 17 files changed, 53 insertions(+), 262 deletions(-)

diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index f0c36dd6d784..c9e186cb3de4 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -830,12 +830,15 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     }
 
     fn is_frame_setup_needed(
-        _is_leaf: bool,
-        _stack_args_size: u32,
-        _num_clobbered_callee_saves: usize,
-        _fixed_frame_storage_size: u32,
+        is_leaf: bool,
+        stack_args_size: u32,
+        num_clobbered_callee_saves: usize,
+        fixed_frame_storage_size: u32,
     ) -> bool {
-        true
+        !is_leaf
+            || stack_args_size > 0
+            || num_clobbered_callee_saves > 0
+            || fixed_frame_storage_size > 0
     }
 }
 
diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
index 9115db06714a..73d217ad0bc6 100644
--- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
+++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
@@ -172,7 +172,7 @@ mod tests {
             _ => panic!("expected unwind information"),
         };
 
-        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 22, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
+        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 10, lsda: None, instructions: [] }");
     }
 
     fn create_multi_return_function(call_conv: CallConv) -> Function {
diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif
index cbd265a9eacd..354d468b9b0e 100644
--- a/cranelift/filetests/filetests/isa/x64/b1.clif
+++ b/cranelift/filetests/filetests/isa/x64/b1.clif
@@ -2,71 +2,55 @@ test compile
 target x86_64 machinst
 
 function %f0(b1, i32, i32) -> i32 {
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
 
 block0(v0: b1, v1: i32, v2: i32):
     v3 = select.i32 v0, v1, v2
-; nextln: testb   $$1, %dil
+; check: testb   $$1, %dil
 ; nextln: cmovnzl %esi, %edx
 
     return v3
 ; nextln: movq    %rdx, %rax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 }
 
 function %f1(b1) -> i32 {
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
 
 block0(v0: b1):
     brnz v0, block1
     jump block2
-; nextln: testb   $$1, %dil
+; check: testb   $$1, %dil
 ; nextln: jnz     label1; j label2
 
 block1:
     v1 = iconst.i32 1
     return v1
 ; check:  movl    $$1, %eax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 block2:
     v2 = iconst.i32 2
     return v2
 ; check:  movl    $$2, %eax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 }
 
 function %f2(b1) -> i32 {
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
 
 block0(v0: b1):
     brz v0, block1
     jump block2
-; nextln: testb   $$1, %dil
+; check: testb   $$1, %dil
 ; nextln: jz      label1; j label2
 
 block1:
     v1 = iconst.i32 1
     return v1
 ; check:  movl    $$1, %eax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 block2:
     v2 = iconst.i32 2
     return v2
 ; check:  movl    $$2, %eax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 }
diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif
index 8b43d70c7cb7..954b37c1bd79 100644
--- a/cranelift/filetests/filetests/isa/x64/basic.clif
+++ b/cranelift/filetests/filetests/isa/x64/basic.clif
@@ -3,13 +3,9 @@ target x86_64 machinst
 
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
-    ; check: pushq   %rbp
-    ; check: movq    %rsp, %rbp
     v2 = iadd v0, v1
     ; check: addl    %esi, %edi
     return v2
     ; check: movq    %rdi, %rax
-    ; check: movq    %rbp, %rsp
-    ; check: popq    %rbp
     ; check: ret
 }
diff --git a/cranelift/filetests/filetests/isa/x64/bextend.clif b/cranelift/filetests/filetests/isa/x64/bextend.clif
index 6b53f3c3bdcc..a828775b9061 100644
--- a/cranelift/filetests/filetests/isa/x64/bextend.clif
+++ b/cranelift/filetests/filetests/isa/x64/bextend.clif
@@ -7,10 +7,6 @@ block0(v0: b8):
   return v1
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movsbq  %dil, %rsi
+; check: movsbq  %dil, %rsi
 ; nextln: movq    %rsi, %rax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
index f36caed88ae6..c0ef764df8d0 100644
--- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
@@ -7,12 +7,8 @@ block0(v0: i64):
     return v1
 }
 
-; check: pushq   %rbp
-; check: movq    %rsp, %rbp
 ; check: lzcntq  %rdi, %rsi
 ; check: movq    %rsi, %rax
-; check: movq    %rbp, %rsp
-; check: popq    %rbp
 ; check: ret
 
 function %clz(i32) -> i32 {
@@ -21,10 +17,6 @@ block0(v0: i32):
     return v1
 }
 
-; check: pushq   %rbp
-; check: movq    %rsp, %rbp
 ; check: lzcntl  %edi, %esi
 ; check: movq    %rsi, %rax
-; check: movq    %rbp, %rsp
-; check: popq    %rbp
 ; check: ret
diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
index 5931451e11f1..811dbe3fb040 100644
--- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
+++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
@@ -7,12 +7,8 @@ block0(v0: i64):
     return v1
 }
 
-; check: pushq   %rbp
-; check: movq    %rsp, %rbp
 ; check: tzcntq  %rdi, %rsi
 ; check: movq    %rsi, %rax
-; check: movq    %rbp, %rsp
-; check: popq    %rbp
 ; check: ret
 
 function %ctz(i32) -> i32 {
@@ -21,10 +17,6 @@ block0(v0: i32):
     return v1
 }
 
-; check: pushq   %rbp
-; check: movq    %rsp, %rbp
 ; check: tzcntl  %edi, %esi
 ; check: movq    %rsi, %rax
-; check: movq    %rbp, %rsp
-; check: popq    %rbp
 ; check: ret
diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif
index 61783e366d23..9d7fb524d9ae 100644
--- a/cranelift/filetests/filetests/isa/x64/i128.clif
+++ b/cranelift/filetests/filetests/isa/x64/i128.clif
@@ -3,122 +3,96 @@ set enable_llvm_abi_extensions=true
 target x86_64 machinst
 
 function %f0(i128, i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = iadd v0, v1
-; nextln:  addq    %rdx, %rdi
+; check:  addq    %rdx, %rdi
 ; nextln:  adcq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f1(i128, i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = isub v0, v1
-; nextln:  subq    %rdx, %rdi
+; check:  subq    %rdx, %rdi
 ; nextln:  sbbq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f2(i128, i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = band v0, v1
-; nextln:  andq    %rdx, %rdi
+; check:  andq    %rdx, %rdi
 ; nextln:  andq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f3(i128, i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = bor v0, v1
-; nextln:  orq     %rdx, %rdi
+; check:  orq     %rdx, %rdi
 ; nextln:  orq     %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f4(i128, i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = bxor v0, v1
-; nextln:  xorq    %rdx, %rdi
+; check:  xorq    %rdx, %rdi
 ; nextln:  xorq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f5(i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
 
     v1 = bnot v0
-; nextln:  notq    %rdi
+; check:  notq    %rdi
 ; nextln:  notq    %rsi
 
     return v1
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f6(i128, i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 ; v0 in rdi:rsi, v1 in rdx:rcx
 
     v2 = imul v0, v1
-; nextln:  movq    %rsi, %rax
+; check:  movq    %rsi, %rax
 ; nextln:  movq    %rcx, %r8
 ; nextln:  movq    %rdi, %rsi
 ; nextln:  imulq   %rdx, %rsi
@@ -133,44 +107,32 @@ block0(v0: i128, v1: i128):
 ; nextln:  movq    %rcx, %rdx
 
     return v2
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f7(i64, i64) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64, v1: i64):
     v2 = iconcat.i64 v0, v1
-; nextln:  movq    %rdi, %rax
+; check:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
 
     return v2
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f8(i128) -> i64, i64 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1, v2 = isplit.i128 v0
-; nextln:  movq    %rdi, %rax
+; check:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
 
     return v1, v2
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f9(i128, i128) -> b1 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
     v2 = icmp eq v0, v1
@@ -190,7 +152,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %rax, %r8
 ; nextln: andq    $$1, %r8
 ; nextln: setnz   %r8b
- 
+
     v4 = icmp slt v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setl    %r9b
@@ -201,7 +163,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %r9, %r10
 ; nextln: andq    $$1, %r10
 ; nextln: setnz   %r9b
- 
+
     v5 = icmp sle v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setl    %r10b
@@ -212,7 +174,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %r10, %r11
 ; nextln: andq    $$1, %r11
 ; nextln: setnz   %r10b
- 
+
     v6 = icmp sgt v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setnle  %r11b
@@ -290,14 +252,10 @@ block0(v0: i128, v1: i128):
     v20 = band v19, v16
 
     return v20
-; check:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
-; nextln:  ret
+; check:  ret
 }
 
 function %f10(i128) -> i32 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     brz v0, block1
@@ -307,7 +265,7 @@ block0(v0: i128):
 ; nextln: setz    %sil
 ; nextln: andb    %dil, %sil
 ; nextln: jnz     label1; j label2
- 
+
     jump block2
 
 block1:
@@ -318,14 +276,10 @@ block2:
     v2 = iconst.i32 2
     return v2
 
-; check:   movq    %rbp, %rsp
-; nextln:  popq    %rbp
-; nextln:  ret
+; check:  ret
 }
 
 function %f11(i128) -> i32 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     brnz v0, block1
@@ -345,138 +299,106 @@ block2:
     v2 = iconst.i32 2
     return v2
 
-; check:   movq    %rbp, %rsp
-; nextln:  popq    %rbp
-; nextln:  ret
+; check:  ret
 }
 
 function %f12(i64) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64):
     v1 = uextend.i128 v0
     return v1
 
-; nextln:  movq    %rdi, %rsi
+; check:  movq    %rdi, %rsi
 ; nextln:  xorq    %rdi, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f13(i64) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64):
     v1 = sextend.i128 v0
     return v1
 
-; nextln:  movq    %rdi, %rsi
+; check:  movq    %rdi, %rsi
 ; nextln:  movq    %rsi, %rdi
 ; nextln:  sarq    $$63, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f14(i8) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i8):
     v1 = sextend.i128 v0
     return v1
 
-; nextln:  movsbq  %dil, %rsi
+; check:  movsbq  %dil, %rsi
 ; nextln:  movq    %rsi, %rdi
 ; nextln:  sarq    $$63, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f15(i8) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i8):
     v1 = uextend.i128 v0
     return v1
 
-; nextln:  movzbq  %dil, %rsi
+; check:  movzbq  %dil, %rsi
 ; nextln:  xorq    %rdi, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 
 }
 
 function %f16(i128) -> i64 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = ireduce.i64 v0
     return v1
 
-; nextln:  movq    %rdi, %rax
+; check:  movq    %rdi, %rax
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f17(i128) -> i8 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = ireduce.i8 v0
     return v1
 
-; nextln:  movq    %rdi, %rax
+; check:  movq    %rdi, %rax
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f18(b1) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: b1):
     v1 = bint.i128 v0
     return v1
 
-; nextln: movq    %rdi, %rsi
+; check: movq    %rdi, %rsi
 ; nextln: andq    $$1, %rsi
 ; nextln: xorq    %rdi, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f19(i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = popcnt.i128 v0
@@ -528,16 +450,11 @@ block0(v0: i128):
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
 
-
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 
 function %f20(i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = bitrev.i128 v0
@@ -640,16 +557,12 @@ block0(v0: i128):
 ; nextln: movq    %rcx, %rax
 ; nextln: movq    %rdi, %rdx
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 ; Shifts are covered by run-tests in shift-i128-run.clif.
 
 function %f21(i128, i64) {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i64):
     store.i128 v0, v1
@@ -658,14 +571,10 @@ block0(v0: i128, v1: i64):
 ; check:  movq    %rdi, 0(%rdx)
 ; nextln: movq    %rsi, 8(%rdx)
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f22(i64) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64):
     v1 = load.i128 v0
@@ -676,8 +585,6 @@ block0(v0: i64):
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
 
-; nextln:  movq    %rbp, %rsp
-; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
@@ -697,9 +604,7 @@ block2(v6: i128):
     v8 = iadd.i128 v6, v7
     return v8
 
-; check: pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: testb   $$1, %dl
+; check: testb   $$1, %dl
 ; nextln: jnz     label1; j label2
 ; check: Block 1:
 ; check:  movl    $$0, %esi
@@ -710,8 +615,6 @@ block2(v6: i128):
 ; nextln: adcq    %rcx, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 ; check: Block 2:
 ; check:  movl    $$0, %esi
@@ -722,10 +625,8 @@ block2(v6: i128):
 ; nextln: adcq    %rcx, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
- 
+
 }
 
 function %f24(i128, i128, i64, i128, i128, i128) -> i128 {
@@ -772,8 +673,6 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
 }
 
 function %f25(i128) -> i128, i128, i128, i64, i128, i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = ireduce.i64 v0
@@ -828,9 +727,7 @@ block0(v0: i128):
     v1 = clz.i128 v0
     return v1
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movabsq $$-1, %rcx
+; check: movabsq $$-1, %rcx
 ; nextln: bsrq    %rsi, %rax
 ; nextln: cmovzq  %rcx, %rax
 ; nextln: movl    $$63, %esi
@@ -846,8 +743,6 @@ block0(v0: i128):
 ; nextln: xorq    %rsi, %rsi
 ; nextln: movq    %rdi, %rax
 ; nextln: movq    %rsi, %rdx
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 }
@@ -858,9 +753,7 @@ block0(v0: i128):
     return v1
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rsi, %rax
+; check: movq    %rsi, %rax
 ; nextln: movl    $$64, %ecx
 ; nextln: bsfq    %rdi, %rsi
 ; nextln: cmovzq  %rcx, %rsi
@@ -873,8 +766,6 @@ block0(v0: i128):
 ; nextln: xorq    %rdi, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 function %f29(i8, i128) -> i8 {
@@ -883,13 +774,9 @@ block0(v0: i8, v1: i128):
     return v2
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rsi, %rcx
+; check: movq    %rsi, %rcx
 ; nextln: shll    %cl, %edi
 ; nextln: movq    %rdi, %rax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 function %f30(i128, i128) -> i128 {
@@ -898,9 +785,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rsi, %rax
+; check: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rsi
 ; nextln: movq    %rdx, %rcx
 ; nextln: shlq    %cl, %rsi
@@ -920,8 +805,6 @@ block0(v0: i128, v1: i128):
 ; nextln: cmovnzq %rsi, %rax
 ; nextln: movq    %rax, %rdx
 ; nextln: movq    %rcx, %rax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 function %f31(i128, i128) -> i128 {
@@ -930,9 +813,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rdi, %rax
+; check: movq    %rdi, %rax
 ; nextln: movq    %rsi, %rdi
 ; nextln: movq    %rdi, %rsi
 ; nextln: movq    %rdx, %rcx
@@ -954,8 +835,6 @@ block0(v0: i128, v1: i128):
 ; nextln: cmovnzq %rsi, %rcx
 ; nextln: movq    %rax, %rdx
 ; nextln: movq    %rcx, %rax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 function %f32(i128, i128) -> i128 {
@@ -964,9 +843,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rdi, %r8
+; check: movq    %rdi, %r8
 ; nextln: movq    %rsi, %rdi
 ; nextln: movq    %rdi, %rsi
 ; nextln: movq    %rdx, %rcx
@@ -989,8 +866,6 @@ block0(v0: i128, v1: i128):
 ; nextln: cmovnzq %rsi, %rcx
 ; nextln: movq    %rcx, %rax
 ; nextln: movq    %rdi, %rdx
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 function %f33(i128, i128) -> i128 {
@@ -999,9 +874,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rdi, %r8
+; check: movq    %rdi, %r8
 ; nextln: movq    %r8, %r9
 ; nextln: movq    %rdx, %rcx
 ; nextln: shlq    %cl, %r9
@@ -1046,8 +919,6 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %rax, %rcx
 ; nextln: movq    %r8, %rax
 ; nextln: movq    %rcx, %rdx
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 
 
@@ -1057,9 +928,7 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rsi, %r9
+; check: movq    %rsi, %r9
 ; nextln: movq    %rdx, %rcx
 ; nextln: shrq    %cl, %r9
 ; nextln: movq    %rdi, %rax
@@ -1104,6 +973,4 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %rax, %rcx
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rcx, %rdx
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
-; nextln: ret
\ No newline at end of file
+; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif
index 5b23afb8d3d5..fbd62673e14c 100644
--- a/cranelift/filetests/filetests/isa/x64/move-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif
@@ -12,9 +12,5 @@ block0(v0: i32x4):
     v3 = raw_bitcast.b8x16 v2
     return v3
 }
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
-; nextln: ret
+; check: ret
 
diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
index 2049f539622d..09a8d493a7c6 100644
--- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
@@ -7,12 +7,8 @@ block0(v0: i64):
     return v1
 }
 
-; check: pushq   %rbp
-; check: movq    %rsp, %rbp
 ; check: popcntq %rdi, %rsi
 ; check: movq    %rsi, %rax
-; check: movq    %rbp, %rsp
-; check: popq    %rbp
 ; check: ret
 
 function %popcnt(i32) -> i32 {
@@ -21,10 +17,6 @@ block0(v0: i32):
     return v1
 }
 
-; check: pushq   %rbp
-; check: movq    %rsp, %rbp
 ; check: popcntl %edi, %esi
 ; check: movq    %rsi, %rax
-; check: movq    %rbp, %rsp
-; check: popq    %rbp
 ; check: ret
diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif
index df68f6b4b701..a35da8393ed6 100644
--- a/cranelift/filetests/filetests/isa/x64/popcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif
@@ -76,8 +76,6 @@ block0(v0: i32):
 ; nextln: imull   $$16843009, %esi
 ; nextln: shrl    $$24, %esi
 ; nextln: movq    %rsi, %rax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 }
 
@@ -104,7 +102,5 @@ block0(v0: i64):
 ; nextln: imull   $$16843009, %esi
 ; nextln: shrl    $$24, %esi
 ; nextln: movq    %rsi, %rax
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
 }
diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif
index af6996f85ffa..9cd0b3beca39 100644
--- a/cranelift/filetests/filetests/isa/x64/select-i128.clif
+++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif
@@ -3,14 +3,12 @@ set enable_llvm_abi_extensions=true
 target x86_64 machinst
 
 function %f0(i32, i128, i128) -> i128 {
-; check:   pushq   %rbp
-; nextln:  movq    %rsp, %rbp
 
 block0(v0: i32, v1: i128, v2: i128):
 
     v3 = iconst.i32 42
     v4 = icmp.i32 eq v0, v3
-; nextln: movl    $$42, %eax
+; check: movl    $$42, %eax
 ; nextln: cmpl    %eax, %edi
 
     v5 = select.i128 v4, v1, v2
@@ -21,9 +19,7 @@ block0(v0: i32, v1: i128, v2: i128):
 ; nextln: movq    %rcx, %rax
 ; nextln: movq    %r8, %rdx
 
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
- 
+
 }
 
diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
index 2f6a8c7dfda1..112698ebc729 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
@@ -115,9 +115,6 @@ block0(v0: i32):
 function %load32_zero_float(f32) -> f32x4 {
 block0(v0: f32):
     v1 = scalar_to_vector.f32x4 v0
-    ; regex: MOV=movap*
-    ; check: pushq
-    ; not: $MOV
     ; check: ret
     return v1
 }
diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
index ee59ff496364..3a213ffaaf60 100644
--- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif
+++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
@@ -8,11 +8,7 @@ block0(v0: i64):
     return
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rdi, %rax
+; check: movq    %rdi, %rax
 ; nextln: movl    $$42, %esi
 ; nextln: movq    %rsi, 0(%rdi)
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif
index 37a4698619ed..02cae8143b33 100644
--- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif
+++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif
@@ -10,9 +10,5 @@ block0(v0: i32):
     return v1
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: elf_tls_get_addr User { namespace: 1, index: 0 }
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
+; check: elf_tls_get_addr User { namespace: 1, index: 0 }
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
index ef43c3dd03e6..6412c663e7cb 100644
--- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
@@ -3,14 +3,10 @@ target x86_64 machinst
 
 function %elide_uextend_add(i32, i32) -> i64 {
 block0(v0: i32, v1: i32):
-    ; check: pushq   %rbp
-    ; check: movq    %rsp, %rbp
     v2 = iadd v0, v1
     ; check: addl    %esi, %edi
     v3 = uextend.i64 v2
     ; check: movq    %rdi, %rax
-    ; check: movq    %rbp, %rsp
-    ; check: popq    %rbp
     ; check: ret
     return v3
 }
diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
index 5ddd4b20d3aa..26ee0b6bd54b 100644
--- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
+++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
@@ -13,8 +13,4 @@ block1:
     trap unreachable
 }
 
-; check:  pushq   %rbp
-; nextln: movq    %rsp, %rbp
-; nextln: movq    %rbp, %rsp
-; nextln: popq    %rbp
-; nextln: ret
+; check: ret

From 5a39e97db11adeaf80172a47a096aafff9d6b031 Mon Sep 17 00:00:00 2001
From: Anton Kirilov <anton.kirilov@arm.com>
Date: Thu, 26 Aug 2021 17:41:57 +0100
Subject: [PATCH 3/3] Revert "Cranelift x64: Simplify leaf functions that do
 not use the stack"

This reverts commit a531d78c2842fb41405e5e860e9df23c9a199877.

Copyright (c) 2021, Arm Limited.
---
 cranelift/codegen/src/isa/x64/abi.rs          |  13 +-
 .../src/isa/x64/inst/unwind/systemv.rs        |   2 +-
 cranelift/filetests/filetests/isa/x64/b1.clif |  22 +-
 .../filetests/filetests/isa/x64/basic.clif    |   4 +
 .../filetests/filetests/isa/x64/bextend.clif  |   6 +-
 .../filetests/isa/x64/clz-lzcnt.clif          |   8 +
 .../filetests/filetests/isa/x64/ctz-bmi1.clif |   8 +
 .../filetests/filetests/isa/x64/i128.clif     | 201 +++++++++++++++---
 .../filetests/isa/x64/move-elision.clif       |   6 +-
 .../filetests/isa/x64/popcnt-use-popcnt.clif  |   8 +
 .../filetests/filetests/isa/x64/popcnt.clif   |   4 +
 .../filetests/isa/x64/select-i128.clif        |   8 +-
 .../isa/x64/simd-lane-access-compile.clif     |   3 +
 .../filetests/isa/x64/struct-ret.clif         |   6 +-
 .../filetests/filetests/isa/x64/tls_elf.clif  |   6 +-
 .../filetests/isa/x64/uextend-elision.clif    |   4 +
 .../isa/x64/unused_jt_unreachable_block.clif  |   6 +-
 17 files changed, 262 insertions(+), 53 deletions(-)

diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index c9e186cb3de4..f0c36dd6d784 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -830,15 +830,12 @@ impl ABIMachineSpec for X64ABIMachineSpec {
     }
 
     fn is_frame_setup_needed(
-        is_leaf: bool,
-        stack_args_size: u32,
-        num_clobbered_callee_saves: usize,
-        fixed_frame_storage_size: u32,
+        _is_leaf: bool,
+        _stack_args_size: u32,
+        _num_clobbered_callee_saves: usize,
+        _fixed_frame_storage_size: u32,
     ) -> bool {
-        !is_leaf
-            || stack_args_size > 0
-            || num_clobbered_callee_saves > 0
-            || fixed_frame_storage_size > 0
+        true
     }
 }
 
diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
index 73d217ad0bc6..9115db06714a 100644
--- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
+++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
@@ -172,7 +172,7 @@ mod tests {
             _ => panic!("expected unwind information"),
         };
 
-        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 10, lsda: None, instructions: [] }");
+        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 22, lsda: None, instructions: [(1, CfaOffset(16)), (1, Offset(Register(6), -16)), (4, CfaRegister(Register(6)))] }");
     }
 
     fn create_multi_return_function(call_conv: CallConv) -> Function {
diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif
index 354d468b9b0e..cbd265a9eacd 100644
--- a/cranelift/filetests/filetests/isa/x64/b1.clif
+++ b/cranelift/filetests/filetests/isa/x64/b1.clif
@@ -2,55 +2,71 @@ test compile
 target x86_64 machinst
 
 function %f0(b1, i32, i32) -> i32 {
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
 
 block0(v0: b1, v1: i32, v2: i32):
     v3 = select.i32 v0, v1, v2
-; check: testb   $$1, %dil
+; nextln: testb   $$1, %dil
 ; nextln: cmovnzl %esi, %edx
 
     return v3
 ; nextln: movq    %rdx, %rax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 }
 
 function %f1(b1) -> i32 {
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
 
 block0(v0: b1):
     brnz v0, block1
     jump block2
-; check: testb   $$1, %dil
+; nextln: testb   $$1, %dil
 ; nextln: jnz     label1; j label2
 
 block1:
     v1 = iconst.i32 1
     return v1
 ; check:  movl    $$1, %eax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 block2:
     v2 = iconst.i32 2
     return v2
 ; check:  movl    $$2, %eax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 }
 
 function %f2(b1) -> i32 {
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
 
 block0(v0: b1):
     brz v0, block1
     jump block2
-; check: testb   $$1, %dil
+; nextln: testb   $$1, %dil
 ; nextln: jz      label1; j label2
 
 block1:
     v1 = iconst.i32 1
     return v1
 ; check:  movl    $$1, %eax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 block2:
     v2 = iconst.i32 2
     return v2
 ; check:  movl    $$2, %eax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 }
diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif
index 954b37c1bd79..8b43d70c7cb7 100644
--- a/cranelift/filetests/filetests/isa/x64/basic.clif
+++ b/cranelift/filetests/filetests/isa/x64/basic.clif
@@ -3,9 +3,13 @@ target x86_64 machinst
 
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
+    ; check: pushq   %rbp
+    ; check: movq    %rsp, %rbp
     v2 = iadd v0, v1
     ; check: addl    %esi, %edi
     return v2
     ; check: movq    %rdi, %rax
+    ; check: movq    %rbp, %rsp
+    ; check: popq    %rbp
     ; check: ret
 }
diff --git a/cranelift/filetests/filetests/isa/x64/bextend.clif b/cranelift/filetests/filetests/isa/x64/bextend.clif
index a828775b9061..6b53f3c3bdcc 100644
--- a/cranelift/filetests/filetests/isa/x64/bextend.clif
+++ b/cranelift/filetests/filetests/isa/x64/bextend.clif
@@ -7,6 +7,10 @@ block0(v0: b8):
   return v1
 }
 
-; check: movsbq  %dil, %rsi
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movsbq  %dil, %rsi
 ; nextln: movq    %rsi, %rax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
index c0ef764df8d0..f36caed88ae6 100644
--- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
@@ -7,8 +7,12 @@ block0(v0: i64):
     return v1
 }
 
+; check: pushq   %rbp
+; check: movq    %rsp, %rbp
 ; check: lzcntq  %rdi, %rsi
 ; check: movq    %rsi, %rax
+; check: movq    %rbp, %rsp
+; check: popq    %rbp
 ; check: ret
 
 function %clz(i32) -> i32 {
@@ -17,6 +21,10 @@ block0(v0: i32):
     return v1
 }
 
+; check: pushq   %rbp
+; check: movq    %rsp, %rbp
 ; check: lzcntl  %edi, %esi
 ; check: movq    %rsi, %rax
+; check: movq    %rbp, %rsp
+; check: popq    %rbp
 ; check: ret
diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
index 811dbe3fb040..5931451e11f1 100644
--- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
+++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
@@ -7,8 +7,12 @@ block0(v0: i64):
     return v1
 }
 
+; check: pushq   %rbp
+; check: movq    %rsp, %rbp
 ; check: tzcntq  %rdi, %rsi
 ; check: movq    %rsi, %rax
+; check: movq    %rbp, %rsp
+; check: popq    %rbp
 ; check: ret
 
 function %ctz(i32) -> i32 {
@@ -17,6 +21,10 @@ block0(v0: i32):
     return v1
 }
 
+; check: pushq   %rbp
+; check: movq    %rsp, %rbp
 ; check: tzcntl  %edi, %esi
 ; check: movq    %rsi, %rax
+; check: movq    %rbp, %rsp
+; check: popq    %rbp
 ; check: ret
diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif
index 9d7fb524d9ae..61783e366d23 100644
--- a/cranelift/filetests/filetests/isa/x64/i128.clif
+++ b/cranelift/filetests/filetests/isa/x64/i128.clif
@@ -3,96 +3,122 @@ set enable_llvm_abi_extensions=true
 target x86_64 machinst
 
 function %f0(i128, i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = iadd v0, v1
-; check:  addq    %rdx, %rdi
+; nextln:  addq    %rdx, %rdi
 ; nextln:  adcq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f1(i128, i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = isub v0, v1
-; check:  subq    %rdx, %rdi
+; nextln:  subq    %rdx, %rdi
 ; nextln:  sbbq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f2(i128, i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = band v0, v1
-; check:  andq    %rdx, %rdi
+; nextln:  andq    %rdx, %rdi
 ; nextln:  andq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f3(i128, i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = bor v0, v1
-; check:  orq     %rdx, %rdi
+; nextln:  orq     %rdx, %rdi
 ; nextln:  orq     %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f4(i128, i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 
     v2 = bxor v0, v1
-; check:  xorq    %rdx, %rdi
+; nextln:  xorq    %rdx, %rdi
 ; nextln:  xorq    %rcx, %rsi
 
     return v2
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f5(i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
 
     v1 = bnot v0
-; check:  notq    %rdi
+; nextln:  notq    %rdi
 ; nextln:  notq    %rsi
 
     return v1
 ; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f6(i128, i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
 ; v0 in rdi:rsi, v1 in rdx:rcx
 
     v2 = imul v0, v1
-; check:  movq    %rsi, %rax
+; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rcx, %r8
 ; nextln:  movq    %rdi, %rsi
 ; nextln:  imulq   %rdx, %rsi
@@ -107,32 +133,44 @@ block0(v0: i128, v1: i128):
 ; nextln:  movq    %rcx, %rdx
 
     return v2
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f7(i64, i64) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64, v1: i64):
     v2 = iconcat.i64 v0, v1
-; check:  movq    %rdi, %rax
+; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
 
     return v2
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f8(i128) -> i64, i64 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1, v2 = isplit.i128 v0
-; check:  movq    %rdi, %rax
+; nextln:  movq    %rdi, %rax
 ; nextln:  movq    %rsi, %rdx
 
     return v1, v2
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f9(i128, i128) -> b1 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i128):
     v2 = icmp eq v0, v1
@@ -152,7 +190,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %rax, %r8
 ; nextln: andq    $$1, %r8
 ; nextln: setnz   %r8b
-
+ 
     v4 = icmp slt v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setl    %r9b
@@ -163,7 +201,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %r9, %r10
 ; nextln: andq    $$1, %r10
 ; nextln: setnz   %r9b
-
+ 
     v5 = icmp sle v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setl    %r10b
@@ -174,7 +212,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %r10, %r11
 ; nextln: andq    $$1, %r11
 ; nextln: setnz   %r10b
-
+ 
     v6 = icmp sgt v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setnle  %r11b
@@ -252,10 +290,14 @@ block0(v0: i128, v1: i128):
     v20 = band v19, v16
 
     return v20
-; check:  ret
+; check:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
+; nextln:  ret
 }
 
 function %f10(i128) -> i32 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     brz v0, block1
@@ -265,7 +307,7 @@ block0(v0: i128):
 ; nextln: setz    %sil
 ; nextln: andb    %dil, %sil
 ; nextln: jnz     label1; j label2
-
+ 
     jump block2
 
 block1:
@@ -276,10 +318,14 @@ block2:
     v2 = iconst.i32 2
     return v2
 
-; check:  ret
+; check:   movq    %rbp, %rsp
+; nextln:  popq    %rbp
+; nextln:  ret
 }
 
 function %f11(i128) -> i32 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     brnz v0, block1
@@ -299,106 +345,138 @@ block2:
     v2 = iconst.i32 2
     return v2
 
-; check:  ret
+; check:   movq    %rbp, %rsp
+; nextln:  popq    %rbp
+; nextln:  ret
 }
 
 function %f12(i64) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64):
     v1 = uextend.i128 v0
     return v1
 
-; check:  movq    %rdi, %rsi
+; nextln:  movq    %rdi, %rsi
 ; nextln:  xorq    %rdi, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f13(i64) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64):
     v1 = sextend.i128 v0
     return v1
 
-; check:  movq    %rdi, %rsi
+; nextln:  movq    %rdi, %rsi
 ; nextln:  movq    %rsi, %rdi
 ; nextln:  sarq    $$63, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f14(i8) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i8):
     v1 = sextend.i128 v0
     return v1
 
-; check:  movsbq  %dil, %rsi
+; nextln:  movsbq  %dil, %rsi
 ; nextln:  movq    %rsi, %rdi
 ; nextln:  sarq    $$63, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f15(i8) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i8):
     v1 = uextend.i128 v0
     return v1
 
-; check:  movzbq  %dil, %rsi
+; nextln:  movzbq  %dil, %rsi
 ; nextln:  xorq    %rdi, %rdi
 ; nextln:  movq    %rsi, %rax
 ; nextln:  movq    %rdi, %rdx
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 
 }
 
 function %f16(i128) -> i64 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = ireduce.i64 v0
     return v1
 
-; check:  movq    %rdi, %rax
+; nextln:  movq    %rdi, %rax
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f17(i128) -> i8 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = ireduce.i8 v0
     return v1
 
-; check:  movq    %rdi, %rax
+; nextln:  movq    %rdi, %rax
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f18(b1) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: b1):
     v1 = bint.i128 v0
     return v1
 
-; check: movq    %rdi, %rsi
+; nextln: movq    %rdi, %rsi
 ; nextln: andq    $$1, %rsi
 ; nextln: xorq    %rdi, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f19(i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = popcnt.i128 v0
@@ -450,11 +528,16 @@ block0(v0: i128):
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
 
+
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 
 function %f20(i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = bitrev.i128 v0
@@ -557,12 +640,16 @@ block0(v0: i128):
 ; nextln: movq    %rcx, %rax
 ; nextln: movq    %rdi, %rdx
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 ; Shifts are covered by run-tests in shift-i128-run.clif.
 
 function %f21(i128, i64) {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128, v1: i64):
     store.i128 v0, v1
@@ -571,10 +658,14 @@ block0(v0: i128, v1: i64):
 ; check:  movq    %rdi, 0(%rdx)
 ; nextln: movq    %rsi, 8(%rdx)
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
 function %f22(i64) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i64):
     v1 = load.i128 v0
@@ -585,6 +676,8 @@ block0(v0: i64):
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
 
+; nextln:  movq    %rbp, %rsp
+; nextln:  popq    %rbp
 ; nextln:  ret
 }
 
@@ -604,7 +697,9 @@ block2(v6: i128):
     v8 = iadd.i128 v6, v7
     return v8
 
-; check: testb   $$1, %dl
+; check: pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: testb   $$1, %dl
 ; nextln: jnz     label1; j label2
 ; check: Block 1:
 ; check:  movl    $$0, %esi
@@ -615,6 +710,8 @@ block2(v6: i128):
 ; nextln: adcq    %rcx, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 ; check: Block 2:
 ; check:  movl    $$0, %esi
@@ -625,8 +722,10 @@ block2(v6: i128):
 ; nextln: adcq    %rcx, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
-
+ 
 }
 
 function %f24(i128, i128, i64, i128, i128, i128) -> i128 {
@@ -673,6 +772,8 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128):
 }
 
 function %f25(i128) -> i128, i128, i128, i64, i128, i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i128):
     v1 = ireduce.i64 v0
@@ -727,7 +828,9 @@ block0(v0: i128):
     v1 = clz.i128 v0
     return v1
 
-; check: movabsq $$-1, %rcx
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movabsq $$-1, %rcx
 ; nextln: bsrq    %rsi, %rax
 ; nextln: cmovzq  %rcx, %rax
 ; nextln: movl    $$63, %esi
@@ -743,6 +846,8 @@ block0(v0: i128):
 ; nextln: xorq    %rsi, %rsi
 ; nextln: movq    %rdi, %rax
 ; nextln: movq    %rsi, %rdx
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 }
@@ -753,7 +858,9 @@ block0(v0: i128):
     return v1
 }
 
-; check: movq    %rsi, %rax
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rsi, %rax
 ; nextln: movl    $$64, %ecx
 ; nextln: bsfq    %rdi, %rsi
 ; nextln: cmovzq  %rcx, %rsi
@@ -766,6 +873,8 @@ block0(v0: i128):
 ; nextln: xorq    %rdi, %rdi
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rdx
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 function %f29(i8, i128) -> i8 {
@@ -774,9 +883,13 @@ block0(v0: i8, v1: i128):
     return v2
 }
 
-; check: movq    %rsi, %rcx
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rsi, %rcx
 ; nextln: shll    %cl, %edi
 ; nextln: movq    %rdi, %rax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 function %f30(i128, i128) -> i128 {
@@ -785,7 +898,9 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: movq    %rsi, %rax
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rsi, %rax
 ; nextln: movq    %rdi, %rsi
 ; nextln: movq    %rdx, %rcx
 ; nextln: shlq    %cl, %rsi
@@ -805,6 +920,8 @@ block0(v0: i128, v1: i128):
 ; nextln: cmovnzq %rsi, %rax
 ; nextln: movq    %rax, %rdx
 ; nextln: movq    %rcx, %rax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 function %f31(i128, i128) -> i128 {
@@ -813,7 +930,9 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: movq    %rdi, %rax
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rdi, %rax
 ; nextln: movq    %rsi, %rdi
 ; nextln: movq    %rdi, %rsi
 ; nextln: movq    %rdx, %rcx
@@ -835,6 +954,8 @@ block0(v0: i128, v1: i128):
 ; nextln: cmovnzq %rsi, %rcx
 ; nextln: movq    %rax, %rdx
 ; nextln: movq    %rcx, %rax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 function %f32(i128, i128) -> i128 {
@@ -843,7 +964,9 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: movq    %rdi, %r8
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rdi, %r8
 ; nextln: movq    %rsi, %rdi
 ; nextln: movq    %rdi, %rsi
 ; nextln: movq    %rdx, %rcx
@@ -866,6 +989,8 @@ block0(v0: i128, v1: i128):
 ; nextln: cmovnzq %rsi, %rcx
 ; nextln: movq    %rcx, %rax
 ; nextln: movq    %rdi, %rdx
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 function %f33(i128, i128) -> i128 {
@@ -874,7 +999,9 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: movq    %rdi, %r8
+; check: pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rdi, %r8
 ; nextln: movq    %r8, %r9
 ; nextln: movq    %rdx, %rcx
 ; nextln: shlq    %cl, %r9
@@ -919,6 +1046,8 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %rax, %rcx
 ; nextln: movq    %r8, %rax
 ; nextln: movq    %rcx, %rdx
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 
 
@@ -928,7 +1057,9 @@ block0(v0: i128, v1: i128):
     return v2
 }
 
-; check: movq    %rsi, %r9
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rsi, %r9
 ; nextln: movq    %rdx, %rcx
 ; nextln: shrq    %cl, %r9
 ; nextln: movq    %rdi, %rax
@@ -973,4 +1104,6 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %rax, %rcx
 ; nextln: movq    %rsi, %rax
 ; nextln: movq    %rcx, %rdx
-; nextln: ret
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
+; nextln: ret
\ No newline at end of file
diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif
index fbd62673e14c..5b23afb8d3d5 100644
--- a/cranelift/filetests/filetests/isa/x64/move-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif
@@ -12,5 +12,9 @@ block0(v0: i32x4):
     v3 = raw_bitcast.b8x16 v2
     return v3
 }
-; check: ret
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
+; nextln: ret
 
diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
index 09a8d493a7c6..2049f539622d 100644
--- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
@@ -7,8 +7,12 @@ block0(v0: i64):
     return v1
 }
 
+; check: pushq   %rbp
+; check: movq    %rsp, %rbp
 ; check: popcntq %rdi, %rsi
 ; check: movq    %rsi, %rax
+; check: movq    %rbp, %rsp
+; check: popq    %rbp
 ; check: ret
 
 function %popcnt(i32) -> i32 {
@@ -17,6 +21,10 @@ block0(v0: i32):
     return v1
 }
 
+; check: pushq   %rbp
+; check: movq    %rsp, %rbp
 ; check: popcntl %edi, %esi
 ; check: movq    %rsi, %rax
+; check: movq    %rbp, %rsp
+; check: popq    %rbp
 ; check: ret
diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif
index a35da8393ed6..df68f6b4b701 100644
--- a/cranelift/filetests/filetests/isa/x64/popcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif
@@ -76,6 +76,8 @@ block0(v0: i32):
 ; nextln: imull   $$16843009, %esi
 ; nextln: shrl    $$24, %esi
 ; nextln: movq    %rsi, %rax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 }
 
@@ -102,5 +104,7 @@ block0(v0: i64):
 ; nextln: imull   $$16843009, %esi
 ; nextln: shrl    $$24, %esi
 ; nextln: movq    %rsi, %rax
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
 }
diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif
index 9cd0b3beca39..af6996f85ffa 100644
--- a/cranelift/filetests/filetests/isa/x64/select-i128.clif
+++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif
@@ -3,12 +3,14 @@ set enable_llvm_abi_extensions=true
 target x86_64 machinst
 
 function %f0(i32, i128, i128) -> i128 {
+; check:   pushq   %rbp
+; nextln:  movq    %rsp, %rbp
 
 block0(v0: i32, v1: i128, v2: i128):
 
     v3 = iconst.i32 42
     v4 = icmp.i32 eq v0, v3
-; check: movl    $$42, %eax
+; nextln: movl    $$42, %eax
 ; nextln: cmpl    %eax, %edi
 
     v5 = select.i128 v4, v1, v2
@@ -19,7 +21,9 @@ block0(v0: i32, v1: i128, v2: i128):
 ; nextln: movq    %rcx, %rax
 ; nextln: movq    %r8, %rdx
 
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
-
+ 
 }
 
diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
index 112698ebc729..2f6a8c7dfda1 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
@@ -115,6 +115,9 @@ block0(v0: i32):
 function %load32_zero_float(f32) -> f32x4 {
 block0(v0: f32):
     v1 = scalar_to_vector.f32x4 v0
+    ; regex: MOV=movap*
+    ; check: pushq
+    ; not: $MOV
     ; check: ret
     return v1
 }
diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
index 3a213ffaaf60..ee59ff496364 100644
--- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif
+++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
@@ -8,7 +8,11 @@ block0(v0: i64):
     return
 }
 
-; check: movq    %rdi, %rax
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rdi, %rax
 ; nextln: movl    $$42, %esi
 ; nextln: movq    %rsi, 0(%rdi)
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif
index 02cae8143b33..37a4698619ed 100644
--- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif
+++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif
@@ -10,5 +10,9 @@ block0(v0: i32):
     return v1
 }
 
-; check: elf_tls_get_addr User { namespace: 1, index: 0 }
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: elf_tls_get_addr User { namespace: 1, index: 0 }
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
index 6412c663e7cb..ef43c3dd03e6 100644
--- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
@@ -3,10 +3,14 @@ target x86_64 machinst
 
 function %elide_uextend_add(i32, i32) -> i64 {
 block0(v0: i32, v1: i32):
+    ; check: pushq   %rbp
+    ; check: movq    %rsp, %rbp
     v2 = iadd v0, v1
     ; check: addl    %esi, %edi
     v3 = uextend.i64 v2
     ; check: movq    %rdi, %rax
+    ; check: movq    %rbp, %rsp
+    ; check: popq    %rbp
     ; check: ret
     return v3
 }
diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
index 26ee0b6bd54b..5ddd4b20d3aa 100644
--- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
+++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
@@ -13,4 +13,8 @@ block1:
     trap unreachable
 }
 
-; check: ret
+; check:  pushq   %rbp
+; nextln: movq    %rsp, %rbp
+; nextln: movq    %rbp, %rsp
+; nextln: popq    %rbp
+; nextln: ret