Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1161,6 +1161,43 @@ impl ABIMachineSpec for AArch64MachineDeps {
}
}

impl AArch64CallSite {
    /// Lower this call site as a tail call.
    ///
    /// First materializes the callee's stack arguments in a temporary
    /// tail-call frame, then emits the `ReturnCall` (direct) or
    /// `ReturnCallInd` (indirect) macro instruction for the destination.
    pub fn emit_return_call(mut self, ctx: &mut Lower<Inst>, args: isle::ValueSlice) {
        // Set up the temporary frame holding the new stack arguments and
        // learn both the old and new stack-argument-area sizes.
        let (new_stack_arg_size, old_stack_arg_size) =
            self.emit_temporary_tail_call_frame(ctx, args);

        let opcode = self.opcode();
        let dest = self.dest().clone();
        let uses = self.take_uses();

        // Out-of-line call metadata shared by both the direct and the
        // indirect forms of the instruction.
        let info = Box::new(ReturnCallInfo {
            uses,
            opcode,
            old_stack_arg_size,
            new_stack_arg_size,
        });

        match dest {
            // A nearby symbol is reachable with a direct branch.
            CallDest::ExtName(callee, RelocDistance::Near) => ctx.emit(Inst::ReturnCall {
                callee: Box::new(callee),
                info,
            }),
            // A far symbol must be loaded into a register first and then
            // branched through indirectly.
            CallDest::ExtName(name, RelocDistance::Far) => {
                let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
                ctx.emit(Inst::LoadExtName {
                    rd: tmp,
                    name: Box::new(name),
                    offset: 0,
                });
                ctx.emit(Inst::ReturnCallInd {
                    callee: tmp.to_reg(),
                    info,
                });
            }
            // The callee address is already in a register.
            CallDest::Reg(callee) => ctx.emit(Inst::ReturnCallInd { callee, info }),
        }
    }
}

fn compute_arg_locs_tail<'a, I>(
params: I,
add_ret_area_ptr: bool,
Expand Down
11 changes: 11 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,16 @@
(CallInd
(info BoxCallIndInfo))

;; A return-call macro instruction.
(ReturnCall
(callee BoxExternalName)
(info BoxReturnCallInfo))

;; An indirect return-call macro instruction.
(ReturnCallInd
(callee Reg)
(info BoxReturnCallInfo))

;; A pseudo-instruction that captures register arguments in vregs.
(Args
(args VecArgPair))
Expand Down Expand Up @@ -1030,6 +1040,7 @@

(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
(type BoxReturnCallInfo (primitive BoxReturnCallInfo))
(type CondBrKind (primitive CondBrKind))
(type BranchTarget (primitive BranchTarget))
(type BoxJTSequenceInfo (primitive BoxJTSequenceInfo))
Expand Down
209 changes: 207 additions & 2 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ use cranelift_control::ControlPlane;
use regalloc2::Allocation;

use crate::binemit::{Reloc, StackMap};
use crate::ir::{types::*, RelSourceLoc};
use crate::ir::{LibCall, MemFlags, TrapCode};
use crate::ir::{self, types::*, LibCall, MemFlags, RelSourceLoc, TrapCode};
use crate::isa::aarch64::inst::*;
use crate::machinst::{ty_bits, Reg, RegClass, Writable};
use crate::trace;
Expand Down Expand Up @@ -3205,6 +3204,57 @@ impl MachInstEmit for Inst {
state.virtual_sp_offset
);
}
&Inst::ReturnCall {
ref callee,
ref info,
} => {
emit_return_call_common_sequence(
&mut allocs,
sink,
emit_info,
state,
info.new_stack_arg_size,
info.old_stack_arg_size,
&info.uses,
);

// Note: this is not `Inst::Jump { .. }.emit(..)` because we
// have different metadata in this case: we don't have a label
// for the target, but rather a function relocation.
sink.add_reloc(Reloc::Arm64Call, callee, 0);
sink.put4(enc_jump26(0b000101, 0));
sink.add_call_site(ir::Opcode::ReturnCall);

// `emit_return_call_common_sequence` emits an island if
// necessary, so we can safely disable the worst-case-size check
// in this case.
start_off = sink.cur_offset();
}
&Inst::ReturnCallInd { callee, ref info } => {
let callee = allocs.next(callee);

emit_return_call_common_sequence(
&mut allocs,
sink,
emit_info,
state,
info.new_stack_arg_size,
info.old_stack_arg_size,
&info.uses,
);

Inst::IndirectBr {
rn: callee,
targets: vec![],
}
.emit(&[], sink, emit_info, state);
sink.add_call_site(ir::Opcode::ReturnCallIndirect);

// `emit_return_call_common_sequence` emits an island if
// necessary, so we can safely disable the worst-case-size check
// in this case.
start_off = sink.cur_offset();
}
&Inst::CondBr {
taken,
not_taken,
Expand Down Expand Up @@ -3712,3 +3762,158 @@ impl MachInstEmit for Inst {
self.print_with_state(state, &mut allocs)
}
}

/// Emit the frame-teardown sequence shared by `ReturnCall` and
/// `ReturnCallInd` ("tail call") instructions: restore the return address
/// and old FP, copy the new stack arguments over the current frame's stack
/// arguments, and adjust SP for the callee. The actual jump to the callee
/// is emitted by the caller of this helper, after this sequence.
///
/// `new_stack_arg_size` / `old_stack_arg_size` are the byte sizes of the
/// callee's and the current function's stack-argument areas (both must be
/// 8-byte aligned; asserted below for the new size). `uses` are the call's
/// fixed-register arguments; their allocations are consumed here to keep
/// the allocation stream in sync with the operand collector.
fn emit_return_call_common_sequence(
    allocs: &mut AllocationConsumer<'_>,
    sink: &mut MachBuffer<Inst>,
    emit_info: &EmitInfo,
    state: &mut EmitState,
    new_stack_arg_size: u32,
    old_stack_arg_size: u32,
    uses: &CallArgList,
) {
    // Consume the allocations for the argument uses. The values already
    // live in their fixed registers; we only need to advance the
    // allocation stream past them.
    for u in uses {
        let _ = allocs.next(u.vreg);
    }

    // We are emitting a dynamic number of instructions and might need an
    // island. We emit four instructions regardless of how many stack arguments
    // we have, and then two instructions per word of stack argument space.
    let new_stack_words = new_stack_arg_size / 8;
    let insts = 4 + 2 * new_stack_words;
    let size_of_inst = 4;
    let space_needed = insts * size_of_inst;
    if sink.island_needed(space_needed) {
        // Branch over the island so fall-through execution continues here.
        let jump_around_label = sink.get_label();
        let jmp = Inst::Jump {
            dest: BranchTarget::Label(jump_around_label),
        };
        jmp.emit(&[], sink, emit_info, state);
        // NOTE(review): the extra 4 bytes presumably account for the jump
        // just emitted — confirm against MachBuffer::emit_island semantics.
        sink.emit_island(space_needed + 4, &mut state.ctrl_plane);
        sink.bind_label(jump_around_label, &mut state.ctrl_plane);
    }

    // Copy the new frame on top of our current frame.
    //
    // The current stack layout is the following:
    //
    //            | ...                 |
    //            +---------------------+
    //            | ...                 |
    //            | stack arguments     |
    //            | ...                 |
    //    current | return address      |
    //    frame   | old FP              | <-- FP
    //            | ...                 |
    //            | old stack slots     |
    //            | ...                 |
    //            +---------------------+
    //            | ...                 |
    //    new     | new stack arguments |
    //    frame   | ...                 | <-- SP
    //            +---------------------+
    //
    // We need to restore the old FP, restore the return address from the stack
    // to the link register, copy the new stack arguments over the old stack
    // arguments, adjust SP to point to the new stack arguments, and then jump
    // to the callee (which will push the old FP and RA again). Note that the
    // actual jump happens outside this helper function.

    assert_eq!(
        new_stack_arg_size % 8,
        0,
        "size of new stack arguments must be 8-byte aligned"
    );

    // The delta from our frame pointer to the (eventual) stack pointer value
    // when we jump to the tail callee. This is the difference in size of stack
    // arguments as well as accounting for the two words we pushed onto the
    // stack upon entry to this function (the return address and old frame
    // pointer).
    let fp_to_callee_sp = i64::from(old_stack_arg_size) - i64::from(new_stack_arg_size) + 16;

    let tmp1 = regs::writable_spilltmp_reg();
    let tmp2 = regs::writable_tmp2_reg();

    // Restore the return address to the link register, and load the old FP into
    // a temporary register.
    //
    // We can't put the old FP into the FP register until after we copy the
    // stack arguments into place, since that uses address modes that are
    // relative to our current FP.
    //
    // Note that the FP is saved in the function prologue for all non-leaf
    // functions, even when `preserve_frame_pointers=false`. Note also that
    // `return_call` instructions make it so that a function is considered
    // non-leaf. Therefore we always have an FP to restore here.
    Inst::LoadP64 {
        rt: tmp1,
        rt2: writable_link_reg(),
        mem: PairAMode::SignedOffset(
            regs::fp_reg(),
            SImm7Scaled::maybe_from_i64(0, types::I64).unwrap(),
        ),
        flags: MemFlags::trusted(),
    }
    .emit(&[], sink, emit_info, state);

    // Copy the new stack arguments over the old stack arguments.
    //
    // Iterate in reverse so the highest-addressed word is moved first;
    // NOTE(review): presumably this ordering avoids clobbering source words
    // when the regions overlap — confirm for the case new > old.
    for i in (0..new_stack_words).rev() {
        // Load the `i`th new stack argument word from the temporary stack
        // space.
        Inst::ULoad64 {
            rd: tmp2,
            mem: AMode::SPOffset {
                off: i64::from(i * 8),
                ty: types::I64,
            },
            flags: ir::MemFlags::trusted(),
        }
        .emit(&[], sink, emit_info, state);

        // Store it to its final destination on the stack, overwriting our
        // current frame.
        Inst::Store64 {
            rd: tmp2.to_reg(),
            mem: AMode::FPOffset {
                off: fp_to_callee_sp + i64::from(i * 8),
                ty: types::I64,
            },
            flags: ir::MemFlags::trusted(),
        }
        .emit(&[], sink, emit_info, state);
    }

    // Initialize the SP for the tail callee, deallocating the temporary stack
    // argument space and our current frame at the same time.
    //
    // NOTE(review): this assumes |fp_to_callee_sp| fits in a 12-bit
    // immediate; `Imm12::maybe_from_u64(..).unwrap()` below panics for
    // larger stack-argument deltas — confirm callers bound this size.
    let (off, alu_op) = if let Ok(off) = u64::try_from(fp_to_callee_sp) {
        (off, ALUOp::Add)
    } else {
        let abs = fp_to_callee_sp.abs();
        let off = u64::try_from(abs).unwrap();
        (off, ALUOp::Sub)
    };
    Inst::AluRRImm12 {
        alu_op,
        size: OperandSize::Size64,
        rd: regs::writable_stack_reg(),
        rn: regs::fp_reg(),
        imm12: Imm12::maybe_from_u64(off).unwrap(),
    }
    .emit(&[], sink, emit_info, state);

    // Move the old FP value from the temporary into the FP register.
    Inst::Mov {
        size: OperandSize::Size64,
        rd: regs::writable_fp_reg(),
        rm: tmp1.to_reg(),
    }
    .emit(&[], sink, emit_info, state);

    // The new stack arguments were counted in the virtual SP offset when
    // they were pushed; they are now part of the callee's frame, so undo
    // that adjustment.
    state.virtual_sp_offset -= i64::from(new_stack_arg_size);
    trace!(
        "return_call[_ind] adjusts virtual sp offset by {} -> {}",
        new_stack_arg_size,
        state.virtual_sp_offset
    );
}
59 changes: 59 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,22 @@ pub struct CallIndInfo {
pub callee_pop_size: u32,
}

/// Additional information for `return_call[_ind]` instructions, left out of
/// line to lower the size of the `Inst` enum.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo {
    /// Arguments to the call instruction, each pinned to a fixed physical
    /// register at this point.
    pub uses: CallArgList,
    /// The CLIF opcode this instruction was lowered from (presumably
    /// `ReturnCall` or `ReturnCallIndirect` — confirm at the lowering site).
    pub opcode: Opcode,
    /// The size, in bytes, of the current/old stack frame's stack arguments.
    pub old_stack_arg_size: u32,
    /// The size, in bytes, of the new stack frame's stack arguments. This is
    /// necessary for copying the frame over our current frame. It must already
    /// be allocated on the stack.
    pub new_stack_arg_size: u32,
}

/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
/// enum.
#[derive(Clone, Debug)]
Expand Down Expand Up @@ -873,6 +889,20 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
}
collector.reg_clobbers(info.clobbers);
}
&Inst::ReturnCall {
ref info,
callee: _,
} => {
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
}
&Inst::ReturnCallInd { ref info, callee } => {
collector.reg_use(callee);
for u in &info.uses {
collector.reg_fixed_use(u.vreg, u.preg);
}
}
&Inst::CondBr { ref kind, .. } => match kind {
CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => {
collector.reg_use(*rt);
Expand Down Expand Up @@ -1013,6 +1043,7 @@ impl MachInst for Inst {
fn is_term(&self) -> MachTerminator {
match self {
&Inst::Ret { .. } | &Inst::AuthenticatedRet { .. } => MachTerminator::Ret,
&Inst::ReturnCall { .. } | &Inst::ReturnCallInd { .. } => MachTerminator::RetCall,
&Inst::Jump { .. } => MachTerminator::Uncond,
&Inst::CondBr { .. } => MachTerminator::Cond,
&Inst::IndirectBr { .. } => MachTerminator::Indirect,
Expand Down Expand Up @@ -2522,6 +2553,34 @@ impl Inst {
let rn = pretty_print_reg(info.rn, allocs);
format!("blr {}", rn)
}
&Inst::ReturnCall {
ref callee,
ref info,
} => {
let mut s = format!(
"return_call {callee:?} old_stack_arg_size:{} new_stack_arg_size:{}",
info.old_stack_arg_size, info.new_stack_arg_size
);
for ret in &info.uses {
let preg = pretty_print_reg(ret.preg, &mut empty_allocs);
let vreg = pretty_print_reg(ret.vreg, allocs);
write!(&mut s, " {vreg}={preg}").unwrap();
}
s
}
&Inst::ReturnCallInd { callee, ref info } => {
let callee = pretty_print_reg(callee, allocs);
let mut s = format!(
"return_call_ind {callee} old_stack_arg_size:{} new_stack_arg_size:{}",
info.old_stack_arg_size, info.new_stack_arg_size
);
for ret in &info.uses {
let preg = pretty_print_reg(ret.preg, &mut empty_allocs);
let vreg = pretty_print_reg(ret.vreg, allocs);
write!(&mut s, " {vreg}={preg}").unwrap();
}
s
}
&Inst::Args { ref args } => {
let mut s = "args".to_string();
for arg in args {
Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -2268,6 +2268,14 @@
(rule (lower (return args))
(lower_return args))

;;;; Rules for `return_call` and `return_call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;;

;; Direct tail call: unpack the signature, external name, and relocation
;; distance from the function reference and hand off to the ABI helper.
(rule (lower (return_call (func_ref_data sig_ref extname dist) args))
(gen_return_call sig_ref extname dist args))

;; Indirect tail call: the callee address is a runtime value.
(rule (lower (return_call_indirect sig_ref callee args))
(gen_return_call_indirect sig_ref callee args))

;;;; Rules for loads ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower
Expand Down
Loading