16 changes: 16 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
@@ -1067,6 +1067,19 @@ namespace llvm {
//===--------------------------------------------------------------------===//
// X86 Implementation of the TargetLowering interface
class X86TargetLowering final : public TargetLowering {
// Copying needed for an outgoing byval argument.
enum ByValCopyKind {
// Argument is already in the correct location, no copy needed.
NoCopy,
// Argument value is currently in the local stack frame, needs copying to
// outgoing argument area.
CopyOnce,
// Argument value is currently in the outgoing argument area, but not at
// the correct offset, so needs copying via a temporary in local stack
// space.
CopyViaTemp,
};

public:
explicit X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI);
@@ -1775,6 +1788,9 @@ namespace llvm {
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
ByValCopyKind ByValNeedsCopyForTailCall(SelectionDAG &DAG, SDValue Src,
SDValue Dst,
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
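To make the new enum concrete, here is a hand-written sketch of the first two kinds in LLVM IR (the struct and function names are invented, not taken from the patch's tests, and whether a plain tail call is actually lowered as a tail call still depends on the eligibility checks in LowerCall; a CopyViaTemp sketch follows the LowerCall diff below):

```llvm
%struct.S = type { [64 x i8] }

declare void @callee(ptr byval(%struct.S))

; NoCopy: the incoming byval is forwarded at the same stack offset, so its
; bytes are already where the callee expects them.
define void @forward(ptr byval(%struct.S) %p) {
entry:
  tail call void @callee(ptr byval(%struct.S) %p)
  ret void
}

; CopyOnce: the source is a local alloca, which nothing in the outgoing
; argument area can alias, so a single copy into the argument slot suffices.
define void @local() {
entry:
  %t = alloca %struct.S
  tail call void @callee(ptr byval(%struct.S) %t)
  ret void
}
```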
188 changes: 156 additions & 32 deletions llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -2018,6 +2018,49 @@ SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}

// Returns the type of copying which is required to set up a byval argument to
// a tail-called function. This isn't needed for non-tail calls, because they
// always need the equivalent of CopyOnce, but tail calls sometimes need two
// copies to avoid clobbering another argument (CopyViaTemp), and can sometimes
// be optimised to zero copies when forwarding an argument from the caller's
// caller (NoCopy).
X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();

// Globals are always safe to copy from.
if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
return CopyOnce;

// We can only analyse frame index nodes; for anything else, conservatively
// assume we need a temporary.
auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
if (!SrcFrameIdxNode || !DstFrameIdxNode)
return CopyViaTemp;

int SrcFI = SrcFrameIdxNode->getIndex();
int DstFI = DstFrameIdxNode->getIndex();
assert(MFI.isFixedObjectIndex(DstFI) &&
"byval passed in non-fixed stack slot");

int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
int64_t DstOffset = MFI.getObjectOffset(DstFI);

// If the source is in the local frame, then the copy to the argument
// memory is always valid.
bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
if (!FixedSrc || SrcOffset < 0)
return CopyOnce;

// If the value is already in the correct location, then no copying is
// needed. If not, then we need to copy via a temporary.
return SrcOffset == DstOffset ? NoCopy : CopyViaTemp;
}

SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2037,9 +2080,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
bool IsSibcall = false;
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
CallConv, MF.getTarget().Options.GuaranteedTailCallOpt);
bool IsCalleePopSRet =
!ShouldGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
bool HasNCSR = (CB && isa<CallInst>(CB) &&
CB->hasFnAttr("no_caller_saved_registers"));
@@ -2086,7 +2130,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}

bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
if (Subtarget.isPICStyleGOT() && !ShouldGuaranteeTCO && !IsMustTail) {
// If we are using a GOT, disable tail calls to external symbols with
// default visibility. Tail calling such a symbol requires using a GOT
// relocation, which forces early binding of the symbol. This breaks code
@@ -2098,15 +2142,18 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
}

if (isTailCall && !IsMustTail) {
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
IsCalleePopSRet);
IsSibcall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
IsCalleePopSRet);

// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
if (!IsGuaranteeTCO && isTailCall)
IsSibcall = true;
if (!IsMustTail) {
isTailCall = IsSibcall;

// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
IsSibcall = IsSibcall && !ShouldGuaranteeTCO;
}

if (isTailCall)
++NumTailCalls;
@@ -2125,13 +2172,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This is a sibcall. The memory operands are available in the caller's
// own caller's stack.
NumBytes = 0;
else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
else if (ShouldGuaranteeTCO && canGuaranteeTCO(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

// A sibcall is ABI-compatible and does not need to adjust the stack pointer.
int FPDiff = 0;
if (isTailCall &&
shouldGuaranteeTCO(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt)) {
if (isTailCall && ShouldGuaranteeTCO && !IsSibcall) {
// Lower arguments at fp - stackoffset + fpdiff.
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();

@@ -2146,6 +2192,74 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned NumBytesToPush = NumBytes;
unsigned NumBytesToPop = NumBytes;

SDValue StackPtr;
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

// If we are doing a tail-call, any byval arguments will be written to stack
// space which was used for incoming arguments. If any of the values being
// used are incoming byval arguments to this function, then they might be
// overwritten by the stores of the outgoing arguments. To avoid this, we
// need to make a temporary copy of them in local stack space, then copy back
// to the argument area.
DenseMap<unsigned, SDValue> ByValTemporaries;
SDValue ByValTempChain;
if (isTailCall) {
SmallVector<SDValue, 8> ByValCopyChains;
for (const CCValAssign &VA : ArgLocs) {
unsigned ArgIdx = VA.getValNo();
SDValue Src = OutVals[ArgIdx];
ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;

if (!Flags.isByVal())
continue;

auto PtrVT = getPointerTy(DAG.getDataLayout());

if (!StackPtr.getNode())
StackPtr =
DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT);

// Destination: where this byval should live in the callee's frame
// after the tail call.
int32_t Offset = VA.getLocMemOffset() + FPDiff;
int Size = VA.getLocVT().getFixedSizeInBits() / 8;
int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
SDValue Dst = DAG.getFrameIndex(FI, PtrVT);

ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);

if (Copy == NoCopy) {
// If the argument is already at the correct offset on the stack
// (because we are forwarding a byval argument from our caller), we
// don't need any copying.
continue;
} else if (Copy == CopyOnce) {
// If the argument is in our local stack frame, no other argument
// preparation can clobber it, so we can copy it to the final location
// later.
ByValTemporaries[ArgIdx] = Src;
} else {
assert(Copy == CopyViaTemp && "unexpected enum value");
// If we might be copying this argument from the outgoing argument
// stack area, we need to copy via a temporary in the local stack
// frame.
MachineFrameInfo &MFI = MF.getFrameInfo();
int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(),
Flags.getNonZeroByValAlign(),
/*isSS=*/false);
SDValue Temp =
DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));

SDValue CopyChain =
CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
ByValCopyChains.push_back(CopyChain);
// Record the temporary so the final-copy loop below copies from it into
// the real argument slot.
ByValTemporaries[ArgIdx] = Temp;
}
}
if (!ByValCopyChains.empty())
ByValTempChain =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
}

// If we have an inalloca argument, all stack space has already been allocated
// for us and will be right at the top of the stack. We don't support multiple
// arguments passed in memory when using inalloca.
@@ -2186,7 +2300,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;

// The next loop assumes that the locations are in the same order of the
// input arguments.
@@ -2195,7 +2308,6 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,

// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
@@ -2285,7 +2397,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (ShadowReg)
RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
} else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) {
assert(VA.isMemLoc());
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
@@ -2362,7 +2474,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// For tail calls lower the arguments to the 'real' stack slots. Sibcalls
// don't need this because the eligibility check rejects calls that require
// shuffling arguments passed in memory.
if (!IsSibcall && isTailCall) {
if (isTailCall && !IsSibcall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments or the return address are stored to the
// stack, because the outgoing stack slots may alias the incoming argument
@@ -2372,6 +2484,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// would clobber.
Chain = DAG.getStackArgumentTokenFactor(Chain);

if (ByValTempChain)
Chain =
DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);

SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
@@ -2404,16 +2520,24 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));

if (Flags.isByVal()) {
// Copy relative to framepointer.
SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy(DAG.getDataLayout()));
Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
StackPtr, Source);

MemOpChains2.push_back(
CreateCopyOfByValArgument(Source, FIN, Chain, Flags, DAG, dl));
SDValue ByValSrc;
bool NeedsStackCopy;
if (auto It = ByValTemporaries.find(OutsIndex);
It != ByValTemporaries.end()) {
ByValSrc = It->second;
NeedsStackCopy = true;
} else {
ByValSrc = Arg;
NeedsStackCopy = false;
}

if (NeedsStackCopy) {
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);

MemOpChains2.push_back(CreateCopyOfByValArgument(
ByValSrc, DstAddr, Chain, Flags, DAG, dl));
}
} else {
// Store relative to framepointer.
MemOpChains2.push_back(DAG.getStore(
@@ -2846,16 +2970,16 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
bool CCMatch = CallerCC == CalleeCC;
bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
CalleeCC, MF.getTarget().Options.GuaranteedTailCallOpt);

// Win64 functions have extra shadow space for argument homing. Don't do the
// sibcall if the caller and callee have mismatched expectations for this
// space.
if (IsCalleeWin64 != IsCallerWin64)
return false;

if (IsGuaranteeTCO) {
if (ShouldGuaranteeTCO) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
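The hazard that ByValTemporaries and the extra TokenFactor guard against is easiest to see with swapped byval arguments, as in this minimal hand-written sketch (invented names, not from the patch's tests). Each incoming argument sits exactly where the other outgoing argument must be stored, so ByValNeedsCopyForTailCall returns CopyViaTemp for both: each value is first copied into a fresh local stack object (those copies joined into ByValTempChain), and only after the incoming arguments have been loaded are the final copies into the argument area emitted:

```llvm
%struct.S = type { [64 x i8] }

declare void @callee2(ptr byval(%struct.S), ptr byval(%struct.S))

; Both byvals change offset: storing %b into %a's slot would clobber %a
; before it has been read (and vice versa), so each is staged through a
; temporary in the local stack frame.
define void @swap(ptr byval(%struct.S) %a, ptr byval(%struct.S) %b) {
entry:
  musttail call void @callee2(ptr byval(%struct.S) %b, ptr byval(%struct.S) %a)
  ret void
}
```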
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/hipe-cc64.ll
@@ -21,14 +21,13 @@ define void @zap(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: movl $2, %ecx
; CHECK-NEXT: movl $3, %r8d
; CHECK-NEXT: movq %rax, %r9
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
; CHECK-NEXT: jmp foo@PLT # TAILCALL
entry:
%0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
%res = extractvalue {i64, i64, i64} %0, 2
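The same guaranteed-TCO lowering can be reproduced outside the HiPE convention with tailcc, for which shouldGuaranteeTCO holds even without -tailcallopt (a minimal sketch with invented names; the call must be emitted as a jmp rather than a call/ret pair, like the updated CHECK line above):

```llvm
declare tailcc void @target(i64)

; tailcc makes shouldGuaranteeTCO return true, so this marked tail call is
; guaranteed to be lowered as a jump to @target.
define tailcc void @trampoline(i64 %x) {
entry:
  tail call tailcc void @target(i64 %x)
  ret void
}
```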