From bf7a4a82623c22474d22a86ef91dbcf003921a1c Mon Sep 17 00:00:00 2001 From: jashook Date: Tue, 23 Oct 2018 10:27:39 -0700 Subject: [PATCH 01/24] Use fgInitArgInfo in fgCanFastTailCall This leverages the work done in #19658 to use fgInitArgInfo in fgCanFastTailCall. This removes much of the duplicate ABI code that was required before #19658. In addition, the change more clearly explains the issues with LowerFastTailCall and more clearly bails on when it can and cannot fastTailCall. Note: Part of the old logic would report no to a fastTailCall on Windows AMD64, if the struct was a power of two (engregisterable). This has been removed; however, it is unclear currently whether LowerFastTailCall/Codgen will support the change. --- src/jit/compiler.h | 32 ++- src/jit/gentree.cpp | 8 + src/jit/lower.cpp | 1 - src/jit/morph.cpp | 244 ++++++------------ .../FastTailCall/FastTailCallCandidates.cs | 146 ++++++++++- .../FastTailCallCandidates.csproj | 2 +- 6 files changed, 258 insertions(+), 175 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 674ec5394d01..9ad191fb7002 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1327,6 +1327,14 @@ struct fgArgTabEntry // Note that on ARM, if we have a double hfa, this reflects the number // of DOUBLE registers. +#if defined(UNIX_AMD64_ABI) + // Unix amd64 will split floating point types and integer types in structs + // between floating point and general purpose registers. Keep track of that + // information so we do not need to re-compute it later. + unsigned structIntRegs; + unsigned structFloatRegs; +#endif // UNIX_AMD64_ABI + // A slot is a pointer sized region in the OutArg area. unsigned slotNum; // When an argument is passed in the OutArg area this is the slot number in the OutArg area unsigned numSlots; // Count of number of slots that this argument uses @@ -1453,8 +1461,26 @@ struct fgArgTabEntry #endif } - __declspec(property(get = GetHfaType)) var_types hfaType; - var_types GetHfaType() + jitstd::pair getNumIntRegAndFloatRegForStructArg() + { + assert (this->isStruct); + +#if defined(UNIX_AMD64_ABI) + return { this->structIntRegs, this->structFloatRegs }; +#endif // UNIX_AMD64_ABI + + if (this->isPassedInFloatRegisters()) + { + return { 0, this->numRegs }; + } + else + { + return { this->numRegs, 0 }; + } + } + + __declspec(property(get = getHfaType)) var_types hfaType; + var_types getHfaType() { #ifdef FEATURE_HFA return HfaTypeFromElemKind(_hfaElemKind); @@ -1728,6 +1754,8 @@ class fgArgInfo const bool isStruct, const bool isVararg, const regNumber otherRegNum, + const unsigned structIntRegs, + const unsigned structFloatRegs, const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr); #endif // UNIX_AMD64_ABI diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index c635c327c58a..2093758cdea2 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -1078,7 +1078,15 @@ bool GenTreeCall::AreArgsComplete() const assert((gtCallLateArgs != nullptr) || !fgArgInfo->HasRegArgs()); return true; } + +#if FEATURE_FASTTAILCALL + // If the args are not complete and fgArgInfo is not null. Then most + // most likely the argInfo has been populated in fgCanFastTailCall and + // we are just re-querying this information in fgMorphArgs. +#else assert(gtCallArgs == nullptr); +#endif + return false; } diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index 809f48f0f876..b06591042bc3 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -2015,7 +2015,6 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) fgArgTabEntry* argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode); assert(argTabEntry); unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount; - noway_assert(callerArgNum < comp->info.compArgsCount); unsigned callerArgLclNum = callerArgNum; LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 5450b4396cf5..5f131be48c15 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -1177,12 +1177,16 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned const bool isStruct, const bool isVararg, const regNumber otherRegNum, + const unsigned structIntRegs, + const unsigned structFloatRegs, const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr) { fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct, isVararg); assert(curArgTabEntry != nullptr); curArgTabEntry->isStruct = isStruct; // is this a struct arg + curArgTabEntry->structIntRegs = structIntRegs; + curArgTabEntry->structFloatRegs = structFloatRegs; curArgTabEntry->checkIsStruct(); assert(numRegs <= 2); @@ -2958,7 +2962,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table. call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1, false, - callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); + callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); intArgRegNum++; #ifdef WINDOWS_AMD64_ABI @@ -3535,12 +3539,12 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) if (structDesc.IsIntegralSlot(i)) { *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs); - structIntRegs++; + ++structIntRegs; } else if (structDesc.IsSseSlot(i)) { *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs); - structFloatRegs++; + ++structFloatRegs; } } } @@ -3560,7 +3564,9 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign, isStructArg, - callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) + UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); @@ -7001,6 +7007,11 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } #endif + assert(!callee->AreArgsComplete()); + + fgInitArgInfo(callee); + fgArgInfo* argInfo = callee->fgArgInfo; + auto reportFastTailCallDecision = [this, callee](const char* msg, size_t callerStackSize, size_t calleeStackSize) { #if DEBUG if ((JitConfig.JitReportFastTailCallDecisions()) == 1) @@ -7048,6 +7059,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // // Note that callee being a vararg method is not a problem since we can account the params being passed. unsigned nCallerArgs = info.compArgsCount; + unsigned nCalleeArgs = argInfo->ArgCount(); size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount; size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount; @@ -7058,160 +7070,93 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) size_t calleeArgRegCount = 0; size_t calleeFloatArgRegCount = 0; - if (callee->gtCallObjp) // thisPtr - { - ++calleeArgRegCount; - } - - if (callee->HasRetBufArg()) // RetBuf - { - // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs. - - // If callee has RetBuf param, caller too must have it. - // Otherwise go the slow route. - if (info.compRetBuffArg == BAD_VAR_NUM) - { - reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0); - return false; - } - } - - // Count user args while tracking whether any of them is a multi-byte params - // that cannot be passed in a register. Note that we don't need to count + // Count user args while tracking whether any of them has a larger than one + // stack slot sized requirement. This requirement is required to support + // lowering the fast tail call. Which, currently only supports copying + // stack slot arguments which have only one stack slot. + // + // Note that we don't need to count // non-standard and secret params passed in registers (e.g. R10, R11) since // these won't contribute to out-going arg size. - // For each struct arg, hasMultiByteStackArgs will track if it can be passed in registers. - // If it cannot we will break the loop and not fastTailCall. This is an implementation limitation - // where the callee only is checked for non enregisterable structs. - // It is tracked with https://github.com/dotnet/coreclr/issues/12644. - bool hasMultiByteStackArgs = false; - bool hasTwoSlotSizedStruct = false; - bool hasHfaArg = false; - size_t nCalleeArgs = calleeArgRegCount; // Keep track of how many args we have. - size_t calleeStackSize = 0; - for (GenTree* args = callee->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2) - { - ++nCalleeArgs; - assert(args->OperIsList()); - GenTree* argx = args->gtOp.gtOp1; + // + // For each struct arg, determine whether the argument would have > 1 stack + // slot if on the stack If it has > 1 stack slot we will not fastTailCall. + // This is an implementation limitation of LowerFastTailCall that is tracked by: + // https://github.com/dotnet/coreclr/issues/12644. - if (varTypeIsStruct(argx)) - { - // Actual arg may be a child of a GT_COMMA. Skip over comma opers. - argx = argx->gtEffectiveVal(true /*commaOnly*/); + bool hasLargerThanOneStackSlotSizedStruct = false; + bool hasNonEnregisterableStructs = false; + size_t calleeStackSize = 0; - // Get the size of the struct and see if it is register passable. - CORINFO_CLASS_HANDLE objClass = nullptr; + for (unsigned index = 0; index < nCalleeArgs; ++index) + { + fgArgTabEntry* arg = argInfo->GetArgEntry(index); - if (argx->OperGet() == GT_OBJ) + if (!arg->isStruct) + { + if (arg->numSlots > 0) { - objClass = argx->AsObj()->gtClass; + calleeStackSize += (TARGET_POINTER_SIZE * arg->numSlots); } - else if (argx->IsLocal()) + + else if (arg->isPassedInFloatRegisters()) { - objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle(); + ++calleeFloatArgRegCount; } - if (objClass != nullptr) + else { -#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) - - unsigned typeSize = 0; - // We should have already broken out of the loop if we've set hasMultiByteStackArgs to true. - assert(!hasMultiByteStackArgs); - hasMultiByteStackArgs = - !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false, false); - -#if defined(UNIX_AMD64_ABI) - SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - - assert(objClass != nullptr); - eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc); - - if (structDesc.passedInRegisters) - { - if (structDesc.eightByteCount == 2) - { - hasTwoSlotSizedStruct = true; - } + ++calleeArgRegCount; + } + + } + else + { + // Struct Arg +#ifdef FEATURE_ARG_SPLIT + if (arg->isSplit || arg->numSlots > 0) +#else + if (arg->numSlots > 0) +#endif // FEATURE_ARG_SPLIT + { + calleeStackSize += (TARGET_POINTER_SIZE * arg->numSlots); - for (unsigned int i = 0; i < structDesc.eightByteCount; i++) - { - if (structDesc.IsIntegralSlot(i)) - { - ++calleeArgRegCount; - } - else if (structDesc.IsSseSlot(i)) - { - ++calleeFloatArgRegCount; - } - else - { - assert(false && "Invalid eightbyte classification type."); - break; - } - } - } - else + if (arg->numSlots > 1) { - calleeStackSize += roundUp(typeSize, TARGET_POINTER_SIZE); - hasMultiByteStackArgs = true; + // TODO-CQ: Fix LowerFastTailCall (GH #12644) + hasNonEnregisterableStructs = true; } + } -#elif defined(_TARGET_ARM64_) // ARM64 - var_types hfaType = GetHfaType(argx); - bool isHfaArg = varTypeIsValidHfaType(hfaType); - size_t size = 1; - - if (isHfaArg) - { - hasHfaArg = true; - - calleeFloatArgRegCount += GetHfaCount(argx); - } - else - { - // Structs are either passed in 1 or 2 (64-bit) slots - size_t roundupSize = roundUp(typeSize, TARGET_POINTER_SIZE); - size = roundupSize / TARGET_POINTER_SIZE; + if (arg->numRegs > 0) + { +#ifndef FEATURE_ARG_SPLIT + assert(arg->numSlots == 0); +#endif // FEATURE_ARG_SPLIT - if (size > 2) - { - size = 1; - } + auto numIntRegAndFloatRegArgs = arg->getNumIntRegAndFloatRegForStructArg(); - else if (size == 2) - { - hasTwoSlotSizedStruct = true; - } + calleeArgRegCount += numIntRegAndFloatRegArgs.first; + calleeFloatArgRegCount += numIntRegAndFloatRegArgs.second; - calleeArgRegCount += size; + if ((calleeArgRegCount > 1 || calleeFloatArgRegCount > 1) || ((calleeArgRegCount + calleeFloatArgRegCount) > 0)) + { + // TODO-CQ: Fix LowerFastTailCall(GH $12644) + hasLargerThanOneStackSlotSizedStruct = true; } - -#elif defined(WINDOWS_AMD64_ABI) - - ++calleeArgRegCount; - -#endif // UNIX_AMD64_ABI - -#else - assert(!"Target platform ABI rules regarding passing struct type args in registers"); - unreached(); -#endif //_TARGET_AMD64_ || _TARGET_ARM64_ - } - else - { - hasMultiByteStackArgs = true; } } - else - { - varTypeIsFloating(argx) ? ++calleeFloatArgRegCount : ++calleeArgRegCount; - } + } - // We can break early on multiByte cases. - if (hasMultiByteStackArgs) + if (callee->HasRetBufArg()) // RetBuf + { + // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs. + + // If callee has RetBuf param, caller too must have it. + // Otherwise go the slow route. + if (info.compRetBuffArg == BAD_VAR_NUM) { - break; + reportFastTailCallDecision("Callee has RetBuf but caller does not.", 0, 0); + return false; } } @@ -7241,14 +7186,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) hasStackArgs = true; } - // Go the slow route, if it has multi-byte params. This is an implementation - // limitatio see https://github.com/dotnet/coreclr/issues/12644. - if (hasMultiByteStackArgs) - { - reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize); - return false; - } - // x64 Windows: If we have more callee registers used than MAX_REG_ARG, then // make sure the callee's incoming arguments is less than the caller's if (hasStackArgs && (nCalleeArgs > nCallerArgs)) @@ -7283,30 +7220,21 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) hasStackArgs = true; } - // Go the slow route, if it has multi-byte params. This is an implementation - // limitation see https://github.com/dotnet/coreclr/issues/12644. - if (hasMultiByteStackArgs) - { - reportFastTailCallDecision("Will not fastTailCall hasMultiByteStackArgs", callerStackSize, calleeStackSize); - return false; - } - // Either the caller or callee has a >8 and <=16 byte struct and arguments that has to go on the stack. Do not // fastTailCall. // // When either the caller or callee have multi-stlot stack arguments we cannot safely // shuffle arguments in LowerFastTailCall. See https://github.com/dotnet/coreclr/issues/12468. - if (hasStackArgs && hasTwoSlotSizedStruct) + if (hasLargerThanOneStackSlotSizedStruct && calleeStackSize) { - reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasTwoSlotSizedStruct", - callerStackSize, calleeStackSize); + reportFastTailCallDecision("Will not fastTailCall hasLargerThanOneStackSlotSizedStruct && calleeStackSize", callerStackSize, + calleeStackSize); return false; } - // Callee has an HFA struct and arguments that has to go on the stack. Do not fastTailCall. - if (calleeStackSize > 0 && hasHfaArg) + if (hasNonEnregisterableStructs) { - reportFastTailCallDecision("Will not fastTailCall calleeStackSize > 0 && hasHfaArg", callerStackSize, + reportFastTailCallDecision("Will not fastTailCall hasNonEnregisterableStructs", callerStackSize, calleeStackSize); return false; } diff --git a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs index dd2e33198b8f..fd08a2920cc5 100644 --- a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs +++ b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs @@ -844,6 +844,94 @@ public static int CallerHFaCaseCalleeStackArgs(double one, // Stack Based args. //////////////////////////////////////////////////////////////////////////// + public struct StructSizeOneNotExplicit + { + public byte a; + + public StructSizeOneNotExplicit(byte a) + { + this.a = a; + } + } + + public struct StructSizeTwoNotExplicit + { + public byte a; + public byte b; + + public StructSizeTwoNotExplicit(byte a, byte b) + { + this.a = a; + this.b = b; + } + } + + public struct StructSizeThreeNotExplicit + { + public byte a; + public byte b; + public byte c; + + public StructSizeThreeNotExplicit(byte a, byte b, byte c) + { + this.a = a; + this.b = b; + this.c = c; + } + } + + public struct StructSizeFourNotExplicit + { + public int a; + + public StructSizeFourNotExplicit(int a) + { + this.a = a; + } + } + + public struct StructSizeFiveNotExplicit + { + public int a; + public byte b; + + public StructSizeFiveNotExplicit(int a, byte b) + { + this.a = a; + this.b = b; + } + } + + public struct StructSizeSixNotExplicit + { + public int a; + public byte b; + public byte c; + + public StructSizeSixNotExplicit(int a, byte b, byte c) + { + this.a = a; + this.b = b; + this.c = c; + } + } + + public struct StructSizeSevenNotExplicit + { + public int a; + public byte b; + public byte c; + public byte d; + + public StructSizeSevenNotExplicit(int a, byte b, byte c, byte d) + { + this.a = a; + this.b = b; + this.c = c; + this.d = d; + } + } + public struct StructSizeEightNotExplicit { public long a; @@ -876,7 +964,32 @@ public StructSizeSixteenNotExplicit(long a, long b) this.a = a; this.b = b; } + } + + public struct StructSize24NotExplicit + { + public long a; + public long b; + public long c; + public StructSize24NotExplicit(long a, long b, long c) + { + this.a = a; + this.b = b; + this.c = c; + } + } + + public struct StructSize48Nested + { + public StructSize24NotExplicit a; + public StructSize24NotExplicit b; + + public StructSize48Nested(long a, long b, long c, long d, long e, long f) + { + this.a = new StructSize24NotExplicit(a, b, c); + this.b = new StructSize24NotExplicit(d, e, f); + } } /// @@ -974,10 +1087,10 @@ public static int CallerGithubIssue12468(int one, [StructLayout(LayoutKind.Explicit, Size=8, CharSet=CharSet.Ansi)] public struct StructSizeThirtyTwo { - [FieldOffset(0)] public int a; - [FieldOffset(8)] public int b; - [FieldOffset(16)] public int c; - [FieldOffset(24)] public int d; + [FieldOffset(0)] public long a; + [FieldOffset(8)] public long b; + [FieldOffset(16)] public long c; + [FieldOffset(24)] public long d; public StructSizeThirtyTwo(int a, int b, int c, int d) { @@ -991,9 +1104,9 @@ public StructSizeThirtyTwo(int a, int b, int c, int d) [StructLayout(LayoutKind.Explicit, Size=8, CharSet=CharSet.Ansi)] public struct StructSizeTwentyFour { - [FieldOffset(0)] public int a; - [FieldOffset(8)] public int b; - [FieldOffset(16)] public int c; + [FieldOffset(0)] public long a; + [FieldOffset(8)] public long b; + [FieldOffset(16)] public long c; public StructSizeTwentyFour(int a, int b, int c) { @@ -1010,6 +1123,7 @@ public StructSizeTwentyFour(int a, int b, int c) public static int StackBasedCallee(int a, int b, StructSizeThirtyTwo sstt) { int count = 0; + long max = sstt.a; for (int i = 0; i < sstt.a; ++i) { if (i % 10 == 0) @@ -1098,17 +1212,23 @@ public static StructSizeThirtyTwo DoubleCountRetBuffCallerWrapper(int a, int b) { if (a % 2 == 0) { - StructSizeEightIntNotExplicit eightBytes = new StructSizeEightIntNotExplicit(a, a); a = 1; b = b + 2; - return DoubleCountRetBuffCallee(eightBytes, eightBytes, eightBytes, eightBytes, eightBytes); + return DoubleCountRetBuffCallee(new StructSizeEightIntNotExplicit(a, a), + new StructSizeEightIntNotExplicit(a, a), + new StructSizeEightIntNotExplicit(a, a), + new StructSizeEightIntNotExplicit(a, a), + new StructSizeEightIntNotExplicit(a, a)); } else { - StructSizeEightIntNotExplicit eightBytes = new StructSizeEightIntNotExplicit(b, b); a = 4; b = b + 1; - return DoubleCountRetBuffCallee(eightBytes, eightBytes, eightBytes, eightBytes, eightBytes); + return DoubleCountRetBuffCallee(new StructSizeEightIntNotExplicit(b, b), + new StructSizeEightIntNotExplicit(b, b), + new StructSizeEightIntNotExplicit(b, b), + new StructSizeEightIntNotExplicit(b, b), + new StructSizeEightIntNotExplicit(b, b)); } } @@ -1132,7 +1252,7 @@ public static int DoubleCountRetBuffCaller(int i) { StructSizeThirtyTwo retVal = DoubleCountRetBuffCallerWrapper(4, 2); - if (retVal.b == 4.0) + if (retVal.b == 6.0) { return 100; } @@ -1145,7 +1265,7 @@ public static int DoubleCountRetBuffCaller(int i) { StructSizeThirtyTwo retVal = DoubleCountRetBuffCallerWrapper(3, 1); - if (retVal.b == 1.0) + if (retVal.b == 2.0) { return 100; } diff --git a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.csproj b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.csproj index e7e45d848566..63669431584b 100644 --- a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.csproj +++ b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.csproj @@ -24,7 +24,7 @@ - None + PdbOnly True True True From 2ffa9fb183c2b5b19ff4d7af5b857dc5223ee2a9 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 15 Jul 2019 10:53:17 -0700 Subject: [PATCH 02/24] Address pr feedback for FastTailCallCandidates --- tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs index fd08a2920cc5..660b2185613b 100644 --- a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs +++ b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs @@ -1123,7 +1123,6 @@ public StructSizeTwentyFour(int a, int b, int c) public static int StackBasedCallee(int a, int b, StructSizeThirtyTwo sstt) { int count = 0; - long max = sstt.a; for (int i = 0; i < sstt.a; ++i) { if (i % 10 == 0) From 63d88bc083c75d30fd0d52f15248393a0efe7aa3 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 15 Jul 2019 10:53:48 -0700 Subject: [PATCH 03/24] Update variable name based on rebase --- src/jit/morph.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 5f131be48c15..ae5c693b005c 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7161,15 +7161,15 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } const unsigned maxRegArgs = MAX_REG_ARG; - hasTwoSlotSizedStruct = hasTwoSlotSizedStruct || info.compHasMultiSlotArgs; + hasLargerThanOneStackSlotSizedStruct = hasLargerThanOneStackSlotSizedStruct || info.compHasMultiSlotArgs; -// If we reached here means that callee has only those argument types which can be passed in -// a register and if passed on stack will occupy exactly one stack slot in out-going arg area. -// If we are passing args on stack for the callee and it has more args passed on stack than -// the caller, then fast tail call cannot be performed. -// -// Note that the GC'ness of on stack args need not match since the arg setup area is marked -// as non-interruptible for fast tail calls. + // If we reached here means that callee has only those argument types which can be passed in + // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. + // If we are passing args on stack for the callee and it has more args passed on stack than + // the caller, then fast tail call cannot be performed. + // + // Note that the GC'ness of on stack args need not match since the arg setup area is marked + // as non-interruptible for fast tail calls. #ifdef WINDOWS_AMD64_ABI assert(calleeStackSize == 0); From 589a09d33159f151eec3f69aff28015acdd19514 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 15 Jul 2019 10:57:58 -0700 Subject: [PATCH 04/24] Fix comment headers based on feedback --- src/jit/morph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index ae5c693b005c..8872ca986370 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -6954,14 +6954,14 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // callee(int, int, float, int) // // -- Callee requires stack space that is equal to the caller -- -// caller({ int, int }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack +// caller({ long, long }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack // space // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space // // -- Callee requires stack space that is less than the caller -- -// caller({ int, int }, int, { int, int }, int, { int, int }, { int, int }) 6 int register arguments, 32 byte stack +// caller({ long, long }, int, { long, long }, int, { long, long }, { long, long }) 6 int register arguments, 32 byte stack // space -// callee(int, int, int, int, int, int, { int, int } ) // 6 int register arguments, 16 byte stack space +// callee(int, int, int, int, int, int, { long, long } ) // 6 int register arguments, 16 byte stack space // // -- Callee will have all register arguments -- // caller(int) From a80b5e4b51dddd25db806cc6ddef883b2a7fdd61 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 15 Jul 2019 11:01:41 -0700 Subject: [PATCH 05/24] More feedback addressed --- src/jit/morph.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 8872ca986370..1e4a9b8ff6c4 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7072,7 +7072,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // Count user args while tracking whether any of them has a larger than one // stack slot sized requirement. This requirement is required to support - // lowering the fast tail call. Which, currently only supports copying + // lowering the fast tail call, which, currently only supports copying // stack slot arguments which have only one stack slot. // // Note that we don't need to count @@ -7080,7 +7080,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // these won't contribute to out-going arg size. // // For each struct arg, determine whether the argument would have > 1 stack - // slot if on the stack If it has > 1 stack slot we will not fastTailCall. + // slot if on the stack. If it has > 1 stack slot we will not fastTailCall. // This is an implementation limitation of LowerFastTailCall that is tracked by: // https://github.com/dotnet/coreclr/issues/12644. @@ -7090,7 +7090,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) for (unsigned index = 0; index < nCalleeArgs; ++index) { - fgArgTabEntry* arg = argInfo->GetArgEntry(index); + fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); if (!arg->isStruct) { From 1c066dbe44d27f6b004bde412c8542e50e249329 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 15 Jul 2019 11:02:44 -0700 Subject: [PATCH 06/24] Fix rebase error --- src/jit/compiler.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 9ad191fb7002..cdef769e7491 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1478,9 +1478,9 @@ struct fgArgTabEntry return { this->numRegs, 0 }; } } - - __declspec(property(get = getHfaType)) var_types hfaType; - var_types getHfaType() + + __declspec(property(get = GetHfaType)) var_types hfaType; + var_types GetHfaType() { #ifdef FEATURE_HFA return HfaTypeFromElemKind(_hfaElemKind); From 398889b986a3d2a6c4ec9ad13e30329ac2c42252 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 15 Jul 2019 21:32:36 -0700 Subject: [PATCH 07/24] Address feedback and correctly bail on byref args --- src/jit/compiler.h | 41 ++++++++++++++---- src/jit/morph.cpp | 102 +++++++++++++++++++-------------------------- 2 files changed, 76 insertions(+), 67 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index cdef769e7491..da9148c2a6af 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1461,22 +1461,45 @@ struct fgArgTabEntry #endif } - jitstd::pair getNumIntRegAndFloatRegForStructArg() + unsigned intRegCount() { - assert (this->isStruct); - #if defined(UNIX_AMD64_ABI) - return { this->structIntRegs, this->structFloatRegs }; -#endif // UNIX_AMD64_ABI + if (this->isStruct) + { + return this->structIntRegs; + } +#endif // defined(UNIX_AMD64_ABI) - if (this->isPassedInFloatRegisters()) + if (!this->isPassedInFloatRegisters()) { - return { 0, this->numRegs }; + return this->numRegs; } - else + + return 0; + + } + + unsigned floatRegCount() + { +#if defined(UNIX_AMD64_ABI) + if (this->isStruct) + { + return this->structFloatRegs; + } +#endif // defined(UNIX_AMD64_ABI) + + if (this->isPassedInFloatRegisters()) { - return { this->numRegs, 0 }; + return this->numRegs; } + + return 0; + + } + + unsigned stackSize() + { + return (TARGET_POINTER_SIZE * this->numSlots); } __declspec(property(get = GetHfaType)) var_types hfaType; diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 1e4a9b8ff6c4..b868d4a06ddc 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7058,6 +7058,19 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // out-going area required for callee is bounded by caller's fixed argument space. // // Note that callee being a vararg method is not a problem since we can account the params being passed. + // + // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg + // method. This is due to the ABI differences for native vararg methods for these platforms. There is + // work required to shuffle arguments to the correct locations. + +#if (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM_)) || (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)) + if (info.compIsVarArgs || callee->IsVarargs()) + { + reportFastTailCallDecision("Tail calls not supported on windows armarch.", 0, 0); + return false; + } +#endif // (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM_)) || defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)) + unsigned nCallerArgs = info.compArgsCount; unsigned nCalleeArgs = argInfo->ArgCount(); @@ -7074,10 +7087,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // stack slot sized requirement. This requirement is required to support // lowering the fast tail call, which, currently only supports copying // stack slot arguments which have only one stack slot. - // - // Note that we don't need to count - // non-standard and secret params passed in registers (e.g. R10, R11) since - // these won't contribute to out-going arg size. // // For each struct arg, determine whether the argument would have > 1 stack // slot if on the stack. If it has > 1 stack slot we will not fastTailCall. @@ -7086,71 +7095,42 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) bool hasLargerThanOneStackSlotSizedStruct = false; bool hasNonEnregisterableStructs = false; + bool hasByrefParameter = false; size_t calleeStackSize = 0; for (unsigned index = 0; index < nCalleeArgs; ++index) { fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); - if (!arg->isStruct) - { - if (arg->numSlots > 0) - { - calleeStackSize += (TARGET_POINTER_SIZE * arg->numSlots); - } + unsigned argStackSize = arg->stackSize(); + unsigned argFloatRegCount = arg->floatRegCount(); + unsigned argIntRegCount = arg->intRegCount(); - else if (arg->isPassedInFloatRegisters()) - { - ++calleeFloatArgRegCount; - } - else - { - ++calleeArgRegCount; - } - - } - else - { - // Struct Arg -#ifdef FEATURE_ARG_SPLIT - if (arg->isSplit || arg->numSlots > 0) -#else - if (arg->numSlots > 0) -#endif // FEATURE_ARG_SPLIT - { - calleeStackSize += (TARGET_POINTER_SIZE * arg->numSlots); + unsigned countRegistersUsedForArg = argIntRegCount + argFloatRegCount; - if (arg->numSlots > 1) - { - // TODO-CQ: Fix LowerFastTailCall (GH #12644) - hasNonEnregisterableStructs = true; - } - } - - if (arg->numRegs > 0) - { -#ifndef FEATURE_ARG_SPLIT - assert(arg->numSlots == 0); -#endif // FEATURE_ARG_SPLIT - - auto numIntRegAndFloatRegArgs = arg->getNumIntRegAndFloatRegForStructArg(); + calleeStackSize += argStackSize; + calleeFloatArgRegCount += argFloatRegCount; + calleeArgRegCount += argIntRegCount; - calleeArgRegCount += numIntRegAndFloatRegArgs.first; - calleeFloatArgRegCount += numIntRegAndFloatRegArgs.second; + // This exists to account for special case situations where we will not + // fast tail call. + if (arg->isStruct) + { + hasNonEnregisterableStructs = argStackSize > 0 ? true : hasNonEnregisterableStructs; + hasLargerThanOneStackSlotSizedStruct = countRegistersUsedForArg > 1 ? true : hasLargerThanOneStackSlotSizedStruct; + } - if ((calleeArgRegCount > 1 || calleeFloatArgRegCount > 1) || ((calleeArgRegCount + calleeFloatArgRegCount) > 0)) - { - // TODO-CQ: Fix LowerFastTailCall(GH $12644) - hasLargerThanOneStackSlotSizedStruct = true; - } - } + // Byref arguments are not allowed to fast tail call as the information + // of the caller's stack is lost when the callee is compiled. + if (arg->passedByRef) + { + hasByrefParameter = true; + break; } } if (callee->HasRetBufArg()) // RetBuf { - // We don't increment calleeArgRegCount here, since it is already in callee->gtCallArgs. - // If callee has RetBuf param, caller too must have it. // Otherwise go the slow route. if (info.compRetBuffArg == BAD_VAR_NUM) @@ -7163,9 +7143,15 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) const unsigned maxRegArgs = MAX_REG_ARG; hasLargerThanOneStackSlotSizedStruct = hasLargerThanOneStackSlotSizedStruct || info.compHasMultiSlotArgs; + if (hasByrefParameter) + { + reportFastTailCallDecision("Callee has a byref parameter", 0, 0); + return false; + } + // If we reached here means that callee has only those argument types which can be passed in // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. - // If we are passing args on stack for the callee and it has more args passed on stack than + // If we are passing args on stack for the callee and it haas a larger stack size than // the caller, then fast tail call cannot be performed. // // Note that the GC'ness of on stack args need not match since the arg setup area is marked @@ -7186,8 +7172,8 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) hasStackArgs = true; } - // x64 Windows: If we have more callee registers used than MAX_REG_ARG, then - // make sure the callee's incoming arguments is less than the caller's + // x64 Windows: If we have stack args then make sure the callee's incoming + // arguments is less than the caller's if (hasStackArgs && (nCalleeArgs > nCallerArgs)) { reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize, @@ -7197,7 +7183,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) #elif (defined(_TARGET_AMD64_) && defined(UNIX_AMD64_ABI)) || defined(_TARGET_ARM64_) - // For *nix Amd64 and Arm64 check to see if all arguments for the callee + // For unix Amd64 and Arm64 check to see if all arguments for the callee // and caller are passing in registers. If not, ensure that the outgoing argument stack size // requirement for the callee is less than or equal to the caller's entire stack frame usage. // From 6335d6a27bd6197c58aad02b6180412292de7db5 Mon Sep 17 00:00:00 2001 From: jashook Date: Mon, 15 Jul 2019 22:14:55 -0700 Subject: [PATCH 08/24] Restrict to only bail on byref struct args --- src/jit/morph.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index b868d4a06ddc..a5073c420d97 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7118,14 +7118,14 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) { hasNonEnregisterableStructs = argStackSize > 0 ? true : hasNonEnregisterableStructs; hasLargerThanOneStackSlotSizedStruct = countRegistersUsedForArg > 1 ? true : hasLargerThanOneStackSlotSizedStruct; - } - // Byref arguments are not allowed to fast tail call as the information - // of the caller's stack is lost when the callee is compiled. - if (arg->passedByRef) - { - hasByrefParameter = true; - break; + // Byref struct arguments are not allowed to fast tail call as the information + // of the caller's stack is lost when the callee is compiled. + if (arg->passedByRef) + { + hasByrefParameter = true; + break; + } } } From f63738e4b82843a998c45600d86950e7a04a760b Mon Sep 17 00:00:00 2001 From: jashook Date: Tue, 16 Jul 2019 08:17:31 -0700 Subject: [PATCH 09/24] Correctly used computations from fgArgInfo --- src/jit/morph.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index a5073c420d97..cb5b1721a0f6 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7158,11 +7158,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // as non-interruptible for fast tail calls. #ifdef WINDOWS_AMD64_ABI - assert(calleeStackSize == 0); - size_t calleeStackSlots = ((calleeArgRegCount + calleeFloatArgRegCount) > maxRegArgs) - ? (calleeArgRegCount + calleeFloatArgRegCount) - maxRegArgs - : 0; - calleeStackSize = calleeStackSlots * TARGET_POINTER_SIZE; size_t callerStackSize = info.compArgStackSize; bool hasStackArgs = false; @@ -7191,15 +7186,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // that we are not dealing with structs that are >8 bytes. bool hasStackArgs = false; - size_t maxFloatRegArgs = MAX_FLOAT_REG_ARG; - - size_t calleeIntStackArgCount = calleeArgRegCount > maxRegArgs ? calleeArgRegCount - maxRegArgs : 0; - size_t calleeFloatStackArgCount = - calleeFloatArgRegCount > maxFloatRegArgs ? calleeFloatArgRegCount - maxFloatRegArgs : 0; - - size_t calleeStackArgCount = calleeIntStackArgCount + calleeFloatStackArgCount; size_t callerStackSize = info.compArgStackSize; - calleeStackSize += calleeStackArgCount * TARGET_POINTER_SIZE; if (callerStackSize > 0 || calleeStackSize > 0) { From f61485b02aa666daf86615d33af27de6506bc8b4 Mon Sep 17 00:00:00 2001 From: jashook Date: Tue, 16 Jul 2019 15:55:08 -0700 Subject: [PATCH 10/24] Correctly re-init arg infor for explicit tailcalls This is required because morphTailCall modifies the argument list. It would be nice in the future to go through each argument on subsequent calls to fgInitArgInfo to verify there have been no changes. --- src/jit/compiler.h | 2 +- src/jit/morph.cpp | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index da9148c2a6af..167a80695a4a 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -5372,7 +5372,7 @@ class Compiler GenTree* fgMorphCast(GenTree* tree); GenTree* fgUnwrapProxy(GenTree* objRef); GenTreeFieldList* fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl); - void fgInitArgInfo(GenTreeCall* call); + void fgInitArgInfo(GenTreeCall* call, bool reInitArgInfo = false); GenTreeCall* fgMorphArgs(GenTreeCall* call); GenTreeArgList* fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac); diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index cb5b1721a0f6..7eace2242aa3 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -2588,7 +2588,10 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE // fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg // // Arguments: -// callNode - the call for which we are generating the fgArgInfo +// callNode - the call for which we are generating the fgArgInfo +// reInitArgInfo - force the argInfo to be regenerated. This is only useful +// if the arguments have changed between the first and +// subsequent calls. Defaults to false. // // Return Value: // None @@ -2599,7 +2602,7 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE // This method only computes the arg table and arg entries for the call (the fgArgInfo), // and makes no modification of the args themselves. // -void Compiler::fgInitArgInfo(GenTreeCall* call) +void Compiler::fgInitArgInfo(GenTreeCall* call, bool reInitArgInfo/*=false*/) { GenTree* args; GenTree* argx; @@ -2623,7 +2626,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number #endif - if (call->fgArgInfo != nullptr) + if (call->fgArgInfo != nullptr && !reInitArgInfo) { // We've already initialized and set the fgArgInfo. return; @@ -7626,6 +7629,9 @@ void Compiler::fgMorphTailCall(GenTreeCall* call, void* pfnCopyArgs) // The function is responsible for doing explicit null check when it is necessary. assert(!call->NeedsNullCheck()); + // Force, re-evaluating the args as we have just changed the argument list. + fgInitArgInfo(call, /*reInitArgInfo=*/true); + JITDUMP("fgMorphTailCall (after):\n"); DISPTREE(call); } @@ -8152,7 +8158,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) szFailReason = "Method with non-standard args passed in callee trash register cannot be tail " "called via helper"; } -#ifdef _TARGET_ARM64_ +#if defined(_TARGET_ARM64_) || defined(_TARGET_UNIX_) else { // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER. From a345485bae1db0cd050ce6c9322b90a4a94a6e8d Mon Sep 17 00:00:00 2001 From: jashook Date: Wed, 17 Jul 2019 23:30:46 -0700 Subject: [PATCH 11/24] Fix x86 assert --- src/jit/gentree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 2093758cdea2..2eb95e4ae379 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -1079,7 +1079,7 @@ bool GenTreeCall::AreArgsComplete() const return true; } -#if FEATURE_FASTTAILCALL +#if defined(FEATURE_FASTTAILCALL) || defined(FEATURE_TAILCALL) // If the args are not complete and fgArgInfo is not null. Then most // most likely the argInfo has been populated in fgCanFastTailCall and // we are just re-querying this information in fgMorphArgs. From 9a24d4699b0a26c1c632743d709925406b4ebbfd Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 18 Jul 2019 10:03:22 -0700 Subject: [PATCH 12/24] Address PR feedback related to recomputing initArgs --- src/jit/compiler.h | 2 +- src/jit/morph.cpp | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 167a80695a4a..da9148c2a6af 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -5372,7 +5372,7 @@ class Compiler GenTree* fgMorphCast(GenTree* tree); GenTree* fgUnwrapProxy(GenTree* objRef); GenTreeFieldList* fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl); - void fgInitArgInfo(GenTreeCall* call, bool reInitArgInfo = false); + void fgInitArgInfo(GenTreeCall* call); GenTreeCall* fgMorphArgs(GenTreeCall* call); GenTreeArgList* fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac); diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 7eace2242aa3..e02ea959cc39 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -2602,7 +2602,7 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE // This method only computes the arg table and arg entries for the call (the fgArgInfo), // and makes no modification of the args themselves. // -void Compiler::fgInitArgInfo(GenTreeCall* call, bool reInitArgInfo/*=false*/) +void Compiler::fgInitArgInfo(GenTreeCall* call) { GenTree* args; GenTree* argx; @@ -2626,7 +2626,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call, bool reInitArgInfo/*=false*/) const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number #endif - if (call->fgArgInfo != nullptr && !reInitArgInfo) + if (call->fgArgInfo != nullptr) { // We've already initialized and set the fgArgInfo. return; @@ -7629,9 +7629,6 @@ void Compiler::fgMorphTailCall(GenTreeCall* call, void* pfnCopyArgs) // The function is responsible for doing explicit null check when it is necessary. assert(!call->NeedsNullCheck()); - // Force, re-evaluating the args as we have just changed the argument list. - fgInitArgInfo(call, /*reInitArgInfo=*/true); - JITDUMP("fgMorphTailCall (after):\n"); DISPTREE(call); } @@ -8234,6 +8231,9 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) // temp = call // ret temp + // Force re-evaluating the argInfo as the return argument has changed. + call->fgArgInfo = nullptr; + // Create a new temp. unsigned tmpNum = lvaGrabTemp(false DEBUGARG("Return value temp for multi-reg return (rejected tail call).")); @@ -8321,6 +8321,9 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) // fast calls. if (!canFastTailCall) { + // Force re-evaluating the argInfo. fgMorphTailCall will modify the + // argument list, invalidating the argInfo. + call->fgArgInfo = nullptr; fgMorphTailCall(call, pfnCopyArgs); } @@ -8603,6 +8606,8 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) && !(dest->OperGet() == GT_LCL_VAR && dest->gtLclVar.gtLclNum == info.compRetBuffArg)) { + // Force re-evaluating the argInfo as the return argument has changed. + call->fgArgInfo = nullptr; origDest = dest; retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg")); From f1f3ac62d93c96dbfe1d115ee81921060b24b375 Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 18 Jul 2019 10:22:08 -0700 Subject: [PATCH 13/24] Fix comment --- src/jit/morph.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index e02ea959cc39..daf88b542ec9 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -2588,10 +2588,7 @@ GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE // fgInitArgInfo: Construct the fgArgInfo for the call with the fgArgEntry for each arg // // Arguments: -// callNode - the call for which we are generating the fgArgInfo -// reInitArgInfo - force the argInfo to be regenerated. This is only useful -// if the arguments have changed between the first and -// subsequent calls. Defaults to false. +// callNode - the call for which we are generating the fgArgInfo // // Return Value: // None From 7abbb6035506e1eec30eb3ebf7fcfae7659379f0 Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 18 Jul 2019 10:29:36 -0700 Subject: [PATCH 14/24] Update fgCanFastTailCall header --- src/jit/morph.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index daf88b542ec9..9775c3e23fc7 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -6898,7 +6898,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // caller(int, int, int, int) // callee(int, int, float, int) // -// -- Callee requires stack space that is equal to the caller -- +// -- Callee requires stack space that is equal or less than the caller -- // caller(struct, struct, struct, struct, struct, struct) // callee(int, int, int, int, int, int) // @@ -6916,6 +6916,10 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // caller(struct, double, struct, float, struct, struct) // callee(int, int, int, int, int, double, double, double) // +// -- Callee has a byref struct argument -- +// caller(int, int, int) +// callee(struct(size 3 bytes)) +// // Unix Amd64 && Arm64: // A fastTailCall decision can be made whenever the callee's stack space is // less than or equal to the caller's stack space. There are many permutations @@ -6941,12 +6945,6 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // decision of do not fast tail call is taken. This limitations should be // removed if/when fgMorphArgs no longer depends on fgCanFastTailCall. // -// 4) Arm64 Only, if there are HFA arguments and the callee has stack -// arguments, the decision will be reported as cannot fast tail call. -// This is because before fgMorphArgs is done, the struct is unknown whether it -// will be placed on the stack or enregistered. Therefore, the conservative -// decision of do not fast tail call is taken. -// // Can fast tail call examples (amd64 Unix): // // -- Callee will have all register arguments -- From 4a408ef19cbd3f85309f3841bb8b53d79164651c Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 18 Jul 2019 10:38:15 -0700 Subject: [PATCH 15/24] Address PR feedback --- src/jit/gentree.cpp | 8 ++++---- src/jit/morph.cpp | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 2eb95e4ae379..35cfd3c571e0 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -1079,10 +1079,10 @@ bool GenTreeCall::AreArgsComplete() const return true; } -#if defined(FEATURE_FASTTAILCALL) || defined(FEATURE_TAILCALL) - // If the args are not complete and fgArgInfo is not null. Then most - // most likely the argInfo has been populated in fgCanFastTailCall and - // we are just re-querying this information in fgMorphArgs. +#if defined(FEATURE_FASTTAILCALL) + // If we have FEATURE_FASTTAILCALL, 'fgCanFastTailCall()' can call 'fgInitArgInfo()', and in that + // scenario it is valid to have 'fgArgInfo' be non-null when 'fgMorphArgs()' first queries this, + // when it hasn't yet morphed the arguments. #else assert(gtCallArgs == nullptr); #endif diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 9775c3e23fc7..adca4b0bd0b9 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -5129,6 +5129,8 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, // struct parameters if they are passed as arguments to a tail call. if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop()) { + assert(!call->IsTailCall()); + varDsc->setLvRefCnt(0, RCS_EARLY); args->gtOp.gtOp1 = lcl; argEntry->node = lcl; From 903f89a40c0833b48b04e4dfaec16759f706f0fa Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 18 Jul 2019 11:34:58 -0700 Subject: [PATCH 16/24] Apply format patch --- src/jit/compiler.h | 6 ++-- src/jit/gentree.cpp | 8 ++--- src/jit/morph.cpp | 80 ++++++++++++++++++++++++--------------------- 3 files changed, 48 insertions(+), 46 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index da9148c2a6af..c27bab3799fc 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1476,9 +1476,8 @@ struct fgArgTabEntry } return 0; - } - + unsigned floatRegCount() { #if defined(UNIX_AMD64_ABI) @@ -1494,14 +1493,13 @@ struct fgArgTabEntry } return 0; - } unsigned stackSize() { return (TARGET_POINTER_SIZE * this->numSlots); } - + __declspec(property(get = GetHfaType)) var_types hfaType; var_types GetHfaType() { diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 35cfd3c571e0..e8f0717c1e87 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -1078,11 +1078,11 @@ bool GenTreeCall::AreArgsComplete() const assert((gtCallLateArgs != nullptr) || !fgArgInfo->HasRegArgs()); return true; } - + #if defined(FEATURE_FASTTAILCALL) - // If we have FEATURE_FASTTAILCALL, 'fgCanFastTailCall()' can call 'fgInitArgInfo()', and in that - // scenario it is valid to have 'fgArgInfo' be non-null when 'fgMorphArgs()' first queries this, - // when it hasn't yet morphed the arguments. +// If we have FEATURE_FASTTAILCALL, 'fgCanFastTailCall()' can call 'fgInitArgInfo()', and in that +// scenario it is valid to have 'fgArgInfo' be non-null when 'fgMorphArgs()' first queries this, +// when it hasn't yet morphed the arguments. #else assert(gtCallArgs == nullptr); #endif diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index adca4b0bd0b9..cd9150108f5c 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -1184,8 +1184,8 @@ fgArgTabEntry* fgArgInfo::AddRegArg(unsigned fgArgTabEntry* curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment, isStruct, isVararg); assert(curArgTabEntry != nullptr); - curArgTabEntry->isStruct = isStruct; // is this a struct arg - curArgTabEntry->structIntRegs = structIntRegs; + curArgTabEntry->isStruct = isStruct; // is this a struct arg + curArgTabEntry->structIntRegs = structIntRegs; curArgTabEntry->structFloatRegs = structFloatRegs; curArgTabEntry->checkIsStruct(); @@ -2962,7 +2962,8 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table. call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1, false, - callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); + callIsVararg UNIX_AMD64_ABI_ONLY_ARG(REG_STK) UNIX_AMD64_ABI_ONLY_ARG(0) + UNIX_AMD64_ABI_ONLY_ARG(0) UNIX_AMD64_ABI_ONLY_ARG(nullptr)); intArgRegNum++; #ifdef WINDOWS_AMD64_ABI @@ -3564,10 +3565,10 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) // This is a register argument - put it in the table newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign, isStructArg, - callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) + callIsVararg UNIX_AMD64_ABI_ONLY_ARG(nextOtherRegNum) UNIX_AMD64_ABI_ONLY_ARG(structIntRegs) - UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) - UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); + UNIX_AMD64_ABI_ONLY_ARG(structFloatRegs) + UNIX_AMD64_ABI_ONLY_ARG(&structDesc)); newArgEntry->SetIsBackFilled(isBackFilled); newArgEntry->isNonStandard = isNonStandard; @@ -6954,12 +6955,14 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // callee(int, int, float, int) // // -- Callee requires stack space that is equal to the caller -- -// caller({ long, long }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte stack +// caller({ long, long }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte +// stack // space // callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space // // -- Callee requires stack space that is less than the caller -- -// caller({ long, long }, int, { long, long }, int, { long, long }, { long, long }) 6 int register arguments, 32 byte stack +// caller({ long, long }, int, { long, long }, int, { long, long }, { long, long }) 6 int register arguments, 32 byte +// stack // space // callee(int, int, int, int, int, int, { long, long } ) // 6 int register arguments, 16 byte stack space // @@ -7051,17 +7054,17 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) #endif // DEBUG }; - // Note on vararg methods: - // If the caller is vararg method, we don't know the number of arguments passed by caller's caller. - // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its - // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as - // out-going area required for callee is bounded by caller's fixed argument space. - // - // Note that callee being a vararg method is not a problem since we can account the params being passed. - // - // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg - // method. This is due to the ABI differences for native vararg methods for these platforms. There is - // work required to shuffle arguments to the correct locations. +// Note on vararg methods: +// If the caller is vararg method, we don't know the number of arguments passed by caller's caller. +// But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its +// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as +// out-going area required for callee is bounded by caller's fixed argument space. +// +// Note that callee being a vararg method is not a problem since we can account the params being passed. +// +// We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg +// method. This is due to the ABI differences for native vararg methods for these platforms. There is +// work required to shuffle arguments to the correct locations. #if (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM_)) || (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)) if (info.compIsVarArgs || callee->IsVarargs()) @@ -7087,9 +7090,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // stack slot sized requirement. This requirement is required to support // lowering the fast tail call, which, currently only supports copying // stack slot arguments which have only one stack slot. - // - // For each struct arg, determine whether the argument would have > 1 stack - // slot if on the stack. If it has > 1 stack slot we will not fastTailCall. + // + // For each struct arg, determine whether the argument would have > 1 stack + // slot if on the stack. If it has > 1 stack slot we will not fastTailCall. // This is an implementation limitation of LowerFastTailCall that is tracked by: // https://github.com/dotnet/coreclr/issues/12644. @@ -7102,9 +7105,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) { fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); - unsigned argStackSize = arg->stackSize(); + unsigned argStackSize = arg->stackSize(); unsigned argFloatRegCount = arg->floatRegCount(); - unsigned argIntRegCount = arg->intRegCount(); + unsigned argIntRegCount = arg->intRegCount(); unsigned countRegistersUsedForArg = argIntRegCount + argFloatRegCount; @@ -7117,7 +7120,8 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) if (arg->isStruct) { hasNonEnregisterableStructs = argStackSize > 0 ? true : hasNonEnregisterableStructs; - hasLargerThanOneStackSlotSizedStruct = countRegistersUsedForArg > 1 ? true : hasLargerThanOneStackSlotSizedStruct; + hasLargerThanOneStackSlotSizedStruct = + countRegistersUsedForArg > 1 ? true : hasLargerThanOneStackSlotSizedStruct; // Byref struct arguments are not allowed to fast tail call as the information // of the caller's stack is lost when the callee is compiled. @@ -7140,7 +7144,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } } - const unsigned maxRegArgs = MAX_REG_ARG; + const unsigned maxRegArgs = MAX_REG_ARG; hasLargerThanOneStackSlotSizedStruct = hasLargerThanOneStackSlotSizedStruct || info.compHasMultiSlotArgs; if (hasByrefParameter) @@ -7149,13 +7153,13 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) return false; } - // If we reached here means that callee has only those argument types which can be passed in - // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. - // If we are passing args on stack for the callee and it haas a larger stack size than - // the caller, then fast tail call cannot be performed. - // - // Note that the GC'ness of on stack args need not match since the arg setup area is marked - // as non-interruptible for fast tail calls. +// If we reached here means that callee has only those argument types which can be passed in +// a register and if passed on stack will occupy exactly one stack slot in out-going arg area. +// If we are passing args on stack for the callee and it haas a larger stack size than +// the caller, then fast tail call cannot be performed. +// +// Note that the GC'ness of on stack args need not match since the arg setup area is marked +// as non-interruptible for fast tail calls. #ifdef WINDOWS_AMD64_ABI size_t callerStackSize = info.compArgStackSize; @@ -7167,7 +7171,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) hasStackArgs = true; } - // x64 Windows: If we have stack args then make sure the callee's incoming + // x64 Windows: If we have stack args then make sure the callee's incoming // arguments is less than the caller's if (hasStackArgs && (nCalleeArgs > nCallerArgs)) { @@ -7186,7 +7190,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // that we are not dealing with structs that are >8 bytes. bool hasStackArgs = false; - size_t callerStackSize = info.compArgStackSize; + size_t callerStackSize = info.compArgStackSize; if (callerStackSize > 0 || calleeStackSize > 0) { @@ -7200,8 +7204,8 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // shuffle arguments in LowerFastTailCall. See https://github.com/dotnet/coreclr/issues/12468. if (hasLargerThanOneStackSlotSizedStruct && calleeStackSize) { - reportFastTailCallDecision("Will not fastTailCall hasLargerThanOneStackSlotSizedStruct && calleeStackSize", callerStackSize, - calleeStackSize); + reportFastTailCallDecision("Will not fastTailCall hasLargerThanOneStackSlotSizedStruct && calleeStackSize", + callerStackSize, calleeStackSize); return false; } @@ -8605,7 +8609,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) { // Force re-evaluating the argInfo as the return argument has changed. call->fgArgInfo = nullptr; - origDest = dest; + origDest = dest; retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg")); lvaSetStruct(retValTmpNum, structHnd, true); From b4567f45a8fe36680f261a1389c2dfa2b15a7ef0 Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 18 Jul 2019 19:39:07 -0700 Subject: [PATCH 17/24] Correctly reset arginfo after morphtailcall --- src/jit/morph.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index cd9150108f5c..f92d66a76217 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -8322,10 +8322,11 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) // fast calls. if (!canFastTailCall) { + fgMorphTailCall(call, pfnCopyArgs); + // Force re-evaluating the argInfo. fgMorphTailCall will modify the // argument list, invalidating the argInfo. call->fgArgInfo = nullptr; - fgMorphTailCall(call, pfnCopyArgs); } // Implementation note : If we optimize tailcall to do a direct jump From e877c25a8befd7bcbfc98978e262bc191e576efd Mon Sep 17 00:00:00 2001 From: jashook Date: Wed, 24 Jul 2019 22:23:12 +0000 Subject: [PATCH 18/24] Commit does several small things, and fixes a bug Addresses PR feedback Removes dead code Changes to bail a fast tail call when the caller has a multi slot struct --- src/jit/compiler.h | 25 ++++++- src/jit/lclvars.cpp | 5 ++ src/jit/morph.cpp | 73 ++++++++----------- .../FastTailCall/FastTailCallCandidates.cs | 4 +- 4 files changed, 61 insertions(+), 46 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index c27bab3799fc..e2e2579ec5b7 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1330,7 +1330,7 @@ struct fgArgTabEntry #if defined(UNIX_AMD64_ABI) // Unix amd64 will split floating point types and integer types in structs // between floating point and general purpose registers. Keep track of that - // information so we do not need to re-compute it later. + // information so we do not need to recompute it later. unsigned structIntRegs; unsigned structFloatRegs; #endif // UNIX_AMD64_ABI @@ -1466,12 +1466,23 @@ struct fgArgTabEntry #if defined(UNIX_AMD64_ABI) if (this->isStruct) { + if (this->numSlots > 0) + { + return 0; + } return this->structIntRegs; } #endif // defined(UNIX_AMD64_ABI) if (!this->isPassedInFloatRegisters()) { +#if !defined(FEATURE_ARG_SPLIT) + if (this->numSlots > 0) + { + return 0; + } +#endif // !(FEATURE_ARG_SPLIT) + return this->numRegs; } @@ -1483,12 +1494,24 @@ struct fgArgTabEntry #if defined(UNIX_AMD64_ABI) if (this->isStruct) { + if (this->numSlots > 0) + { + return 0; + } + return this->structFloatRegs; } #endif // defined(UNIX_AMD64_ABI) if (this->isPassedInFloatRegisters()) { +#if !defined(FEATURE_ARG_SPLIT) + if (this->numSlots > 0) + { + return 0; + } +#endif // !(FEATURE_ARG_SPLIT) + return this->numRegs; } diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 70cc7fe1757c..399c80f5432f 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1003,6 +1003,11 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) #endif // _TARGET_XXX_ + if (cSlots > 1) + { + varDscInfo->hasMultiSlotStruct = true; + } + #if FEATURE_FASTTAILCALL if (cSlots > 1) { diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index f92d66a76217..e730836043e3 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7069,23 +7069,11 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) #if (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM_)) || (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)) if (info.compIsVarArgs || callee->IsVarargs()) { - reportFastTailCallDecision("Tail calls not supported on windows armarch.", 0, 0); + reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64", 0, 0); return false; } #endif // (defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM_)) || defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)) - unsigned nCallerArgs = info.compArgsCount; - unsigned nCalleeArgs = argInfo->ArgCount(); - - size_t callerArgRegCount = codeGen->intRegState.rsCalleeRegArgCount; - size_t callerFloatArgRegCount = codeGen->floatRegState.rsCalleeRegArgCount; - - // Count the callee args including implicit and hidden. - // Note that GenericContext and VarargCookie are added by importer while - // importing the call to gtCallArgs list along with explicit user args. - size_t calleeArgRegCount = 0; - size_t calleeFloatArgRegCount = 0; - // Count user args while tracking whether any of them has a larger than one // stack slot sized requirement. This requirement is required to support // lowering the fast tail call, which, currently only supports copying @@ -7101,7 +7089,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) bool hasByrefParameter = false; size_t calleeStackSize = 0; - for (unsigned index = 0; index < nCalleeArgs; ++index) + for (unsigned index = 0; index < argInfo->ArgCount(); ++index) { fgArgTabEntry* arg = argInfo->GetArgEntry(index, false); @@ -7109,19 +7097,30 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) unsigned argFloatRegCount = arg->floatRegCount(); unsigned argIntRegCount = arg->intRegCount(); +#if !defined(FEATURE_ARG_SPLIT) + if (argStackSize > 0) + { + assert(argFloatRegCount == 0 && argIntRegCount == 0); + } +#endif // !defined(FEATURE_ARG_SPLIT) + unsigned countRegistersUsedForArg = argIntRegCount + argFloatRegCount; calleeStackSize += argStackSize; - calleeFloatArgRegCount += argFloatRegCount; - calleeArgRegCount += argIntRegCount; // This exists to account for special case situations where we will not // fast tail call. if (arg->isStruct) { - hasNonEnregisterableStructs = argStackSize > 0 ? true : hasNonEnregisterableStructs; - hasLargerThanOneStackSlotSizedStruct = - countRegistersUsedForArg > 1 ? true : hasLargerThanOneStackSlotSizedStruct; + if (argStackSize > 0) + { + hasNonEnregisterableStructs = true; + } + + if (countRegistersUsedForArg > 1) + { + hasLargerThanOneStackSlotSizedStruct = true; + } // Byref struct arguments are not allowed to fast tail call as the information // of the caller's stack is lost when the callee is compiled. @@ -7145,8 +7144,15 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } const unsigned maxRegArgs = MAX_REG_ARG; + size_t callerStackSize = info.compArgStackSize; hasLargerThanOneStackSlotSizedStruct = hasLargerThanOneStackSlotSizedStruct || info.compHasMultiSlotArgs; + bool hasStackArgs = false; + if (callerStackSize > 0 || calleeStackSize > 0) + { + hasStackArgs = true; + } + if (hasByrefParameter) { reportFastTailCallDecision("Callee has a byref parameter", 0, 0); @@ -7155,15 +7161,13 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // If we reached here means that callee has only those argument types which can be passed in // a register and if passed on stack will occupy exactly one stack slot in out-going arg area. -// If we are passing args on stack for the callee and it haas a larger stack size than +// If we are passing args on stack for the callee and it has a larger stack size than // the caller, then fast tail call cannot be performed. // // Note that the GC'ness of on stack args need not match since the arg setup area is marked // as non-interruptible for fast tail calls. #ifdef WINDOWS_AMD64_ABI - size_t callerStackSize = info.compArgStackSize; - bool hasStackArgs = false; if (callerStackSize > 0 || calleeStackSize > 0) @@ -7173,9 +7177,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // x64 Windows: If we have stack args then make sure the callee's incoming // arguments is less than the caller's - if (hasStackArgs && (nCalleeArgs > nCallerArgs)) + if (hasStackArgs && (calleeStackSize > callerStackSize)) { - reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize, + reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (calleeStackSize > callerStackSize)", callerStackSize, calleeStackSize); return false; } @@ -7189,20 +7193,12 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // Also, in the case that we have to pass arguments on the stack make sure // that we are not dealing with structs that are >8 bytes. - bool hasStackArgs = false; - size_t callerStackSize = info.compArgStackSize; - - if (callerStackSize > 0 || calleeStackSize > 0) - { - hasStackArgs = true; - } - // Either the caller or callee has a >8 and <=16 byte struct and arguments that has to go on the stack. Do not // fastTailCall. // // When either the caller or callee have multi-stlot stack arguments we cannot safely // shuffle arguments in LowerFastTailCall. See https://github.com/dotnet/coreclr/issues/12468. - if (hasLargerThanOneStackSlotSizedStruct && calleeStackSize) + if (hasLargerThanOneStackSlotSizedStruct && calleeStackSize > 0) { reportFastTailCallDecision("Will not fastTailCall hasLargerThanOneStackSlotSizedStruct && calleeStackSize", callerStackSize, calleeStackSize); @@ -7223,16 +7219,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // not true in many cases on x64 linux, remove this pessimization when // LowerFastTailCall is fixed. See https://github.com/dotnet/coreclr/issues/12468 // for more information. - if (hasStackArgs && (nCalleeArgs > nCallerArgs)) - { - reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (nCalleeArgs > nCallerArgs)", callerStackSize, - calleeStackSize); - return false; - } - - if (calleeStackSize > callerStackSize) + if (hasStackArgs && (calleeStackSize > callerStackSize)) { - reportFastTailCallDecision("Will not fastTailCall calleeStackSize > callerStackSize", callerStackSize, + reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (calleeStackSize > callerStackSize)", callerStackSize, calleeStackSize); return false; } diff --git a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs index 660b2185613b..cda88e280750 100644 --- a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs +++ b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs @@ -1092,7 +1092,7 @@ public struct StructSizeThirtyTwo [FieldOffset(16)] public long c; [FieldOffset(24)] public long d; - public StructSizeThirtyTwo(int a, int b, int c, int d) + public StructSizeThirtyTwo(long a, long b, long c, long d) { this.a = a; this.b = b; @@ -1212,7 +1212,6 @@ public static StructSizeThirtyTwo DoubleCountRetBuffCallerWrapper(int a, int b) if (a % 2 == 0) { a = 1; - b = b + 2; return DoubleCountRetBuffCallee(new StructSizeEightIntNotExplicit(a, a), new StructSizeEightIntNotExplicit(a, a), new StructSizeEightIntNotExplicit(a, a), @@ -1221,7 +1220,6 @@ public static StructSizeThirtyTwo DoubleCountRetBuffCallerWrapper(int a, int b) } else { - a = 4; b = b + 1; return DoubleCountRetBuffCallee(new StructSizeEightIntNotExplicit(b, b), new StructSizeEightIntNotExplicit(b, b), From 84bc0c9a49428e4969d18be79455eef4019b5ed2 Mon Sep 17 00:00:00 2001 From: jashook Date: Wed, 24 Jul 2019 23:01:16 +0000 Subject: [PATCH 19/24] Fix windows build --- src/jit/morph.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index e730836043e3..0fd7ed050041 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -7168,13 +7168,6 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // as non-interruptible for fast tail calls. #ifdef WINDOWS_AMD64_ABI - bool hasStackArgs = false; - - if (callerStackSize > 0 || calleeStackSize > 0) - { - hasStackArgs = true; - } - // x64 Windows: If we have stack args then make sure the callee's incoming // arguments is less than the caller's if (hasStackArgs && (calleeStackSize > callerStackSize)) From ef7f4d0a3800927d9461d307cba7d63a0eb3f081 Mon Sep 17 00:00:00 2001 From: jashook Date: Wed, 24 Jul 2019 23:04:49 +0000 Subject: [PATCH 20/24] Address pr feedback --- src/jit/compiler.h | 23 ----------------------- src/jit/morph.cpp | 4 ++++ 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index e2e2579ec5b7..9a7e06447f75 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -1466,23 +1466,12 @@ struct fgArgTabEntry #if defined(UNIX_AMD64_ABI) if (this->isStruct) { - if (this->numSlots > 0) - { - return 0; - } return this->structIntRegs; } #endif // defined(UNIX_AMD64_ABI) if (!this->isPassedInFloatRegisters()) { -#if !defined(FEATURE_ARG_SPLIT) - if (this->numSlots > 0) - { - return 0; - } -#endif // !(FEATURE_ARG_SPLIT) - return this->numRegs; } @@ -1494,24 +1483,12 @@ struct fgArgTabEntry #if defined(UNIX_AMD64_ABI) if (this->isStruct) { - if (this->numSlots > 0) - { - return 0; - } - return this->structFloatRegs; } #endif // defined(UNIX_AMD64_ABI) if (this->isPassedInFloatRegisters()) { -#if !defined(FEATURE_ARG_SPLIT) - if (this->numSlots > 0) - { - return 0; - } -#endif // !(FEATURE_ARG_SPLIT) - return this->numRegs; } diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 0fd7ed050041..d1fbec33d3d6 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -1223,6 +1223,10 @@ fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, curArgTabEntry->parent = parent; curArgTabEntry->slotNum = nextSlotNum; curArgTabEntry->numRegs = 0; +#if defined (UNIX_AMD64_ABI) + curArgTabEntry->structIntRegs = 0; + curArgTabEntry->structFloatRegs = 0; +#endif // defined(UNIX_AMD64_ABI) curArgTabEntry->numSlots = numSlots; curArgTabEntry->alignment = alignment; curArgTabEntry->lateArgInx = UINT_MAX; From 799468d149f85e044c2c2db89a9ae77d5631bac6 Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 25 Jul 2019 04:10:10 +0000 Subject: [PATCH 21/24] Fix arm and x86 builds --- src/jit/lclvars.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 399c80f5432f..6302e267aef6 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1003,6 +1003,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) #endif // _TARGET_XXX_ +#if FEATURE_FASTTAILCALL if (cSlots > 1) { varDscInfo->hasMultiSlotStruct = true; From f7b0d280ac65cab665039ec5e8bca22764a96d34 Mon Sep 17 00:00:00 2001 From: jashook Date: Thu, 25 Jul 2019 11:13:47 -0700 Subject: [PATCH 22/24] apply format patch and remove #25885 --- src/jit/lclvars.cpp | 8 +------- src/jit/morph.cpp | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 6302e267aef6..cb68505ea0b3 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1008,13 +1008,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) { varDscInfo->hasMultiSlotStruct = true; } - -#if FEATURE_FASTTAILCALL - if (cSlots > 1) - { - varDscInfo->hasMultiSlotStruct = true; - } - + varDscInfo->stackArgSize += roundUp(argSize, TARGET_POINTER_SIZE); #endif // FEATURE_FASTTAILCALL } diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index d1fbec33d3d6..b91d40845942 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -1217,13 +1217,13 @@ fgArgTabEntry* fgArgInfo::AddStkArg(unsigned argNum, nextSlotNum = roundUp(nextSlotNum, alignment); curArgTabEntry->setRegNum(0, REG_STK); - curArgTabEntry->argNum = argNum; - curArgTabEntry->node = node; - curArgTabEntry->argType = node->TypeGet(); - curArgTabEntry->parent = parent; - curArgTabEntry->slotNum = nextSlotNum; - curArgTabEntry->numRegs = 0; -#if defined (UNIX_AMD64_ABI) + curArgTabEntry->argNum = argNum; + curArgTabEntry->node = node; + curArgTabEntry->argType = node->TypeGet(); + curArgTabEntry->parent = parent; + curArgTabEntry->slotNum = nextSlotNum; + curArgTabEntry->numRegs = 0; +#if defined(UNIX_AMD64_ABI) curArgTabEntry->structIntRegs = 0; curArgTabEntry->structFloatRegs = 0; #endif // defined(UNIX_AMD64_ABI) @@ -7148,10 +7148,10 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) } const unsigned maxRegArgs = MAX_REG_ARG; - size_t callerStackSize = info.compArgStackSize; + size_t callerStackSize = info.compArgStackSize; hasLargerThanOneStackSlotSizedStruct = hasLargerThanOneStackSlotSizedStruct || info.compHasMultiSlotArgs; - bool hasStackArgs = false; + bool hasStackArgs = false; if (callerStackSize > 0 || calleeStackSize > 0) { hasStackArgs = true; @@ -7176,8 +7176,8 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // arguments is less than the caller's if (hasStackArgs && (calleeStackSize > callerStackSize)) { - reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (calleeStackSize > callerStackSize)", callerStackSize, - calleeStackSize); + reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (calleeStackSize > callerStackSize)", + callerStackSize, calleeStackSize); return false; } @@ -7218,8 +7218,8 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) // for more information. if (hasStackArgs && (calleeStackSize > callerStackSize)) { - reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (calleeStackSize > callerStackSize)", callerStackSize, - calleeStackSize); + reportFastTailCallDecision("Will not fastTailCall hasStackArgs && (calleeStackSize > callerStackSize)", + callerStackSize, calleeStackSize); return false; } From d4e95d0ecd2c7756dbc6dfd4370aff737f9426d7 Mon Sep 17 00:00:00 2001 From: jashook Date: Fri, 26 Jul 2019 17:00:38 -0700 Subject: [PATCH 23/24] Rebase on master --- .../FastTailCall/FastTailCallCandidates.cs | 375 +++++++++++++++++- 1 file changed, 374 insertions(+), 1 deletion(-) diff --git a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs index cda88e280750..190714909973 100644 --- a/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs +++ b/tests/src/JIT/opt/FastTailCall/FastTailCallCandidates.cs @@ -65,6 +65,12 @@ public static int Tester(int a) CheckOutput(DoubleCountRetBuffCaller(1)); CheckOutput(Struct32CallerWrapper()); CheckOutput(Struct32CallerWrapperCalleeHasStack(2)); + CheckOutput(CallerEnregisterableAmd64WindowsStructs8Bytes(1, 2)); + CheckOutput(CallerAmd64WindowsStructs7Bytes(1, 2)); + CheckOutput(CallerAmd64WindowsStructs6Bytes(1, 2)); + CheckOutput(CallerAmd64WindowsStructs5Bytes(1, 2)); + CheckOutput(CallerAmd64WindowsStructs4Bytes(1, 2)); + CheckOutput(CallerAmd64WindowsStructs3Bytes(1, 2)); return s_ret_value; @@ -1471,7 +1477,7 @@ public static int Struct32CalleeWithStack(long one, /// The callee uses 6 integer registers, 32 bytes of stack (3 args) /// /// Return 100 is a pass. - /// Return 113 is a failure. + /// Return 114 is a failure. /// /// public static int Struct32CallerWrapperCalleeHasStack(int two) @@ -1495,6 +1501,373 @@ public static int Struct32CallerWrapperCalleeHasStack(int two) return 100; } + /// + /// Decision to fast tail call. See CallerEnregisterableAmd64WindowsStructs8Bytes for more + /// information. + /// + public static int CalleeEnregisterableAmd64WindowsStructs8Bytes(StructSizeEightNotExplicit eightByteStruct) + { + long a = eightByteStruct.a; + + // Force this to not be inlined + int count = 0; + for (int i = 0; i < a; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1000000) + { + a = count; + } + + if (count == 1) + { + a = 100; + } + else + { + a = 115; + } + + return (int)a; + } + + /// + /// Windows x64 tail call tests + /// + /// + /// + /// All targets will fast tail call + /// + /// The caller uses 2 integer registers (2 args) + /// The callee uses 1 integer registers (1 args) + /// + /// Return 100 is a pass. + /// Return 115 is a failure. + /// + /// + public static int CallerEnregisterableAmd64WindowsStructs8Bytes(int a, int b) + { + if (a % 2 == 0) + { + return CalleeEnregisterableAmd64WindowsStructs8Bytes(new StructSizeEightNotExplicit(a)); + } + else + { + return CalleeEnregisterableAmd64WindowsStructs8Bytes(new StructSizeEightNotExplicit(b)); + } + } + + /// + /// Decision to fast tail call. See CallerAmd64WindowsStructs7Bytes for more + /// information. + /// + public static int CalleeAmd64WindowsStructs7Bytes(StructSizeSevenNotExplicit sevenByteStruct) + { + int a = sevenByteStruct.a; + + // Force this to not be inlined + int count = 0; + for (int i = 0; i < a; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1000000) + { + a = count; + } + + if (count == 1) + { + a = 100; + } + else + { + a = 116; + } + + return (int)a; + } + + /// + /// Windows x64 tail call tests + /// + /// + /// + /// All targets will fast tail call + /// + /// The caller uses 2 integer registers (2 args) + /// The callee uses 1 integer registers (1 args) + /// + /// Return 100 is a pass. + /// Return 116 is a failure. + /// + /// + public static int CallerAmd64WindowsStructs7Bytes(int a, int b) + { + if (a % 2 == 0) + { + return CalleeAmd64WindowsStructs7Bytes(new StructSizeSevenNotExplicit(a, 1, 2, 3)); + } + else + { + return CalleeAmd64WindowsStructs7Bytes(new StructSizeSevenNotExplicit(b, 1, 2, 3)); + } + } + + /// + /// Decision to fast tail call. See CallerAmd64WindowsStructs6Bytes for more + /// information. + /// + public static int CalleeAmd64WindowsStructs6Bytes(StructSizeSixNotExplicit sixByteStruct) + { + int a = sixByteStruct.a; + + // Force this to not be inlined + int count = 0; + for (int i = 0; i < a; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1000000) + { + a = count; + } + + if (count == 1) + { + a = 100; + } + else + { + a = 117; + } + + return (int)a; + } + + /// + /// Windows x64 tail call tests + /// + /// + /// + /// All targets will fast tail call + /// + /// The caller uses 2 integer registers (2 args) + /// The callee uses 1 integer registers (1 args) + /// + /// Return 100 is a pass. + /// Return 117 is a failure. + /// + /// + public static int CallerAmd64WindowsStructs6Bytes(int a, int b) + { + if (a % 2 == 0) + { + return CalleeAmd64WindowsStructs6Bytes(new StructSizeSixNotExplicit(a, 1, 2)); + } + else + { + return CalleeAmd64WindowsStructs6Bytes(new StructSizeSixNotExplicit(b, 1, 2)); + } + } + + /// + /// Decision to fast tail call. See CallerAmd64WindowsStructs5Bytes for more + /// information. + /// + public static int CalleeAmd64WindowsStructs5Bytes(StructSizeFiveNotExplicit fiveByteStruct) + { + int a = fiveByteStruct.a; + + // Force this to not be inlined + int count = 0; + for (int i = 0; i < a; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1000000) + { + a = count; + } + + if (count == 1) + { + a = 100; + } + else + { + a = 118; + } + + return (int)a; + } + + /// + /// Windows x64 tail call tests + /// + /// + /// + /// All targets will fast tail call + /// + /// The caller uses 2 integer registers (2 args) + /// The callee uses 1 integer registers (1 args) + /// + /// Return 100 is a pass. + /// Return 118 is a failure. + /// + /// + public static int CallerAmd64WindowsStructs5Bytes(int a, int b) + { + if (a % 2 == 0) + { + return CalleeAmd64WindowsStructs5Bytes(new StructSizeFiveNotExplicit(a, 1)); + } + else + { + return CalleeAmd64WindowsStructs5Bytes(new StructSizeFiveNotExplicit(b, 1)); + } + } + + /// + /// Decision to fast tail call. See CallerAmd64WindowsStructs4Bytes for more + /// information. + /// + public static int CalleeAmd64WindowsStructs4Bytes(StructSizeFourNotExplicit fourByteStruct) + { + int a = fourByteStruct.a; + + // Force this to not be inlined + int count = 0; + for (int i = 0; i < a; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1000000) + { + a = count; + } + + if (count == 1) + { + a = 100; + } + else + { + a = 119; + } + + return (int)a; + } + + /// + /// Windows x64 tail call tests + /// + /// + /// + /// All targets will fast tail call + /// + /// The caller uses 2 integer registers (2 args) + /// The callee uses 1 integer registers (1 args) + /// + /// Return 100 is a pass. + /// Return 119 is a failure. + /// + /// + public static int CallerAmd64WindowsStructs4Bytes(int a, int b) + { + if (a % 2 == 0) + { + return CalleeAmd64WindowsStructs4Bytes(new StructSizeFourNotExplicit(a)); + } + else + { + return CalleeAmd64WindowsStructs4Bytes(new StructSizeFourNotExplicit(a)); + } + } + + /// + /// Decision to fast tail call. See CallerAmd64WindowsStructs3Bytes for more + /// information. + /// + public static int CalleeAmd64WindowsStructs3Bytes(StructSizeThreeNotExplicit threeByteStruct) + { + int a = threeByteStruct.a; + + // Force this to not be inlined + int count = 0; + for (int i = 0; i < a; ++i) + { + if (i % 2 == 0) + { + ++count; + } + } + + if (count == 1000000) + { + a = count; + } + + if (count == 1) + { + a = 100; + } + else + { + a = 120; + } + + return (int)a; + } + + /// + /// Windows x64 tail call tests + /// + /// + /// + /// x64 windows will not fast tail call because the struct is passed + /// byref. + /// + /// The caller uses 2 integer registers (2 args) + /// The callee uses 1 integer registers (1 args) + /// + /// Return 100 is a pass. + /// Return 120 is a failure. + /// + /// + public static int CallerAmd64WindowsStructs3Bytes(byte a, byte b) + { + if (a % 2 == 0) + { + return CalleeAmd64WindowsStructs3Bytes(new StructSizeThreeNotExplicit(a, a, a)); + } + else + { + return CalleeAmd64WindowsStructs3Bytes(new StructSizeThreeNotExplicit(b, b, b)); + } + } + //////////////////////////////////////////////////////////////////////////// // Main //////////////////////////////////////////////////////////////////////////// From 75b2f245c1b8e7035e45beee776cd39f41c19ea8 Mon Sep 17 00:00:00 2001 From: jashook Date: Fri, 26 Jul 2019 21:57:24 -0700 Subject: [PATCH 24/24] Apply format patch --- src/jit/lclvars.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index cb68505ea0b3..70cc7fe1757c 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -1008,7 +1008,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) { varDscInfo->hasMultiSlotStruct = true; } - + varDscInfo->stackArgSize += roundUp(argSize, TARGET_POINTER_SIZE); #endif // FEATURE_FASTTAILCALL }