Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3020,7 +3020,7 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
}
else if (compiler->info.genCPU == CPU_X64)
{
if (compiler->canUseAVX())
if (compiler->canUseVexEncoding())
{
printf("X64 CPU with AVX");
}
Expand Down Expand Up @@ -11175,7 +11175,7 @@ void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)

if (emitVzeroUpper)
{
assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
assert(compiler->canUseVexEncoding());
instGen(INS_vzeroupper);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5357,7 +5357,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
// when there's preceding 256-bit AVX to legacy SSE transition penalty.
if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
{
assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
assert(compiler->canUseVexEncoding());
instGen(INS_vzeroupper);
}

Expand Down
66 changes: 28 additions & 38 deletions src/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2500,43 +2500,6 @@ void Compiler::compSetProcessor()
//
CLANG_FORMAT_COMMENT_ANCHOR;

#ifdef _TARGET_XARCH_
opts.compCanUseSSE4 = false;
if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE41) &&
jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE42))
{
if (JitConfig.EnableSSE3_4() != 0)
{
opts.compCanUseSSE4 = true;
}
}

// COMPlus_EnableAVX can be used to disable using AVX if available on a target machine.
opts.compCanUseAVX = false;
if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this being handled now? This was the condition that caused us not to generate AVX code during crossgen, as we can't be assured that the target will be the same.

{
if (JitConfig.EnableAVX() != 0)
{
opts.compCanUseAVX = true;
}
}

if (!compIsForInlining())
{
if (opts.compCanUseAVX)
{
codeGen->getEmitter()->SetUseAVX(true);
// Initially assume that the JITted method contains no AVX instructions
codeGen->getEmitter()->SetContainsAVX(false);
codeGen->getEmitter()->SetContains256bitAVX(false);
}
else if (opts.compCanUseSSE4)
{
codeGen->getEmitter()->SetUseSSE4(true);
}
}
#endif // _TARGET_XARCH_

#ifdef _TARGET_AMD64_
opts.compUseFCOMI = false;
opts.compUseCMOV = true;
Expand Down Expand Up @@ -2620,7 +2583,9 @@ void Compiler::compSetProcessor()
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
if (configEnableISA(InstructionSet_AVX2))
// COMPlus_EnableAVX is also used to control the code generation of
// System.Numerics.Vectors and floating-point arithmetic
if (configEnableISA(InstructionSet_AVX) && configEnableISA(InstructionSet_AVX2))
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this being handled now? This was the condition that caused us not to generate AVX code during crossgen, as we can't be assured that the target will be the same.

@CarolEidt This is now handled through InstructionSet_AVX and InstructionSet_AVX2, which I am using instead of UseAVX; enabling those instruction sets is already guarded by !jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT).

{
opts.setSupportedISA(InstructionSet_AVX2);
}
Expand Down Expand Up @@ -2697,6 +2662,31 @@ void Compiler::compSetProcessor()
}
}
}

opts.compCanUseSSE4 = false;
if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE41) &&
jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE42))
{
if (JitConfig.EnableSSE3_4() != 0)
{
opts.compCanUseSSE4 = true;
}
}

if (!compIsForInlining())
{
if (canUseVexEncoding())
{
codeGen->getEmitter()->SetUseVEXEncoding(true);
// Initially assume that the JITted method contains no AVX instructions
codeGen->getEmitter()->SetContainsAVX(false);
codeGen->getEmitter()->SetContains256bitAVX(false);
}
else if (CanUseSSE4())
{
codeGen->getEmitter()->SetUseSSE4(true);
}
}
#endif
}

Expand Down
35 changes: 11 additions & 24 deletions src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7323,11 +7323,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

// Get highest available level for floating point codegen
SIMDLevel getFloatingPointCodegenLevel()
// Get highest available level for SIMD codegen
SIMDLevel getSIMDSupportLevel()
{
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
if (canUseAVX())
if (compSupports(InstructionSet_AVX2))
{
return SIMD_AVX2_Supported;
}
Expand All @@ -7340,18 +7340,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// min bar is SSE2
assert(canUseSSE2());
return SIMD_SSE2_Supported;
#else
assert(!"getFPInstructionSet() is not implemented for target arch");
unreached();
return SIMD_Not_Supported;
#endif
}

// Get highest available level for SIMD codegen
SIMDLevel getSIMDSupportLevel()
{
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
return getFloatingPointCodegenLevel();
#else
assert(!"Available instruction set(s) for SIMD codegen is not defined for target arch");
unreached();
Expand Down Expand Up @@ -7635,13 +7623,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
var_types getSIMDVectorType()
{
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
if (canUseAVX())
if (getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
return TYP_SIMD32;
}
else
{
assert(canUseSSE2());
assert(getSIMDSupportLevel() >= SIMD_SSE2_Supported);
return TYP_SIMD16;
}
#elif defined(_TARGET_ARM64_)
Expand Down Expand Up @@ -7673,13 +7661,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
unsigned getSIMDVectorRegisterByteLength()
{
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
if (canUseAVX())
if (getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
return YMM_REGSIZE_BYTES;
}
else
{
assert(canUseSSE2());
assert(getSIMDSupportLevel() >= SIMD_SSE2_Supported);
return XMM_REGSIZE_BYTES;
}
#elif defined(_TARGET_ARM64_)
Expand Down Expand Up @@ -7828,19 +7816,19 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#endif
}

bool canUseAVX() const
bool compSupports(InstructionSet isa) const
{
#ifdef _TARGET_XARCH_
return opts.compCanUseAVX;
return (opts.compSupportsISA & (1ULL << isa)) != 0;
#else
return false;
#endif
}

bool compSupports(InstructionSet isa)
bool canUseVexEncoding() const
{
#ifdef _TARGET_XARCH_
return (opts.compSupportsISA & (1ULL << isa)) != 0;
return compSupports(InstructionSet_AVX);
#else
return false;
#endif
Expand Down Expand Up @@ -7954,7 +7942,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifdef _TARGET_XARCH_
bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
bool compCanUseSSE4; // Allow CodeGen to use SSE3, SSSE3, SSE4.1 and SSE4.2 instructions
bool compCanUseAVX; // Allow CodeGen to use AVX 256-bit vectors for SIMD operations
#endif // _TARGET_XARCH_

#ifdef _TARGET_XARCH_
Expand Down
2 changes: 1 addition & 1 deletion src/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ class emitter

#ifdef _TARGET_XARCH_
SetUseSSE4(false);
SetUseAVX(false);
SetUseVEXEncoding(false);
#endif // _TARGET_XARCH_
}

Expand Down
18 changes: 9 additions & 9 deletions src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ bool IsAVXOnlyInstruction(instruction ins)
bool emitter::IsAVXInstruction(instruction ins)
{
#ifndef LEGACY_BACKEND
return (UseAVX() && IsSSEOrAVXInstruction(ins));
return (UseVEXEncoding() && IsSSEOrAVXInstruction(ins));
#else
return false;
#endif
Expand Down Expand Up @@ -120,7 +120,7 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
// that use the SSE38 or SSE3A macro.
bool emitter::Is4ByteAVXInstruction(instruction ins)
{
return UseAVX() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins)) && EncodedBySSE38orSSE3A(ins);
return UseVEXEncoding() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins)) && EncodedBySSE38orSSE3A(ins);
}
#endif // !LEGACY_BACKEND

Expand Down Expand Up @@ -353,7 +353,7 @@ unsigned RegEncoding(regNumber reg)
// AVX: specific bits within VEX prefix need to be set in bit-inverted form.
emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
{
if (UseAVX() && IsAVXInstruction(ins))
if (UseVEXEncoding() && IsAVXInstruction(ins))
{
// W-bit is available only in 3-byte VEX prefix that starts with byte C4.
assert(hasVexPrefix(code));
Expand All @@ -373,7 +373,7 @@ emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)

emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
{
if (UseAVX() && IsAVXInstruction(ins))
if (UseVEXEncoding() && IsAVXInstruction(ins))
{
// Right now support 3-byte VEX prefix
assert(hasVexPrefix(code));
Expand All @@ -387,7 +387,7 @@ emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)

emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
{
if (UseAVX() && IsAVXInstruction(ins))
if (UseVEXEncoding() && IsAVXInstruction(ins))
{
// Right now support 3-byte VEX prefix
assert(hasVexPrefix(code));
Expand All @@ -401,7 +401,7 @@ emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)

emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
{
if (UseAVX() && IsAVXInstruction(ins))
if (UseVEXEncoding() && IsAVXInstruction(ins))
{
// Right now support 3-byte VEX prefix
assert(hasVexPrefix(code));
Expand All @@ -416,7 +416,7 @@ emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
// Adds REX prefix (0x40) without W, R, X or B bits set
emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
{
assert(!UseAVX() || !IsAVXInstruction(ins));
assert(!UseVEXEncoding() || !IsAVXInstruction(ins));
return code | 0x4000000000ULL;
}

Expand Down Expand Up @@ -446,7 +446,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c
if (hasVexPrefix(code))
{
// Only AVX instructions should have a VEX prefix
assert(UseAVX() && IsAVXInstruction(ins));
assert(UseVEXEncoding() && IsAVXInstruction(ins));
code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
code &= 0x00000000FFFFFFFFLL;

Expand Down Expand Up @@ -3771,7 +3771,7 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN
// AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
// SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
// SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
sz = (UseAVX() || UseSSE4()) ? 6 : 5;
sz = (UseVEXEncoding() || UseSSE4()) ? 6 : 5;
}

#ifdef _TARGET_AMD64_
Expand Down
14 changes: 7 additions & 7 deletions src/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,14 +147,14 @@ code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr
return code;
}

bool useAVXEncodings;
bool UseAVX()
bool useVEXEncodings;
bool UseVEXEncoding()
{
return useAVXEncodings;
return useVEXEncodings;
}
void SetUseAVX(bool value)
void SetUseVEXEncoding(bool value)
{
useAVXEncodings = value;
useVEXEncodings = value;
}

bool containsAVXInstruction = false;
Expand Down Expand Up @@ -185,11 +185,11 @@ bool IsThreeOperandAVXInstruction(instruction ins)
}
bool Is4ByteAVXInstruction(instruction ins);
#else // LEGACY_BACKEND
bool UseAVX()
bool UseVEXEncoding()
{
return false;
}
void SetUseAVX(bool value)
void SetUseVEXEncoding(bool value)
{
}
bool ContainsAVX()
Expand Down
4 changes: 2 additions & 2 deletions src/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3274,7 +3274,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*
}
else
#endif // FEATURE_SIMD
if (compiler->canUseAVX())
if (compiler->canUseVexEncoding())
{
return (aligned) ? INS_movapd : INS_movupd;
}
Expand Down Expand Up @@ -3439,7 +3439,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false
}
else
#endif // FEATURE_SIMD
if (compiler->canUseAVX())
if (compiler->canUseVexEncoding())
{
return (aligned) ? INS_movapd : INS_movupd;
}
Expand Down
17 changes: 10 additions & 7 deletions src/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2219,7 +2219,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
// No need to set isInternalRegDelayFree since targetReg is
// an int type reg and guaranteed to be different from xmm/ymm
// regs.
info->internalFloatCount = compiler->canUseAVX() ? 2 : 1;
info->internalFloatCount = (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) ? 2 : 1;
info->setInternalCandidates(this, allSIMDRegs());
}
info->srcCount = 2;
Expand Down Expand Up @@ -2431,6 +2431,12 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)

void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
{
NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
InstructionSet isa = compiler->isaOfHWIntrinsic(intrinsicID);
if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2)
{
SetContainsAVXFlags(true, 32);
}
TreeNodeInfo* info = &(intrinsicTree->gtLsraInfo);
if (intrinsicTree->gtGetOp2IfPresent() != nullptr)
{
Expand Down Expand Up @@ -2804,13 +2810,10 @@ void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree)
//
void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsigned sizeOfSIMDVector /* = 0*/)
{
if (isFloatingPointType)
if (isFloatingPointType && compiler->canUseVexEncoding())
{
if (compiler->getFloatingPointCodegenLevel() == SIMD_AVX2_Supported)
{
compiler->getEmitter()->SetContainsAVX(true);
}
if (sizeOfSIMDVector == 32 && compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
compiler->getEmitter()->SetContainsAVX(true);
if (sizeOfSIMDVector == 32)
{
compiler->getEmitter()->SetContains256bitAVX(true);
}
Expand Down
Loading