dotnet · CarolEidt · Nov 14, 2017 · Nov 14, 2017 · CarolEidt · Nov 14, 2017
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
@@ -3020,7 +3020,7 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
         }
         else if (compiler->info.genCPU == CPU_X64)
         {
-            if (compiler->canUseAVX())
+            if (compiler->canUseVexEncoding())
             {
                 printf("X64 CPU with AVX");
             }
@@ -11175,7 +11175,7 @@ void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
 
     if (emitVzeroUpper)
     {
-        assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
+        assert(compiler->canUseVexEncoding());
         instGen(INS_vzeroupper);
     }
 }

diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
@@ -5357,7 +5357,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
     // when there's preceding 256-bit AVX to legacy SSE transition penalty.
     if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
     {
-        assert(compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported);
+        assert(compiler->canUseVexEncoding());
         instGen(INS_vzeroupper);
     }
 

diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
@@ -2500,43 +2500,6 @@ void Compiler::compSetProcessor()
     //
     CLANG_FORMAT_COMMENT_ANCHOR;
 
-#ifdef _TARGET_XARCH_
-    opts.compCanUseSSE4 = false;
-    if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE41) &&
-        jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE42))
-    {
-        if (JitConfig.EnableSSE3_4() != 0)
-        {
-            opts.compCanUseSSE4 = true;
-        }
-    }
-
-    // COMPlus_EnableAVX can be used to disable using AVX if available on a target machine.
-    opts.compCanUseAVX = false;
-    if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
-    {
-        if (JitConfig.EnableAVX() != 0)
-        {
-            opts.compCanUseAVX = true;
-        }
-    }
-
-    if (!compIsForInlining())
-    {
-        if (opts.compCanUseAVX)
-        {
-            codeGen->getEmitter()->SetUseAVX(true);
-            // Assume each JITted method does not contain AVX instruction at first
-            codeGen->getEmitter()->SetContainsAVX(false);
-            codeGen->getEmitter()->SetContains256bitAVX(false);
-        }
-        else if (opts.compCanUseSSE4)
-        {
-            codeGen->getEmitter()->SetUseSSE4(true);
-        }
-    }
-#endif // _TARGET_XARCH_
-
 #ifdef _TARGET_AMD64_
     opts.compUseFCOMI   = false;
     opts.compUseCMOV    = true;
@@ -2620,7 +2583,9 @@ void Compiler::compSetProcessor()
             }
             if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
             {
-                if (configEnableISA(InstructionSet_AVX2))
+                // COMPlus_EnableAVX also controls code generation for System.Numerics.Vectors
+                // and floating-point arithmetic, so AVX2 is enabled only if AVX is enabled too.
+                if (configEnableISA(InstructionSet_AVX) && configEnableISA(InstructionSet_AVX2))
                 {
                     opts.setSupportedISA(InstructionSet_AVX2);
                 }
@@ -2697,6 +2662,31 @@ void Compiler::compSetProcessor()
             }
         }
     }
+
+    opts.compCanUseSSE4 = false;
+    if (!jitFlags.IsSet(JitFlags::JIT_FLAG_PREJIT) && jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE41) &&
+        jitFlags.IsSet(JitFlags::JIT_FLAG_USE_SSE42))
+    {
+        if (JitConfig.EnableSSE3_4() != 0)
+        {
+            opts.compCanUseSSE4 = true;
+        }
+    }
+
+    if (!compIsForInlining())
+    {
+        if (canUseVexEncoding())
+        {
+            codeGen->getEmitter()->SetUseVEXEncoding(true);
+            // Start by assuming that the method being JITted contains no AVX instructions
+            codeGen->getEmitter()->SetContainsAVX(false);
+            codeGen->getEmitter()->SetContains256bitAVX(false);
+        }
+        else if (CanUseSSE4())
+        {
+            codeGen->getEmitter()->SetUseSSE4(true);
+        }
+    }
 #endif
 }
 

diff --git a/src/jit/compiler.h b/src/jit/compiler.h
@@ -7323,11 +7323,11 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     */
 
-    // Get highest available level for floating point codegen
-    SIMDLevel getFloatingPointCodegenLevel()
+    // Get highest available level for SIMD codegen
+    SIMDLevel getSIMDSupportLevel()
     {
 #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
-        if (canUseAVX())
+        if (compSupports(InstructionSet_AVX2))
         {
             return SIMD_AVX2_Supported;
         }
@@ -7340,18 +7340,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
         // min bar is SSE2
         assert(canUseSSE2());
         return SIMD_SSE2_Supported;
-#else
-        assert(!"getFPInstructionSet() is not implemented for target arch");
-        unreached();
-        return SIMD_Not_Supported;
-#endif
-    }
-
-    // Get highest available level for SIMD codegen
-    SIMDLevel getSIMDSupportLevel()
-    {
-#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
-        return getFloatingPointCodegenLevel();
 #else
         assert(!"Available instruction set(s) for SIMD codegen is not defined for target arch");
         unreached();
@@ -7635,13 +7623,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     var_types getSIMDVectorType()
     {
 #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
-        if (canUseAVX())
+        if (getSIMDSupportLevel() == SIMD_AVX2_Supported)
         {
             return TYP_SIMD32;
         }
         else
         {
-            assert(canUseSSE2());
+            assert(getSIMDSupportLevel() >= SIMD_SSE2_Supported);
             return TYP_SIMD16;
         }
 #elif defined(_TARGET_ARM64_)
@@ -7673,13 +7661,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     unsigned getSIMDVectorRegisterByteLength()
     {
 #if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
-        if (canUseAVX())
+        if (getSIMDSupportLevel() == SIMD_AVX2_Supported)
         {
             return YMM_REGSIZE_BYTES;
         }
         else
         {
-            assert(canUseSSE2());
+            assert(getSIMDSupportLevel() >= SIMD_SSE2_Supported);
             return XMM_REGSIZE_BYTES;
         }
 #elif defined(_TARGET_ARM64_)
@@ -7828,19 +7816,19 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #endif
     }
 
-    bool canUseAVX() const
+    bool compSupports(InstructionSet isa) const
     {
 #ifdef _TARGET_XARCH_
-        return opts.compCanUseAVX;
+        return (opts.compSupportsISA & (1ULL << isa)) != 0;
 #else
         return false;
 #endif
     }
 
-    bool compSupports(InstructionSet isa)
+    bool canUseVexEncoding() const
     {
 #ifdef _TARGET_XARCH_
-        return (opts.compSupportsISA & (1ULL << isa)) != 0;
+        return compSupports(InstructionSet_AVX);
 #else
         return false;
 #endif
@@ -7954,7 +7942,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #ifdef _TARGET_XARCH_
         bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
         bool compCanUseSSE4; // Allow CodeGen to use SSE3, SSSE3, SSE4.1 and SSE4.2 instructions
-        bool compCanUseAVX;  // Allow CodeGen to use AVX 256-bit vectors for SIMD operations
 #endif                       // _TARGET_XARCH_
 
 #ifdef _TARGET_XARCH_

diff --git a/src/jit/emit.h b/src/jit/emit.h
@@ -428,7 +428,7 @@ class emitter
 
 #ifdef _TARGET_XARCH_
         SetUseSSE4(false);
-        SetUseAVX(false);
+        SetUseVEXEncoding(false);
 #endif // _TARGET_XARCH_
     }
 

diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
@@ -60,7 +60,7 @@ bool IsAVXOnlyInstruction(instruction ins)
 bool emitter::IsAVXInstruction(instruction ins)
 {
 #ifndef LEGACY_BACKEND
-    return (UseAVX() && IsSSEOrAVXInstruction(ins));
+    return (UseVEXEncoding() && IsSSEOrAVXInstruction(ins));
 #else
     return false;
 #endif
@@ -120,7 +120,7 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins)
 // that use the SSE38 or SSE3A macro.
 bool emitter::Is4ByteAVXInstruction(instruction ins)
 {
-    return UseAVX() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins)) && EncodedBySSE38orSSE3A(ins);
+    return UseVEXEncoding() && (IsSSE4Instruction(ins) || IsAVXOnlyInstruction(ins)) && EncodedBySSE38orSSE3A(ins);
 }
 #endif // !LEGACY_BACKEND
 
@@ -353,7 +353,7 @@ unsigned RegEncoding(regNumber reg)
 // AVX:  specific bits within VEX prefix need to be set in bit-inverted form.
 emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
 {
-    if (UseAVX() && IsAVXInstruction(ins))
+    if (UseVEXEncoding() && IsAVXInstruction(ins))
     {
         // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
         assert(hasVexPrefix(code));
@@ -373,7 +373,7 @@ emitter::code_t emitter::AddRexWPrefix(instruction ins, code_t code)
 
 emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
 {
-    if (UseAVX() && IsAVXInstruction(ins))
+    if (UseVEXEncoding() && IsAVXInstruction(ins))
     {
         // Right now support 3-byte VEX prefix
         assert(hasVexPrefix(code));
@@ -387,7 +387,7 @@ emitter::code_t emitter::AddRexRPrefix(instruction ins, code_t code)
 
 emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
 {
-    if (UseAVX() && IsAVXInstruction(ins))
+    if (UseVEXEncoding() && IsAVXInstruction(ins))
     {
         // Right now support 3-byte VEX prefix
         assert(hasVexPrefix(code));
@@ -401,7 +401,7 @@ emitter::code_t emitter::AddRexXPrefix(instruction ins, code_t code)
 
 emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
 {
-    if (UseAVX() && IsAVXInstruction(ins))
+    if (UseVEXEncoding() && IsAVXInstruction(ins))
     {
         // Right now support 3-byte VEX prefix
         assert(hasVexPrefix(code));
@@ -416,7 +416,7 @@ emitter::code_t emitter::AddRexBPrefix(instruction ins, code_t code)
 // Adds REX prefix (0x40) without W, R, X or B bits set
 emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
 {
-    assert(!UseAVX() || !IsAVXInstruction(ins));
+    assert(!UseVEXEncoding() || !IsAVXInstruction(ins));
     return code | 0x4000000000ULL;
 }
 
@@ -446,7 +446,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c
     if (hasVexPrefix(code))
     {
         // Only AVX instructions should have a VEX prefix
-        assert(UseAVX() && IsAVXInstruction(ins));
+        assert(UseVEXEncoding() && IsAVXInstruction(ins));
         code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
         code &= 0x00000000FFFFFFFFLL;
 
@@ -3771,7 +3771,7 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN
         // AVX: 3 byte VEX prefix + 1 byte opcode + 1 byte ModR/M + 1 byte immediate
         // SSE4: 4 byte opcode + 1 byte ModR/M + 1 byte immediate
         // SSE2: 3 byte opcode + 1 byte ModR/M + 1 byte immediate
-        sz = (UseAVX() || UseSSE4()) ? 6 : 5;
+        sz = (UseVEXEncoding() || UseSSE4()) ? 6 : 5;
     }
 
 #ifdef _TARGET_AMD64_

diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
@@ -147,14 +147,14 @@ code_t AddVexPrefixIfNeededAndNotPresent(instruction ins, code_t code, emitAttr
     return code;
 }
 
-bool useAVXEncodings;
-bool UseAVX()
+bool useVEXEncodings;
+bool UseVEXEncoding()
 {
-    return useAVXEncodings;
+    return useVEXEncodings;
 }
-void SetUseAVX(bool value)
+void SetUseVEXEncoding(bool value)
 {
-    useAVXEncodings = value;
+    useVEXEncodings = value;
 }
 
 bool containsAVXInstruction = false;
@@ -185,11 +185,11 @@ bool IsThreeOperandAVXInstruction(instruction ins)
 }
 bool Is4ByteAVXInstruction(instruction ins);
 #else  // LEGACY_BACKEND
-bool UseAVX()
+bool UseVEXEncoding()
 {
     return false;
 }
-void SetUseAVX(bool value)
+void SetUseVEXEncoding(bool value)
 {
 }
 bool ContainsAVX()

diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
@@ -3274,7 +3274,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*
         }
         else
 #endif // FEATURE_SIMD
-            if (compiler->canUseAVX())
+            if (compiler->canUseVexEncoding())
         {
             return (aligned) ? INS_movapd : INS_movupd;
         }
@@ -3439,7 +3439,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false
         }
         else
 #endif // FEATURE_SIMD
-            if (compiler->canUseAVX())
+            if (compiler->canUseVexEncoding())
         {
             return (aligned) ? INS_movapd : INS_movupd;
         }

diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
@@ -2219,7 +2219,7 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
                 // No need to set isInternalRegDelayFree since targetReg is a
                 // an int type reg and guaranteed to be different from xmm/ymm
                 // regs.
-                info->internalFloatCount = compiler->canUseAVX() ? 2 : 1;
+                info->internalFloatCount = (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported) ? 2 : 1;
                 info->setInternalCandidates(this, allSIMDRegs());
             }
             info->srcCount = 2;
@@ -2431,6 +2431,12 @@ void LinearScan::TreeNodeInfoInitSIMD(GenTreeSIMD* simdTree)
 
 void LinearScan::TreeNodeInfoInitHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
 {
+    NamedIntrinsic intrinsicID = intrinsicTree->gtHWIntrinsicId;
+    InstructionSet isa         = compiler->isaOfHWIntrinsic(intrinsicID);
+    if (isa == InstructionSet_AVX || isa == InstructionSet_AVX2)
+    {
+        SetContainsAVXFlags(true, 32);
+    }
     TreeNodeInfo* info = &(intrinsicTree->gtLsraInfo);
     if (intrinsicTree->gtGetOp2IfPresent() != nullptr)
     {
@@ -2804,13 +2810,10 @@ void LinearScan::TreeNodeInfoInitMul(GenTreePtr tree)
 //
 void LinearScan::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsigned sizeOfSIMDVector /* = 0*/)
 {
-    if (isFloatingPointType)
+    if (isFloatingPointType && compiler->canUseVexEncoding())
     {
-        if (compiler->getFloatingPointCodegenLevel() == SIMD_AVX2_Supported)
-        {
-            compiler->getEmitter()->SetContainsAVX(true);
-        }
-        if (sizeOfSIMDVector == 32 && compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
+        compiler->getEmitter()->SetContainsAVX(true);
+        if (sizeOfSIMDVector == 32)
         {
             compiler->getEmitter()->SetContains256bitAVX(true);
         }