Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/coreclr/jit/abi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ var_types ABIPassingSegment::GetRegisterType() const
{
switch (Size)
{
case 2:
return TYP_HALF;
case 4:
return TYP_FLOAT;
case 8:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7363,7 +7363,7 @@ bool CodeGen::isStructReturn(GenTree* treeNode)
}

#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
assert(!varTypeIsStruct(treeNode));
assert(!varTypeIsStruct(treeNode) || treeNode->TypeGet() == TYP_HALF);
return false;
#else
return varTypeIsStruct(treeNode) && (m_compiler->info.compRetNativeType == TYP_STRUCT);
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6063,7 +6063,7 @@ void CodeGen::genCall(GenTreeCall* call)
}
else
#endif // TARGET_X86
if (varTypeIsFloating(returnType))
if (varTypeIsFloating(returnType) || returnType == TYP_HALF)
{
returnReg = REG_FLOATRET;
}
Expand Down Expand Up @@ -6158,7 +6158,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA
}
else
{
assert(!varTypeIsStruct(call));
assert(!varTypeIsStruct(call) || call->TypeIs(TYP_HALF));

if (call->TypeIs(TYP_REF))
{
Expand Down Expand Up @@ -8346,7 +8346,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
}
#endif // defined(TARGET_X86) && defined(FEATURE_SIMD)

if (varTypeIsSIMD(targetType))
if (varTypeIsSIMD(targetType) || targetType == TYP_HALF)
{
regNumber srcReg = genConsumeReg(source);
assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
Expand Down
28 changes: 27 additions & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,32 @@ bool Compiler::isNativePrimitiveStructType(CORINFO_CLASS_HANDLE clsHnd)
return strcmp(typeName, "CLong") == 0 || strcmp(typeName, "CULong") == 0 || strcmp(typeName, "NFloat") == 0;
}

//-----------------------------------------------------------------------------
// isNativeHalfStructType:
//    Check whether a class handle names the intrinsic System.Half type in a
//    compilation that can depend on AVX10v1.
//
// Arguments:
//    clsHnd - the class handle to inspect
//
// Return Value:
//    On xarch targets: false if the class is not an intrinsic type or its
//    metadata name is not System.Half; otherwise whatever
//    compOpportunisticallyDependsOn(InstructionSet_AVX10v1) reports.
//    On every other target: always false.
//
bool Compiler::isNativeHalfStructType(CORINFO_CLASS_HANDLE clsHnd)
{
#if !defined(TARGET_XARCH)
    return false;
#else
    if (isIntrinsicType(clsHnd))
    {
        const char* nsName  = nullptr;
        const char* clsName = getClassNameFromMetadata(clsHnd, &nsName);

        // The namespace is compared first and the type name only if it matches.
        const bool isSystemHalf = (strcmp(nsName, "System") == 0) && (strcmp(clsName, "Half") == 0);

        if (isSystemHalf)
        {
            // The ISA query is made only once the type is known to be System.Half.
            return compOpportunisticallyDependsOn(InstructionSet_AVX10v1);
        }
    }

    return false;
#endif
}

//-----------------------------------------------------------------------------
// getPrimitiveTypeForStruct:
// Get the "primitive" type that is used for a struct
Expand Down Expand Up @@ -651,7 +677,7 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
break;

case 2:
useType = TYP_USHORT;
useType = isNativeHalfStructType(clsHnd) ? TYP_HALF : TYP_USHORT;
break;

#if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI)
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4725,6 +4725,11 @@ class Compiler

NamedIntrinsic lookupPrimitiveFloatNamedIntrinsic(CORINFO_METHOD_HANDLE method, const char* methodName);
NamedIntrinsic lookupPrimitiveIntNamedIntrinsic(CORINFO_METHOD_HANDLE method, const char* methodName);

NamedIntrinsic lookupHalfIntrinsic(NamedIntrinsic ni);
NamedIntrinsic lookupHalfConversionIntrinsic(var_types fromType, var_types toType);
int lookupHalfRoundingMode(NamedIntrinsic ni);

GenTree* impUnsupportedNamedIntrinsic(unsigned helper,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
Expand Down Expand Up @@ -5928,6 +5933,7 @@ class Compiler
// Returns true if the provided type should be treated as a primitive type
// for the unmanaged calling conventions.
bool isNativePrimitiveStructType(CORINFO_CLASS_HANDLE clsHnd);
bool isNativeHalfStructType(CORINFO_CLASS_HANDLE clsHnd);

enum structPassingKind
{
Expand Down Expand Up @@ -9970,8 +9976,11 @@ class Compiler

// Use to determine if a struct *might* be a SIMD type. As this function only takes a size, many
// structs will fit the criteria.
bool structSizeMightRepresentSIMDType(size_t structSize)
bool structSizeMightRepresentAcceleratedType(size_t structSize)
{
if (structSize == 2)
return true;

#ifdef FEATURE_SIMD
return (structSize >= getMinVectorByteLength()) && (structSize <= getMaxVectorByteLength());
#else
Expand Down
14 changes: 8 additions & 6 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2029,18 +2029,20 @@ class emitter

#define PERFSCORE_THROUGHPUT_ZERO 0.0f // Only used for pseudo-instructions that don't generate code

#define PERFSCORE_THROUGHPUT_9X (1.0f / 9.0f)
#define PERFSCORE_THROUGHPUT_6X (1.0f / 6.0f) // Hextuple issue
#define PERFSCORE_THROUGHPUT_5X 0.20f // Pentuple issue
#define PERFSCORE_THROUGHPUT_4X 0.25f // Quad issue
#define PERFSCORE_THROUGHPUT_3X (1.0f / 3.0f) // Three issue
#define PERFSCORE_THROUGHPUT_2X 0.5f // Dual issue
#define PERFSCORE_THROUGHPUT_9X (1.0f / 9.0f)
#define PERFSCORE_THROUGHPUT_6X (1.0f / 6.0f) // Hextuple issue
#define PERFSCORE_THROUGHPUT_5X 0.20f // Pentuple issue
#define PERFSCORE_THROUGHPUT_4X 0.25f // Quad issue
#define PERFSCORE_THROUGHPUT_3X (1.0f / 3.0f) // Three issue
#define PERFSCORE_THROUGHPUT_2X 0.5f // Dual issue
#define PERFSCORE_THROUGHPUT_1P5X 0.67f // Three instructions every two cycles (~1.5 per cycle)

#define PERFSCORE_THROUGHPUT_1C 1.0f // Single Issue

#define PERFSCORE_THROUGHPUT_2C 2.0f // slower - 2 cycles
#define PERFSCORE_THROUGHPUT_3C 3.0f // slower - 3 cycles
#define PERFSCORE_THROUGHPUT_4C 4.0f // slower - 4 cycles
#define PERFSCORE_THROUGHPUT_4P5C 4.5f // slower - 4.5 cycles
#define PERFSCORE_THROUGHPUT_5C 5.0f // slower - 5 cycles
#define PERFSCORE_THROUGHPUT_6C 6.0f // slower - 6 cycles
#define PERFSCORE_THROUGHPUT_7C 7.0f // slower - 7 cycles
Expand Down
148 changes: 117 additions & 31 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,9 @@ bool emitter::Is3OpRmwInstruction(instruction ins)
default:
{
return ((ins >= FIRST_FMA_INSTRUCTION) && (ins <= LAST_FMA_INSTRUCTION)) ||
(IsAVXVNNIFamilyInstruction(ins)) ||
((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION));
IsAVXVNNIFamilyInstruction(ins) ||
((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION)) ||
((ins >= FIRST_AVX10V1_FMA_INSTR) && (ins <= LAST_AVX10V1_FMA_INSTR));
}
}
}
Expand Down Expand Up @@ -3077,7 +3078,7 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
// 1. An escape byte 0F (For isa before AVX10.2)
// 2. A map number from 0 to 7 (For AVX10.2 and above)
leadingBytes = check;
assert((leadingBytes == 0x0F) || ((m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) ||
assert((leadingBytes == 0x0F) || ((m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1) ||
(m_compiler->compIsaSupportedDebugOnly(InstructionSet_APX))) &&
(leadingBytes >= 0x00) && (leadingBytes <= 0x07)));

Expand Down Expand Up @@ -3159,15 +3160,21 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co

case 0x05:
{
assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2));
assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1));
evexPrefix |= (0x05 << 16);
break;
}

case 0x06:
{
assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1));
evexPrefix |= (0x06 << 16);
break;
}

case 0x01:
case 0x02:
case 0x03:
case 0x06:
case 0x07:
default:
{
Expand Down Expand Up @@ -5388,10 +5395,8 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)

assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
|| (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) || (attrSize == EA_64BYTE) // only for x64
|| (ins == INS_movzx) || (ins == INS_movsx) ||
(ins == INS_cmpxchg)
// kmov instructions reach this path with EA_8BYTE size, even on x86
|| IsKMOVInstruction(ins)
Comment on lines -5393 to -5394
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for removing this part of the assert?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that was an error; I will fix it.

|| (ins == INS_movzx) || (ins == INS_movsx) || (ins == INS_vmovsh) || (ins == INS_cmpxchg) ||
IsKMOVInstruction(ins)
// The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded
|| isPrefetch(ins));

Expand Down Expand Up @@ -7424,6 +7429,7 @@ bool emitter::IsMovInstruction(instruction ins)
case INS_kmovw_gpr:
case INS_kmovd_gpr:
case INS_kmovq_gpr:
case INS_vmovsh:
{
return true;
}
Expand Down Expand Up @@ -7622,6 +7628,13 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
break;
}

case INS_vmovsh:
{
// Clears the upper bits
hasSideEffect = true;
break;
}

default:
{
unreached();
Expand Down Expand Up @@ -7895,6 +7908,12 @@ bool emitter::emitIns_Mov(
break;
}

case INS_vmovsh:
{
assert(isFloatReg(dstReg) && isFloatReg(srcReg));
break;
}

default:
{
unreached();
Expand Down Expand Up @@ -11797,6 +11816,10 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) con

case EA_2BYTE:
{
if (IsXMMReg(reg))
{
return emitXMMregName(reg);
}
Comment on lines +11819 to +11822
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be TARGET_AMD64 exclusive either.

#if defined(TARGET_AMD64)
if (reg > REG_RDI)
{
Expand Down Expand Up @@ -14522,7 +14545,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// Is this a 'big' opcode?
else if (code & 0xFF000000)
{
if (size == EA_2BYTE)
if (size == EA_2BYTE && ins != INS_vmovsh)
{
assert(ins == INS_movbe);

Expand Down Expand Up @@ -15390,7 +15413,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
// Is this a 'big' opcode?
else if (code & 0xFF000000)
{
if (size == EA_2BYTE)
if (size == EA_2BYTE && !IsSimdInstruction(ins))
{
assert(ins == INS_movbe);

Expand Down Expand Up @@ -20894,28 +20917,29 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_movups:
case INS_movapd:
case INS_movupd:
{
if (memAccessKind == PERFSCORE_MEMORY_NONE)
// todo-xarch-half: come back to fix
{
// ins reg, reg
result.insThroughput = PERFSCORE_THROUGHPUT_4X;
result.insLatency = PERFSCORE_LATENCY_ZERO;
}
else if (memAccessKind == PERFSCORE_MEMORY_READ)
{
// ins reg, mem
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_2C;
}
else
{
// ins mem, reg
assert(memAccessKind == PERFSCORE_MEMORY_WRITE);
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_2C;
if (memAccessKind == PERFSCORE_MEMORY_NONE)
{
// ins reg, reg
result.insThroughput = PERFSCORE_THROUGHPUT_4X;
result.insLatency = PERFSCORE_LATENCY_ZERO;
}
else if (memAccessKind == PERFSCORE_MEMORY_READ)
{
// ins reg, mem
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_3C : PERFSCORE_LATENCY_2C;
}
else
{
// ins mem, reg
assert(memAccessKind == PERFSCORE_MEMORY_WRITE);
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_2C;
}
break;
}
break;
}

case INS_movhps:
case INS_movhpd:
Expand Down Expand Up @@ -20946,6 +20970,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_movss:
case INS_movsd_simd:
case INS_movddup:
case INS_vmovsh:
{
if (memAccessKind == PERFSCORE_MEMORY_NONE)
{
Expand Down Expand Up @@ -21377,6 +21402,67 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
}

case INS_vaddsh:
case INS_vsubsh:
case INS_vmulsh:
case INS_vfmadd213sh:
case INS_vmaxsh:
case INS_vminsh:
case INS_vcvtsh2ss:
result.insLatency = PERFSCORE_LATENCY_4C;
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
break;

case INS_vdivsh:
result.insLatency = PERFSCORE_LATENCY_14C;
result.insThroughput = PERFSCORE_THROUGHPUT_4C;
break;

case INS_vsqrtsh:
result.insLatency = PERFSCORE_LATENCY_14C;
result.insThroughput = PERFSCORE_THROUGHPUT_4P5C;
break;

case INS_vrsqrtsh:
case INS_vcomish:
case INS_vucomish:
case INS_vrcpsh:
result.insLatency = PERFSCORE_LATENCY_4C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case INS_vrndscalesh:
result.insLatency = PERFSCORE_LATENCY_8C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case INS_vcvtss2sh:
result.insLatency = PERFSCORE_LATENCY_6C;
result.insThroughput = PERFSCORE_THROUGHPUT_1P5X;
break;

case INS_vcvtsd2sh:
result.insLatency = PERFSCORE_THROUGHPUT_ILLEGAL;
result.insThroughput = PERFSCORE_THROUGHPUT_ILLEGAL;
break;

case INS_vcvtsh2sd:
result.insLatency = PERFSCORE_LATENCY_10C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case INS_vcvtsi2sh32:
case INS_vcvtsi2sh64:
case INS_vcvtsh2si32:
case INS_vcvtsh2si64:
case INS_vcvtusi2sh32:
case INS_vcvtusi2sh64:
case INS_vcvtsh2usi32:
case INS_vcvtsh2usi64:
result.insLatency = PERFSCORE_LATENCY_7C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

default:
{
assert((unsigned)ins < ArrLen(insThroughputInfos));
Expand Down
Loading
Loading