diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index 2520864b707421..6717204ece3cff 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -708,6 +708,7 @@ class CodeGen final : public CodeGenInterface
     void genAmd64EmitterUnitTestsSse2();
     void genAmd64EmitterUnitTestsApx();
     void genAmd64EmitterUnitTestsAvx10v2();
+    void genAmd64EmitterUnitTestsCCMP();
 #endif
 
 #endif // defined(DEBUG)
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 945f468d86fa81..cb895c1d71fa29 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -2716,6 +2716,10 @@ void CodeGen::genEmitterUnitTests()
     {
         genAmd64EmitterUnitTestsAvx10v2();
     }
+    if (unitTestSectionAll || (strstr(unitTestSection, "ccmp") != nullptr))
+    {
+        genAmd64EmitterUnitTestsCCMP();
+    }
 
 #elif defined(TARGET_ARM64)
     if (unitTestSectionAll || (strstr(unitTestSection, "general") != nullptr))
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index e0da7904ecc688..f9a5b1f896d253 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -9490,6 +9490,84 @@ void CodeGen::genAmd64EmitterUnitTestsAvx10v2()
     theEmitter->emitIns_R_R(INS_vmovw, EA_16BYTE, REG_XMM0, REG_XMM1);
 }
 
+/*****************************************************************************
+ * Unit tests for the CCMP instructions: emits the RR, RS, RI and RC forms.
+ */
+
+void CodeGen::genAmd64EmitterUnitTestsCCMP()
+{
+    emitter* theEmitter = GetEmitter();
+    genDefineTempLabel(genCreateTempLabel());
+
+    // ============
+    // Test RR form
+    // ============
+
+    // Test all sizes
+    theEmitter->emitIns_R_R(INS_ccmpe, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_R(INS_ccmpe, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_R(INS_ccmpe, EA_2BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_R(INS_ccmpe, EA_1BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
+
+    // Test all CC codes (every instruction strictly between the FIRST/LAST CCMP markers)
+    for (uint32_t ins = INS_FIRST_CCMP_INSTRUCTION + 1; ins < INS_LAST_CCMP_INSTRUCTION; ins++)
+    {
+        theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf);
+    }
+
+    // Test all dfv (all 16 combinations of the four default flag value bits)
+    for (int i = 0; i < 16; i++)
+    {
+        theEmitter->emitIns_R_R(INS_ccmpe, EA_4BYTE, REG_RAX, REG_RCX, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset));
+    }
+
+    // ============
+    // Test RS form
+    // ============
+
+    // Test all sizes
+    theEmitter->emitIns_R_S(INS_ccmpe, EA_4BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_S(INS_ccmpe, EA_8BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_S(INS_ccmpe, EA_2BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_S(INS_ccmpe, EA_1BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf);
+
+    // Test all CC codes (every instruction strictly between the FIRST/LAST CCMP markers)
+    for (uint32_t ins = INS_FIRST_CCMP_INSTRUCTION + 1; ins < INS_LAST_CCMP_INSTRUCTION; ins++)
+    {
+        theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_RAX, 0, 0, INS_OPTS_EVEX_dfv_cf);
+    }
+
+    // Test all dfv (all 16 combinations of the four default flag value bits)
+    for (int i = 0; i < 16; i++)
+    {
+        theEmitter->emitIns_R_S(INS_ccmpe, EA_4BYTE, REG_RAX, 0, 0, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset));
+    }
+
+    // ============
+    // Test RI form (test small and large sizes and constants)
+    // ============
+
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_4BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_4BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);
+
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_8BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_8BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);
+
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_2BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_2BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);
+
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_1BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_I(INS_ccmpe, EA_1BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf);
+
+    // ============
+    // Test RC form
+    // ============
+
+    CORINFO_FIELD_HANDLE hnd = theEmitter->emitFltOrDblConst(1.0f, EA_4BYTE);
+    theEmitter->emitIns_R_C(INS_ccmpe, EA_4BYTE, REG_RAX, hnd, 0, INS_OPTS_EVEX_dfv_cf);
+    theEmitter->emitIns_R_C(INS_ccmpe, EA_4BYTE, REG_RAX, hnd, 4, INS_OPTS_EVEX_dfv_cf);
+}
+
 #endif // defined(DEBUG) && defined(TARGET_AMD64)
 
 #ifdef PROFILING_SUPPORTED
diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 55728540cd2036..5574fc0b439194 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -803,6 +803,9 @@ class emitter
 #define _idEvexNdContext _idCustom5 /* bits used for the APX-EVEX.nd context for promoted legacy instructions */
 #define _idEvexNfContext _idCustom6 /* bits used for the APX-EVEX.nf context for promoted legacy/vex instructions */
 
+    // ccmp repurposes _idCustom1.._idCustom4 as its 4-bit default flag value (_idCustom1 is bit 0); keep the parens.
+#define _idEvexDFV ((_idCustom4 << 3) | (_idCustom3 << 2) | (_idCustom2 << 1) | _idCustom1)
+
     // In certian cases, we do not allow instructions to be promoted to APX-EVEX.
     // e.g. instructions like add/and/or/inc/dec can be used with LOCK prefix, but cannot be prefixed by LOCK and
     // EVEX together.
@@ -1753,6 +1756,23 @@ class emitter
         assert(!idIsNoApxEvexPromotion());
         _idNoApxEvexXPromotion = 1;
     }
+
+    unsigned idGetEvexDFV() const
+    {
+        return _idEvexDFV;
+    }
+
+    void idSetEvexDFV(insOpts instOptions)
+    {
+        unsigned value = static_cast<unsigned>((instOptions & INS_OPTS_EVEX_dfv_MASK) >> INS_OPTS_EVEX_dfv_byte_offset);
+
+        _idCustom1 = ((value >> 0) & 1);
+        _idCustom2 = ((value >> 1) & 1);
+        _idCustom3 = ((value >> 2) & 1);
+        _idCustom4 = ((value >> 3) & 1);
+
+        assert(value == idGetEvexDFV());
+    }
 #endif
 
 #ifdef TARGET_ARMARCH
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 273dff1367bb54..ad9eace6712c3f 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -85,6 +85,11 @@ bool emitter::IsAvx512OnlyInstruction(instruction ins)
     return (ins >= INS_FIRST_AVX512_INSTRUCTION) && (ins <= INS_LAST_AVX512_INSTRUCTION);
 }
 
+bool emitter::IsApxOnlyInstruction(instruction ins)
+{
+    return (ins >= INS_FIRST_APX_INSTRUCTION) && (ins <= INS_LAST_APX_INSTRUCTION);
+}
+
 bool emitter::IsFMAInstruction(instruction ins)
 {
     return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
@@ -359,7 +364,17 @@ bool emitter::IsApxExtendedEvexInstruction(instruction ins) const
         return false;
     }
 
-    return HasApxNdd(ins) || HasApxNf(ins);
+    if (HasApxNdd(ins) || HasApxNf(ins))
+    {
+        return true;
+    }
+
+    if (IsApxOnlyInstruction(ins))
+    {
+        return true;
+    }
+
+    return false;
 }
 
 //------------------------------------------------------------------------
@@ -777,6 +792,65 @@ bool emitter::DoJitUseApxNDD(instruction ins) const
 #endif
 }
 
+bool emitter::IsCCMP(instruction ins)
+{
+    return (ins > INS_FIRST_CCMP_INSTRUCTION && ins < INS_LAST_CCMP_INSTRUCTION);
+}
+
+//------------------------------------------------------------------------
+// GetCCFromCCMP: Get a condition code from a ccmp instruction
+//
+// Arguments:
+//    ins - The instruction to check; must satisfy IsCCMP.
+//
+// Returns:
+//    `insCC` representing the condition code for a ccmp instruction.
+//    Unlike other x86 condition-code instructions, every ccmpx variant
+//    shares one opcode encoding; the condition code is instead carried
+//    in the EVEX prefix.
+//
+insCC emitter::GetCCFromCCMP(instruction ins)
+{
+    assert(IsCCMP(ins));
+    switch (ins)
+    {
+        case INS_ccmpo:
+            return INS_CC_O;
+        case INS_ccmpno:
+            return INS_CC_NO;
+        case INS_ccmpb:
+            return INS_CC_B;
+        case INS_ccmpae:
+            return INS_CC_AE;
+        case INS_ccmpe:
+            return INS_CC_E;
+        case INS_ccmpne:
+            return INS_CC_NE;
+        case INS_ccmpbe:
+            return INS_CC_BE;
+        case INS_ccmpa:
+            return INS_CC_A;
+        case INS_ccmps:
+            return INS_CC_S;
+        case INS_ccmpns:
+            return INS_CC_NS;
+        case INS_ccmpt:
+            return INS_CC_TRUE;
+        case INS_ccmpf:
+            return INS_CC_FALSE;
+        case INS_ccmpl:
+            return INS_CC_L;
+        case INS_ccmpge:
+            return INS_CC_GE;
+        case INS_ccmple:
+            return INS_CC_LE;
+        case INS_ccmpg:
+            return INS_CC_G;
+        default:
+            unreached();
+    }
+}
+
 #ifdef TARGET_64BIT
 //------------------------------------------------------------------------
 // AreUpperBitsZero: check if some previously emitted
@@ -1778,6 +1852,10 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const
         return true;
     }
 #endif // DEBUG
+    if (IsApxOnlyInstruction(ins))
+    {
+        return true;
+    }
 
     return false;
 }
@@ -1897,6 +1975,14 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
         // it to EVEX when needed with some helper functions.
code &= 0xFF7FFFFFFFFFFFFFULL; } +#ifdef TARGET_AMD64 + if (IsCCMP(ins)) + { + code &= 0xFFFF87F0FFFFFFFF; + code |= ((size_t)id->idGetEvexDFV()) << 43; + code |= ((size_t)GetCCFromCCMP(ins)) << 32; + } +#endif return code; } @@ -2004,6 +2090,12 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt default: { +#ifdef TARGET_AMD64 + if (IsCCMP(id->idIns())) // Special case for conditional ins such as CCMP, CCMOV + { + break; + } +#endif unsigned aaaContext = id->idGetEvexAaaContext(); if (aaaContext != 0) @@ -6843,6 +6935,7 @@ void emitter::emitIns_R_I(instruction ins, #endif SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); if (isSimdInsAndValInByte) { @@ -7640,6 +7733,7 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum SetEvexNdIfNeeded(id, instOptions); SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins)) { @@ -8562,6 +8656,7 @@ void emitter::emitIns_R_C( { SetEvexBroadcastIfNeeded(id, instOptions); SetEvexEmbMaskIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); sz = emitInsSizeCV(id, insCodeRM(ins)); } @@ -10492,6 +10587,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int va SetEvexBroadcastIfNeeded(id, instOptions); SetEvexEmbMaskIfNeeded(id, instOptions); SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs); id->idCodeSize(sz); @@ -12394,6 +12490,27 @@ void emitter::emitDispIns( sstr = codeGen->genInsDisplayName(id); printf(" %-9s", sstr); +#ifdef TARGET_AMD64 + if (IsCCMP(id->idIns())) + { + // print finite set notation for DFV + unsigned dfv = id->idGetEvexDFV(); + char dfvstr[20] = {0}; + int len = 0; + if (dfv & INS_FLAGS_OF) + len += snprintf(dfvstr + len, 4, "of,"); + if (dfv & INS_FLAGS_SF) + len += snprintf(dfvstr + len, 4, "sf,"); + if (dfv & 
INS_FLAGS_ZF) + len += snprintf(dfvstr + len, 4, "zf,"); + if (dfv & INS_FLAGS_CF) + len += snprintf(dfvstr + len, 4, "cf,"); + if (len) + dfvstr[len - 1] = 0; + printf("{dfv=%s} ", dfvstr); + } +#endif // TARGET_AMD64 + #ifndef HOST_UNIX if (strnlen_s(sstr, 10) >= 9) #else // HOST_UNIX @@ -16423,7 +16540,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // So the logic should be: // checking if those new features are used, then check if EGPRs are involved. // EGPRs will be supported by EVEX anyway, so don't need to check in the first place. - assert(!TakesSimdPrefix(id)); + assert(!TakesSimdPrefix(id) || TakesApxExtendedEvexPrefix(id)); code = insCodeMR(ins); code = AddX86PrefixIfNeeded(id, code, size); code = insEncodeMRreg(id, code); @@ -16444,7 +16561,6 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) // Output a size prefix for a 16-bit operand if (TakesApxExtendedEvexPrefix(id)) { - assert(IsApxExtendedEvexInstruction(ins)); assert(hasEvexPrefix(code)); // Evex.pp should already be added when adding the prefix. assert((code & EXTENDED_EVEX_PP_BITS) != 0); @@ -16453,10 +16569,21 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) { dst += emitOutputByte(dst, 0x66); } - FALLTHROUGH; + + code |= 0x1; + break; case EA_4BYTE: // Set the 'w' bit to get the large version + +#ifdef TARGET_AMD64 + if (TakesApxExtendedEvexPrefix(id)) + { + assert(hasEvexPrefix(code)); + // Evex.pp should already be added when adding the prefix + assert((code & EXTENDED_EVEX_PP_BITS) == 0); + } +#endif code |= 0x1; break; @@ -16516,7 +16643,11 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) regCode = insEncodeReg012(id, reg2, size, &code); } +#ifdef TARGET_AMD64 + if (TakesSimdPrefix(id) && !IsCCMP(ins)) +#else if (TakesSimdPrefix(id)) +#endif { // In case of AVX instructions that take 3 operands, we generally want to encode reg1 // as first source. In this case, reg1 is both a source and a destination. 
@@ -19951,6 +20082,26 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_cmovge:
         case INS_cmovle:
         case INS_cmovg:
+#ifdef TARGET_AMD64
+        // TODO-XArch-APX: ccmp shares the cmov cases above for now; the perf characteristics still need to be confirmed.
+        case INS_ccmpo:
+        case INS_ccmpno:
+        case INS_ccmpb:
+        case INS_ccmpae:
+        case INS_ccmpe:
+        case INS_ccmpne:
+        case INS_ccmpbe:
+        case INS_ccmpa:
+        case INS_ccmps:
+        case INS_ccmpns:
+        case INS_ccmpt:
+        case INS_ccmpf:
+        case INS_ccmpl:
+        case INS_ccmpge:
+        case INS_ccmple:
+        case INS_ccmpg:
+#endif
+
             if (memFmt == IF_NONE)
             {
                 result.insThroughput = PERFSCORE_THROUGHPUT_4X;
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 62f1d580ae0f9b..93bd131f5a3472 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -128,6 +128,7 @@ static bool IsAVXVNNIInstruction(instruction ins);
 static bool IsBMIInstruction(instruction ins);
 static bool IsKInstruction(instruction ins);
 static bool IsKInstructionWithLBit(instruction ins);
+static bool IsApxOnlyInstruction(instruction ins);
 
 static regNumber getBmiRegNumber(instruction ins);
 static regNumber getSseShiftRegNumber(instruction ins);
@@ -571,6 +572,25 @@ void SetEvexNfIfNeeded(instrDesc* id, insOpts instOptions)
     }
 }
 
+//------------------------------------------------------------------------
+// SetEvexDFVIfNeeded: record the ccmp default flag values (dfv) on an instrDesc; no-op when no dfv bit is set
+//
+// Arguments:
+//    id          - instruction descriptor; asserted to be a ccmp instruction when any dfv bit is set
+//    instOptions - emit options; only the INS_OPTS_EVEX_dfv_MASK bits are examined here
+//
+void SetEvexDFVIfNeeded(instrDesc* id, insOpts instOptions)
+{
+#if defined(TARGET_AMD64)
+    if ((instOptions & INS_OPTS_EVEX_dfv_MASK) != 0)
+    {
+        assert(UsePromotedEVEXEncoding());
+        assert(IsCCMP(id->idIns()));
+        id->idSetEvexDFV(instOptions);
+    }
+#endif
+}
+
 //------------------------------------------------------------------------
 // AddSimdPrefixIfNeeded: Add the correct SIMD prefix.
 // Check if the prefix already exists befpre adding.
@@ -683,6 +703,9 @@ static bool IsRexW1Instruction(instruction ins);
 static bool IsRexWXInstruction(instruction ins);
 static bool IsRexW1EvexInstruction(instruction ins);
 
+static bool IsCCMP(instruction ins);
+static insCC GetCCFromCCMP(instruction ins);
+
 bool isAvx512Blendv(instruction ins)
 {
     return ins == INS_vblendmps || ins == INS_vblendmpd || ins == INS_vpblendmb || ins == INS_vpblendmd ||
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 098afb0238cdd5..d5d14b3cd5df08 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -9523,7 +9523,71 @@ enum insCflags : unsigned
     INS_FLAGS_NZC,
     INS_FLAGS_NZCV,
 };
 
+#elif defined(TARGET_XARCH)
+enum insCflags : unsigned
+{
+    INS_FLAGS_NONE = 0x0,
+    INS_FLAGS_CF = 0x1,
+    INS_FLAGS_ZF = 0x2,
+    INS_FLAGS_SF = 0x4,
+    INS_FLAGS_OF = 0x8
+};
+
+// TODO-APX-XArch: this enum might not be strictly necessary, but it is useful to have the
+// ccmp condition-code (CC) encodings spelled out in one place.
+enum insCC : unsigned
+{
+    INS_CC_O = 0x0, // OF = 1
+
+    INS_CC_NO = 0x1, // OF = 0
+
+    INS_CC_B = 0x2, // CF = 1
+    INS_CC_C = 0x2, // CF = 1
+    INS_CC_NAE = 0x2, // CF = 1
+
+    INS_CC_NB = 0x3, // CF = 0
+    INS_CC_NC = 0x3, // CF = 0
+    INS_CC_AE = 0x3, // CF = 0
+
+    INS_CC_E = 0x4, // ZF = 1
+    INS_CC_Z = 0x4, // ZF = 1
+
+    INS_CC_NE = 0x5, // ZF = 0
+    INS_CC_NZ = 0x5, // ZF = 0
+
+    INS_CC_BE = 0x6, // (CF OR ZF) = 1
+    INS_CC_NA = 0x6, // (CF OR ZF) = 1
+
+    INS_CC_NBE = 0x7, // (CF OR ZF) = 0
+    INS_CC_A = 0x7, // (CF OR ZF) = 0
+    INS_CC_S = 0x8, // SF = 1
+
+    INS_CC_NS = 0x9, // SF = 0
+
+    // no parity flag in ccmp/ctest
+
+    // 0b1010 special always evals to true
+    INS_CC_TRUE = 0xA,
+
+    // 0b1011 special always evals to false
+    INS_CC_FALSE = 0xB,
+
+    INS_CC_L = 0xC, // (SF XOR OF) = 1
+    INS_CC_NGE = 0xC, // (SF XOR OF) = 1
+
+    INS_CC_NL = 0xD, // (SF XOR OF) = 0
+    INS_CC_GE = 0xD, // (SF XOR OF) = 0
+
+    INS_CC_LE = 0xE, // ((SF XOR OF) OR ZF) = 1
+    INS_CC_NG = 0xE, // ((SF XOR OF) OR ZF) = 1
+
+    INS_CC_NLE = 0xF, // ((SF XOR OF) OR ZF) = 0
+    INS_CC_G = 0xF, // ((SF XOR OF) OR ZF) = 0
+};
+#endif
+
+#if defined(TARGET_ARM64)
 struct GenTreeCCMP final : public GenTreeOpCC
 {
     insCflags gtFlagsVal;
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index 3ff3708785cd4b..e90102caf241fe 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -275,8 +275,16 @@ enum insOpts: unsigned
 
     INS_OPTS_EVEX_nf_MASK = 0x80, // mask for APX-EVEX.nf related features
     INS_OPTS_EVEX_nf = 1 << 7, // NDD form for legacy instructions
+    INS_OPTS_EVEX_dfv_byte_offset = 8, // bit position of the first (lowest) dfv flag; this is a shift count, not a flag
 
-    INS_OPTS_EVEX_NoApxPromotion = 1 << 8, // Do not promote to APX-EVEX
+    INS_OPTS_EVEX_dfv_cf = 1 << 8, // ccmp default flag value: CF
+    INS_OPTS_EVEX_dfv_zf = 1 << 9, // ccmp default flag value: ZF
+    INS_OPTS_EVEX_dfv_sf = 1 << 10, // ccmp default flag value: SF
+    INS_OPTS_EVEX_dfv_of = 1 << 11, // ccmp default flag value: OF
+
+    INS_OPTS_EVEX_dfv_MASK = 0xF00, // mask covering the four dfv bits above (bits 8-11)
+
+    INS_OPTS_EVEX_NoApxPromotion = 1 << 12, // Do not promote to APX-EVEX
 };
 
 
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index b8c00041ae97ac..5957b4deb9799b 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -147,6 +147,8 @@ INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868,
 
 #endif // TARGET_AMD64
 
+
+
 // the hex codes in this file represent the instruction encoding as follows:
 // 0x0000ff00 - modrm byte position
 // 0x000000ff - last byte of opcode (before modrm)
@@ -936,6 +938,34 @@ INST3(vpdpbsuds, "pdpbsuds", IUM_WR, BAD_CODE, BAD_
 INST3(vpdpbuud, "pdpbuud", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x50), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
 INST3(vpdpbuuds, "pdpbuuds", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0x00, 0x51), INS_TT_FULL, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply individual bytes of first source operand with individual bytes of second source operand and add the results
 INST3(LAST_AVX10v2_INSTRUCTION, "LAST_AVX10v2_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
 
+// id nm um mr mi rm tt flags (all ccmp rows share one encoding; see emitter::GetCCFromCCMP for the condition code)
+
+INST3(FIRST_APX_INSTRUCTION, "FIRST_APX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
+
+INST3(FIRST_CCMP_INSTRUCTION, "FIRST_CCMP_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
+
+INST3(ccmpo, "ccmpo", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpno, "ccmpno", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpb, "ccmpb", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpae, "ccmpae", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpe, "ccmpe", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpne, "ccmpne", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpbe, "ccmpbe", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpa, "ccmpa", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmps, "ccmps", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpns, "ccmpns", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpt, "ccmpt", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpf, "ccmpf", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpl, "ccmpl", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpge, "ccmpge", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmple, "ccmple", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+INST3(ccmpg, "ccmpg", IUM_RD, 0x000038, 0x0003880, 0x00003A, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit)
+
+INST3(LAST_CCMP_INSTRUCTION, "LAST_CCMP_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
+
+INST3(LAST_APX_INSTRUCTION, "LAST_APX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
+
+
 // Scalar instructions in SSE4.2
 INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), INS_TT_NONE, INS_FLAGS_None)