diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index b099e7df8cba1e..ef668b59cd780f 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -1338,7 +1338,7 @@ class CodeGen final : public CodeGenInterface
     void inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock);
 #endif

-    void inst_SET(emitJumpKind condition, regNumber reg);
+    void inst_SET(emitJumpKind condition, regNumber reg, insOpts instOptions = INS_OPTS_NONE);

     void inst_RV(instruction ins, regNumber reg, var_types type, emitAttr size = EA_UNKNOWN);

diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index 350f28f92f44f3..9ef4bf7a04c10b 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -1807,19 +1807,29 @@ void CodeGen::inst_SETCC(GenCondition condition, var_types type, regNumber dstRe
     assert(varTypeIsIntegral(type));
     assert(genIsValidIntReg(dstReg) && isByteReg(dstReg));

-    const GenConditionDesc& desc = GenConditionDesc::Get(condition);
+    const GenConditionDesc& desc        = GenConditionDesc::Get(condition);
+    insOpts                 instOptions = INS_OPTS_NONE;

-    inst_SET(desc.jumpKind1, dstReg);
+    bool needsMovzx = !varTypeIsByte(type);
+    if (needsMovzx && compiler->canUseApxEvexEncoding() && JitConfig.EnableApxZU())
+    {
+        instOptions = INS_OPTS_EVEX_zu;
+        needsMovzx  = false;
+    }
+
+    inst_SET(desc.jumpKind1, dstReg, instOptions);

     if (desc.oper != GT_NONE)
     {
         BasicBlock* labelNext = genCreateTempLabel();
         inst_JMP((desc.oper == GT_OR) ? desc.jumpKind1 : emitter::emitReverseJumpKind(desc.jumpKind1), labelNext);
-        inst_SET(desc.jumpKind2, dstReg);
+        inst_SET(desc.jumpKind2, dstReg, instOptions);
         genDefineTempLabel(labelNext);
     }

-    if (!varTypeIsByte(type))
+    // we can apply EVEX.ZU to avoid this movzx.
+    // TODO-XArch-apx: evaluate setcc + movzx and xor + set
+    if (needsMovzx)
     {
         GetEmitter()->emitIns_Mov(INS_movzx, EA_1BYTE, dstReg, dstReg, /* canSkip */ false);
     }
@@ -9450,6 +9460,8 @@ void CodeGen::genAmd64EmitterUnitTestsApx()

     theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false);
     theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false);
+
+    theEmitter->emitIns_R(INS_seto_apx, EA_1BYTE, REG_R11, INS_OPTS_EVEX_zu);
 }

 void CodeGen::genAmd64EmitterUnitTestsAvx10v2()
@@ -10351,7 +10363,7 @@ void CodeGen::genPushCalleeSavedRegisters()
 #endif // DEBUG

 #ifdef TARGET_AMD64
-    if (compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
+    if (compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPPX())
     {
         genPushCalleeSavedRegistersFromMaskAPX(rsPushRegs);
         return;
@@ -10477,7 +10489,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
         return;
     }

-    if (compiler->canUseApxEncoding() && compiler->canUseEvexEncoding() && JitConfig.EnableApxPPX())
+    if (compiler->canUseApxEvexEncoding() && JitConfig.EnableApxPPX())
     {
         regMaskTP      rsPopRegs = regSet.rsGetModifiedIntCalleeSavedRegsMask();
         const unsigned popCount  = genPopCalleeSavedRegistersFromMaskAPX(rsPopRegs);
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 70cca1ec6cac0e..e79e8d29f3ed0b 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9678,6 +9678,17 @@ class Compiler
         return compOpportunisticallyDependsOn(InstructionSet_APX);
     }

+    //------------------------------------------------------------------------
+    // canUseApxEvexEncoding - Answer the question: Are APX-EVEX encodings supported on this target.
+    //
+    // Returns:
+    //    `true` if APX-EVEX encoding is supported, `false` if not.
+    //
+    bool canUseApxEvexEncoding() const
+    {
+        return canUseApxEncoding() && canUseEvexEncoding();
+    }
+
 private:
     //------------------------------------------------------------------------
     // DoJitStressEvexEncoding- Answer the question: Do we force EVEX encoding.

diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h
index 57190961499b66..ec7fffec071bba 100644
--- a/src/coreclr/jit/emit.h
+++ b/src/coreclr/jit/emit.h
@@ -846,6 +846,8 @@ class emitter
 #define _idEvexbContext (_idCustom6 << 1) | _idCustom5 /* Evex.b: embedded broadcast, rounding, SAE */
 #define _idEvexNdContext _idCustom5 /* bits used for the APX-EVEX.nd context for promoted legacy instructions */

+#define _idEvexZuContext _idCustom5 /* bits used for the APX-EVEX.zu context for promoted legacy instructions */
+
 #define _idEvexNfContext _idCustom6 /* bits used for the APX-EVEX.nf context for promoted legacy/vex instructions */

 // We repurpose 4 bits for the default flag value bits for ccmp instructions.
@@ -1793,15 +1795,28 @@ class emitter
     bool idIsEvexNdContextSet() const
     {
+        assert(IsApxNddCompatibleInstruction(_idIns));
         return _idEvexNdContext != 0;
     }

+    bool idIsEvexZuContextSet() const
+    {
+        assert(IsApxZuCompatibleInstruction(_idIns));
+        return (_idEvexZuContext != 0);
+    }
+
     void idSetEvexNdContext()
     {
         assert(!idIsEvexNdContextSet());
         _idEvexNdContext = 1;
     }

+    void idSetEvexZuContext()
+    {
+        assert(!idIsEvexZuContextSet());
+        _idEvexZuContext = 1;
+    }
+
     bool idIsEvexNfContextSet() const
     {
         return _idEvexNfContext != 0;
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 9d561ef924eac7..f776d7defe4d1a 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -87,7 +87,10 @@ bool emitter::IsAvx512OnlyInstruction(instruction ins)

 bool emitter::IsApxOnlyInstruction(instruction ins)
 {
+#ifdef TARGET_AMD64
     return (ins >= FIRST_APX_INSTRUCTION) && (ins <= LAST_APX_INSTRUCTION);
+#endif // TARGET_AMD64
+    return false;
 }

 bool emitter::IsAVXVNNIFamilyInstruction(instruction ins)
@@ -255,18 +258,55 @@ bool emitter::HasRex2Encoding(instruction ins)
     return (flags & Encoding_REX2) != 0;
 }

-bool emitter::HasApxNdd(instruction ins)
+//------------------------------------------------------------------------
+// IsApxNddCompatibleInstruction: Is this an APX-EVEX.ND compatible instruction?
+//
+// Arguments:
+//    ins - The instruction to check.
+//
+// Returns:
+//    `true` if it is an APX-EVEX.ND compatible instruction.
+//
+bool emitter::IsApxNddCompatibleInstruction(instruction ins)
 {
     insFlags flags = CodeGenInterface::instInfo[ins];
     return (flags & INS_Flags_Has_NDD) != 0;
 }

-bool emitter::HasApxNf(instruction ins)
+//------------------------------------------------------------------------
+// IsApxNfCompatibleInstruction: Is this an APX-EVEX.NF compatible instruction?
+//
+// Arguments:
+//    ins - The instruction to check.
+//
+// Returns:
+//    `true` if it is an APX-EVEX.NF compatible instruction.
+//
+bool emitter::IsApxNfCompatibleInstruction(instruction ins)
 {
     insFlags flags = CodeGenInterface::instInfo[ins];
     return (flags & INS_Flags_Has_NF) != 0;
 }

+//------------------------------------------------------------------------
+// IsApxZuCompatibleInstruction: Is this an APX-EVEX.ZU compatible instruction?
+//
+// Arguments:
+//    ins - The instruction to check.
+//
+// Returns:
+//    `true` if it is an APX-EVEX.ZU compatible instruction.
+//
+bool emitter::IsApxZuCompatibleInstruction(instruction ins)
+{
+#ifdef TARGET_AMD64
+    // For now, we only have SETZUcc enabled for EVEX.ZU.
+    return ((ins >= INS_seto_apx) && (ins <= INS_setg_apx));
+#else
+    return false;
+#endif
+}
+
 bool emitter::IsVexEncodableInstruction(instruction ins) const
 {
     if (!UseVEXEncoding())
     {
         return false;
     }
@@ -417,7 +457,7 @@ bool emitter::IsRex2EncodableInstruction(instruction ins) const
 }

 //------------------------------------------------------------------------
-// IsApxNDDEncodableInstruction: Answer the question- does this instruction have apx ndd form.
+// IsApxNddEncodableInstruction: Answer the question - does this instruction have apx ndd form.
 //
 // Arguments:
 //    ins - The instruction to check.
@@ -425,18 +465,18 @@ bool emitter::IsRex2EncodableInstruction(instruction ins) const
 // Returns:
 //    `true` if ins has apx ndd form.
 //
-bool emitter::IsApxNDDEncodableInstruction(instruction ins) const
+bool emitter::IsApxNddEncodableInstruction(instruction ins) const
 {
     if (!UsePromotedEVEXEncoding())
     {
         return false;
     }

-    return HasApxNdd(ins);
+    return IsApxNddCompatibleInstruction(ins);
 }

 //------------------------------------------------------------------------
-// IsApxNFEncodableInstruction: Answer the question - does this instruction have Evex.nf supported
+// IsApxNfEncodableInstruction: Answer the question - does this instruction have Evex.nf supported
 //
 // Arguments:
 //    ins - The instruction to check.
@@ -444,14 +484,14 @@ bool emitter::IsApxNDDEncodableInstruction(instruction ins) const
 // Returns:
 //    `true` if ins is Evex.nf supported.
 //
-bool emitter::IsApxNFEncodableInstruction(instruction ins) const
+bool emitter::IsApxNfEncodableInstruction(instruction ins) const
 {
     if (!UsePromotedEVEXEncoding())
     {
         return false;
     }

-    return HasApxNf(ins);
+    return IsApxNfCompatibleInstruction(ins);
 }

 //------------------------------------------------------------------------
@@ -471,14 +511,13 @@ bool emitter::IsApxExtendedEvexInstruction(instruction ins) const
         return false;
     }

-    if (HasApxNdd(ins) || HasApxNf(ins))
+    if (IsApxNddCompatibleInstruction(ins))
     {
         return true;
     }

-    if (ins == INS_crc32_apx || ins == INS_movbe_apx)
+    if (IsApxNfCompatibleInstruction(ins))
     {
-        // With the new opcode, CRC32 is promoted to EVEX with APX.
         return true;
     }

@@ -898,7 +937,7 @@ bool emitter::DoJitUseApxNDD(instruction ins) const
 #if !defined(TARGET_AMD64)
     return false;
 #else
-    return JitConfig.EnableApxNDD() && IsApxNDDEncodableInstruction(ins);
+    return JitConfig.EnableApxNDD() && IsApxNddEncodableInstruction(ins);
 #endif
 }

@@ -1988,18 +2027,15 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const
         return false;
     }

-    if (id->idIsEvexNdContextSet())
+    if (IsApxNddCompatibleInstruction(ins) && id->idIsEvexNdContextSet())
     {
+        // The instruction uses the APX-ND hint, and it requires EVEX.
         return true;
     }

-    if (id->idIsEvexNfContextSet())
-    {
-        return true;
-    }
-
-    if (ins == INS_crc32_apx || ins == INS_movbe_apx)
+    if (IsApxNfCompatibleInstruction(ins) && id->idIsEvexNfContextSet())
     {
+        // The instruction uses the APX-NF hint, and it requires EVEX.
         return true;
     }

@@ -2106,12 +2142,18 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt

     // TODO-XArch-APX:
     // verify if it is actually safe to reuse the EVEX.ND with EVEX.B on instrDesc.
-    if (id->idIsEvexNdContextSet())
+    if (IsApxNddCompatibleInstruction(ins) && id->idIsEvexNdContextSet())
+    {
+        code |= ND_BIT_IN_BYTE_EVEX_PREFIX;
+    }
+
+    if (IsApxZuCompatibleInstruction(ins) && id->idIsEvexZuContextSet())
     {
+        // EVEX.ZU reuses the EVEX.ND bit for SETcc and IMUL.
         code |= ND_BIT_IN_BYTE_EVEX_PREFIX;
     }

-    if (id->idIsEvexNfContextSet())
+    if (IsApxNfCompatibleInstruction(ins) && id->idIsEvexNfContextSet())
     {
         code |= NF_BIT_IN_BYTE_EVEX_PREFIX;
     }
@@ -2124,11 +2166,6 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
     if (instrIsExtendedReg3opImul(ins))
     {
         // EVEX.R3
-        // TODO-XArch-APX:
-        // A few side notes: based on how JIT defined IMUL, we may need to extend
-        // the definition to `IMUL_31` to cover EGPRs. And it can be defined in a
-        // similar way that opcodes comes with built-in REX2 prefix, and convert
-        // it to EVEX when needed with some helper functions.
         code &= 0xFF7FFFFFFFFFFFFFULL;
     }
 #ifdef TARGET_AMD64
@@ -2968,7 +3005,7 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
             //
             //   00 - None (0F    - packed float)
             //   01 - 66   (66 0F - packed double)
-            //   10 - F3   (F3 0F - scalar float
+            //   10 - F3   (F3 0F - scalar float)
             //   11 - F2   (F2 0F - scalar double)
             switch (sizePrefix)
             {
@@ -3040,8 +3077,9 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
                     // 1. An escape byte 0F (For isa before AVX10.2)
                     // 2. A map number from 0 to 7 (For AVX10.2 and above)
                     leadingBytes = check;
-                    assert(leadingBytes == 0x0F || (emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) &&
-                                                    leadingBytes >= 0x00 && leadingBytes <= 0x07));
+                    assert((leadingBytes == 0x0F) || ((emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) ||
+                                                       (emitComp->compIsaSupportedDebugOnly(InstructionSet_APX))) &&
+                                                      (leadingBytes >= 0x00) && (leadingBytes <= 0x07)));

                     // Get rid of both sizePrefix and escape byte
                     code &= 0x0000FFFFLL;
@@ -3112,6 +3150,13 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
                     break;
                 }

+                case 0x04:
+                {
+                    assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_APX));
+                    evexPrefix |= (0x04 << 16);
+                    break;
+                }
+
                 case 0x05:
                 {
                     assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX10v2));
@@ -3122,7 +3167,6 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
                 case 0x01:
                 case 0x02:
                 case 0x03:
-                case 0x04:
                 case 0x06:
                 case 0x07:
                 default:
@@ -3896,7 +3940,7 @@ inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat
 #ifdef TARGET_AMD64
     if (useNDD)
     {
-        assert(IsApxNDDEncodableInstruction(ins));
+        assert(IsApxNddEncodableInstruction(ins));
         if (ins == INS_rcl_N || ins == INS_rcr_N || ins == INS_rol_N || ins == INS_ror_N || ins == INS_shl_N || ins == INS_shr_N || ins == INS_sar_N)
         {
@@ -6306,7 +6350,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
 #else
     if (useNDD)
     {
-        assert(IsApxNDDEncodableInstruction(ins));
+        assert(IsApxNddEncodableInstruction(ins));
         // targetReg has to be an actual register if using NDD.
         assert(targetReg < REG_STK);
         // make sure target register is not either of the src registers.
@@ -6918,6 +6962,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg, insOpts i
     }

     SetEvexNfIfNeeded(id, instOptions);
+    SetEvexZuIfNeeded(id, instOptions);

     // Vex bytes
     sz += emitGetAdjustedSize(id, insEncodeMRreg(id, reg, attr, insCodeMR(ins)));
@@ -7936,7 +7981,7 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum
     }

     // Checking EVEX.ND and NDD compatibility together in case the ND slot is overridden by other features.
-    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNDDEncodableInstruction(ins);
+    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNddEncodableInstruction(ins);

     emitAttr size = EA_SIZE(attr);

@@ -7983,7 +8028,7 @@ void emitter::emitIns_R_R_I(
     instrDesc* id = emitNewInstrSC(attr, ival);

     // Checking EVEX.ND and NDD compatibility together in case the ND slot is overridden by other features.
-    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNDDEncodableInstruction(ins);
+    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNddEncodableInstruction(ins);

     id->idIns(ins);
     id->idInsFmt(emitInsModeFormat(ins, IF_RRD_RRD_CNS, useNDD));
@@ -8381,7 +8426,7 @@ void emitter::emitIns_R_R_R(
     assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins) || IsApxExtendedEvexInstruction(ins));

     // Checking EVEX.ND and NDD compatibility together in case the ND slot is overridden by other features.
-    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNDDEncodableInstruction(ins);
+    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNddEncodableInstruction(ins);

     instrDesc* id = emitNewInstr(attr);
     id->idIns(ins);
@@ -8411,7 +8456,7 @@ void emitter::emitIns_R_R_S(
     instrDesc* id = emitNewInstr(attr);

     // Checking EVEX.ND and NDD compatibility together in case the ND slot is overridden by other features.
-    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNDDEncodableInstruction(ins);
+    bool useNDD = ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0) && IsApxNddEncodableInstruction(ins);

     id->idIns(ins);
     id->idInsFmt((ins == INS_mulx) ? IF_RWR_RWR_SRD : emitInsModeFormat(ins, IF_RRD_RRD_SRD, useNDD));
@@ -12751,7 +12796,7 @@ void emitter::emitDispIns(
     /* Display the instruction name */

 #ifdef TARGET_AMD64
-    if (IsApxNFEncodableInstruction(id->idIns()) && id->idIsEvexNfContextSet())
+    if (IsApxNfEncodableInstruction(id->idIns()) && id->idIsEvexNfContextSet())
     {
         // print the EVEX.NF indication in psudeo prefix style.
printf("{nf} "); @@ -15520,7 +15565,6 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // // assert(!TryEvexCompressDisp8Byte(id, dsp, &compressedDsp, &dspInByte)); #endif - dspInByte = false; } else @@ -16426,20 +16470,22 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) case INS_setge: case INS_setle: case INS_setg: - + { assert(id->idGCref() == GCT_NONE); assert(size == EA_1BYTE); - code = insEncodeMRreg(id, reg, EA_1BYTE, insCodeMR(ins)); + code = insCodeMR(ins); if (TakesRex2Prefix(id)) { code = AddRex2Prefix(ins, code); + code = insEncodeMRreg(id, reg, EA_1BYTE, code); dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); dst += emitOutputWord(dst, code & 0x0000FFFF); } else { + code = insEncodeMRreg(id, reg, EA_1BYTE, code); // Output the REX prefix dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); // We expect this to always be a 'big' opcode @@ -16449,6 +16495,37 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) dst += emitOutputWord(dst, code & 0x0000FFFF); } break; + } + +#ifdef TARGET_AMD64 + case INS_seto_apx: + case INS_setno_apx: + case INS_setb_apx: + case INS_setae_apx: + case INS_sete_apx: + case INS_setne_apx: + case INS_setbe_apx: + case INS_seta_apx: + case INS_sets_apx: + case INS_setns_apx: + case INS_setp_apx: + case INS_setnp_apx: + case INS_setl_apx: + case INS_setge_apx: + case INS_setle_apx: + case INS_setg_apx: + { + assert(TakesApxExtendedEvexPrefix(id)); + assert(size == EA_1BYTE); + + code = insCodeMR(ins); + code = AddEvexPrefix(id, code, size); + code = insEncodeMRreg(id, reg, EA_1BYTE, code); + dst += emitOutputRexOrSimdPrefixIfNeeded(ins, dst, code); + dst += emitOutputWord(dst, code & 0x0000FFFF); + break; + } +#endif case INS_mulEAX: case INS_imulEAX: @@ -16800,7 +16877,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } unsigned regCode; - if (!id->idIsEvexNdContextSet() || !IsApxNDDEncodableInstruction(ins)) + if (!IsApxNddEncodableInstruction(ins) || !id->idIsEvexNdContextSet()) { regCode = insEncodeReg345(id, regFor345Bits, size, &code); regCode |= insEncodeReg012(id, regFor012Bits, size, &code); @@ -16872,7 +16949,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) dst += emitOutputByte(dst, (code >> 8) & 0xFF); dst += emitOutputByte(dst, (0xC0 | regCode)); } - else if (IsApxNDDEncodableInstruction(ins) && id->idIsEvexNdContextSet()) + else if (IsApxNddEncodableInstruction(ins) && id->idIsEvexNdContextSet()) { dst += emitOutputByte(dst, (code & 0xFF)); dst += emitOutputByte(dst, (0xC0 | regCode | (code >> 8))); @@ -19103,7 +19180,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { code = insCodeRM(ins); - if (id->idIsEvexNdContextSet() && TakesApxExtendedEvexPrefix(id)) + if (IsApxNddCompatibleInstruction(ins) && id->idIsEvexNdContextSet()) { // TODO-XArch-APX: // I'm not sure why instructions on this path can be with instruction @@ -19389,7 +19466,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { assert(IsVexOrEvexEncodableInstruction(ins) || IsApxExtendedEvexInstruction(ins)); - if (id->idIsEvexNdContextSet() && IsApxNDDEncodableInstruction(ins)) + if (IsApxNddEncodableInstruction(ins) && id->idIsEvexNdContextSet()) { // EVEX.vvvv has different semantic for APX-EVEX NDD instructions. 
                code = insCodeRM(ins);
@@ -20658,6 +20735,24 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
        case INS_setge:
        case INS_setle:
        case INS_setg:
+#ifdef TARGET_AMD64
+       case INS_seto_apx:
+       case INS_setno_apx:
+       case INS_setb_apx:
+       case INS_setae_apx:
+       case INS_sete_apx:
+       case INS_setne_apx:
+       case INS_setbe_apx:
+       case INS_seta_apx:
+       case INS_sets_apx:
+       case INS_setns_apx:
+       case INS_setp_apx:
+       case INS_setnp_apx:
+       case INS_setl_apx:
+       case INS_setge_apx:
+       case INS_setle_apx:
+       case INS_setg_apx:
+#endif
        {
            result.insLatency += PERFSCORE_LATENCY_1C;
            if (insFmt == IF_RRD)
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 43c55b25727075..126d1a67a3593b 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -132,13 +132,14 @@ static bool IsApxOnlyInstruction(instruction ins);
 static regNumber getBmiRegNumber(instruction ins);
 static regNumber getSseShiftRegNumber(instruction ins);
 static bool HasRex2Encoding(instruction ins);
-static bool HasApxNdd(instruction ins);
-static bool HasApxNf(instruction ins);
+static bool IsApxNddCompatibleInstruction(instruction ins);
+static bool IsApxNfCompatibleInstruction(instruction ins);
+static bool IsApxZuCompatibleInstruction(instruction ins);
 bool IsVexEncodableInstruction(instruction ins) const;
 bool IsEvexEncodableInstruction(instruction ins) const;
 bool IsRex2EncodableInstruction(instruction ins) const;
-bool IsApxNDDEncodableInstruction(instruction ins) const;
-bool IsApxNFEncodableInstruction(instruction ins) const;
+bool IsApxNddEncodableInstruction(instruction ins) const;
+bool IsApxNfEncodableInstruction(instruction ins) const;
 bool IsApxExtendedEvexInstruction(instruction ins) const;
 bool IsShiftInstruction(instruction ins) const;
 bool IsLegacyMap1(code_t code) const;
@@ -572,7 +573,7 @@ void SetEvexNdIfNeeded(instrDesc* id, insOpts instOptions)
     if ((instOptions & INS_OPTS_EVEX_nd_MASK) != 0)
     {
         assert(UsePromotedEVEXEncoding());
-        assert(IsApxNDDEncodableInstruction(id->idIns()));
+        assert(IsApxNddEncodableInstruction(id->idIns()));
         id->idSetEvexNdContext();
     }
     else
@@ -593,7 +594,7 @@ void SetEvexNfIfNeeded(instrDesc* id, insOpts instOptions)
     if ((instOptions & INS_OPTS_EVEX_nf_MASK) != 0)
     {
         assert(UsePromotedEVEXEncoding());
-        assert(IsApxNFEncodableInstruction(id->idIns()));
+        assert(IsApxNfEncodableInstruction(id->idIns()));
         id->idSetEvexNfContext();
     }
     else
@@ -602,6 +603,33 @@
     }
 }

+//------------------------------------------------------------------------
+// SetEvexZuIfNeeded: set Evex.zu on instrDesc
+//
+// Arguments:
+//    id          - instruction descriptor
+//    instOptions - emit options
+//
+void SetEvexZuIfNeeded(instrDesc* id, insOpts instOptions)
+{
+    if ((instOptions & INS_OPTS_EVEX_zu_MASK) != 0)
+    {
+        assert(UsePromotedEVEXEncoding());
+        instruction ins = id->idIns();
+#ifdef TARGET_AMD64
+        assert(IsApxZuCompatibleInstruction(ins));
+#else
+        // This method is not expected to be used on 32-bit systems.
+        unreached();
+#endif
+        id->idSetEvexZuContext();
+    }
+    else
+    {
+        assert((instOptions & INS_OPTS_EVEX_zu_MASK) == 0);
+    }
+}
+
 //------------------------------------------------------------------------
 // SetApxPpxIfNeeded: set APX.ppx on instrDesc
 //
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index 5e4a775a49ecd2..77707dce951bf3 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -612,13 +612,12 @@ bool CodeGenInterface::instHasPseudoName(instruction ins)
  *  Generate a set instruction.
  */

-void CodeGen::inst_SET(emitJumpKind condition, regNumber reg)
+void CodeGen::inst_SET(emitJumpKind condition, regNumber reg, insOpts instOptions)
 {
 #ifdef TARGET_XARCH
     instruction ins;

     /* Convert the condition to an instruction opcode */
-
     switch (condition)
     {
         case EJ_js:
@@ -672,10 +671,35 @@ void CodeGen::inst_SET(emitJumpKind condition, regNumber reg)
             return;
     }

+#ifdef TARGET_AMD64
+    // If using the ZU feature, we need to promote the SETcc instruction to its SETZUcc form.
+    if ((instOptions & INS_OPTS_EVEX_zu_MASK) != 0)
+    {
+        const int offset = (INS_seto - INS_seto_apx);
+        assert(INS_seto == (INS_seto_apx + offset));
+        assert(INS_setno == (INS_setno_apx + offset));
+        assert(INS_setb == (INS_setb_apx + offset));
+        assert(INS_setae == (INS_setae_apx + offset));
+        assert(INS_sete == (INS_sete_apx + offset));
+        assert(INS_setne == (INS_setne_apx + offset));
+        assert(INS_setbe == (INS_setbe_apx + offset));
+        assert(INS_seta == (INS_seta_apx + offset));
+        assert(INS_sets == (INS_sets_apx + offset));
+        assert(INS_setns == (INS_setns_apx + offset));
+        assert(INS_setp == (INS_setp_apx + offset));
+        assert(INS_setnp == (INS_setnp_apx + offset));
+        assert(INS_setl == (INS_setl_apx + offset));
+        assert(INS_setge == (INS_setge_apx + offset));
+        assert(INS_setle == (INS_setle_apx + offset));
+        assert(INS_setg == (INS_setg_apx + offset));
+        ins = (instruction)(ins - offset);
+    }
+#endif
+
     assert(genRegMask(reg) & RBM_BYTE_REGS);

     // These instructions only write the low byte of 'reg'
-    GetEmitter()->emitIns_R(ins, EA_1BYTE, reg);
+    GetEmitter()->emitIns_R(ins, EA_1BYTE, reg, instOptions);

 #elif defined(TARGET_ARM64)
     GetEmitter()->emitIns_R_COND(INS_cset, EA_8BYTE, reg, JumpKindToInsCond(condition));
diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h
index eb081a73bee90a..15de2fd08f77bd 100644
--- a/src/coreclr/jit/instr.h
+++ b/src/coreclr/jit/instr.h
@@ -306,6 +306,10 @@ enum insOpts: unsigned
                                      // One-bit: 0b10_0000_0000_0000
     INS_OPTS_APX_ppx_MASK = 0x2000,  // mask for APX-EVEX.ppx feature.

+    INS_OPTS_EVEX_zu = 1 << 14,      // Zero Upper for APX-EVEX
+                                     // One-bit: 0b100_0000_0000_0000
+    INS_OPTS_EVEX_zu_MASK = 0x4000,  // mask for APX-EVEX.zu feature.
+ }; #elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 088c8b981968be..ebc130e370cd84 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -1154,13 +1154,29 @@ INST3(ccmpge, "ccmpge", IUM_RD, 0x000038, 0x0003880, 0x INST3(ccmple, "ccmple", IUM_RD, 0x000038, 0x0003880, 0x00003A, ILLEGAL, ILLEGAL, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) INST3(ccmpg, "ccmpg", IUM_RD, 0x000038, 0x0003880, 0x00003A, ILLEGAL, ILLEGAL, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) #define LAST_CCMP_INSTRUCTION INS_ccmpg -#define LAST_APX_INSTRUCTION INS_ccmpg +INST3(crc32_apx, "crc32", IUM_RW, BAD_CODE, BAD_CODE, 0x0000F0, 3C, 1C, INS_TT_NONE, INS_FLAGS_None) +INST3(movbe_apx, "movbe", IUM_WR, 0x000061, BAD_CODE, 0x000060, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_FLAGS_None) + +INST3(seto_apx, "setzuo", IUM_WR, SSEDBLMAP(4, 0x40), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_OF) +INST3(setno_apx, "setzuno", IUM_WR, SSEDBLMAP(4, 0x41), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_OF) +INST3(setb_apx, "setzub", IUM_WR, SSEDBLMAP(4, 0x42), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_CF) +INST3(setae_apx, "setzuae", IUM_WR, SSEDBLMAP(4, 0x43), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_CF) +INST3(sete_apx, "setzue", IUM_WR, SSEDBLMAP(4, 0x44), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_ZF) +INST3(setne_apx, "setzune", IUM_WR, SSEDBLMAP(4, 0x45), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_ZF) +INST3(setbe_apx, "setzube", IUM_WR, SSEDBLMAP(4, 0x46), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_ZF | Reads_CF) +INST3(seta_apx, "setzua", IUM_WR, SSEDBLMAP(4, 0x47), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_ZF | Reads_CF) +INST3(sets_apx, "setzus", IUM_WR, SSEDBLMAP(4, 0x48), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_SF) +INST3(setns_apx, "setzuns", IUM_WR, SSEDBLMAP(4, 0x49), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_SF) +INST3(setp_apx, "setzup", IUM_WR, SSEDBLMAP(4, 0x4A), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_PF) +INST3(setnp_apx, "setzunp", IUM_WR, SSEDBLMAP(4, 0x4B), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_PF) +INST3(setl_apx, "setzul", IUM_WR, SSEDBLMAP(4, 0x4C), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_OF | Reads_SF) +INST3(setge_apx, "setzuge", IUM_WR, SSEDBLMAP(4, 0x4D), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_OF | Reads_SF) +INST3(setle_apx, "setzule", IUM_WR, SSEDBLMAP(4, 0x4E), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) +INST3(setg_apx, "setzug", IUM_WR, SSEDBLMAP(4, 0x4F), BAD_CODE, BAD_CODE, ILLEGAL, ILLEGAL, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) +#define LAST_APX_INSTRUCTION INS_setg_apx // Scalar instructions in SSE4.2 INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), 3C, 1C, INS_TT_NONE, INS_FLAGS_None) -#ifdef TARGET_AMD64 -INST3(crc32_apx, "crc32", IUM_RW, BAD_CODE, BAD_CODE, 0x0000F0, 3C, 1C, INS_TT_NONE, INS_FLAGS_None) -#endif // BMI1 INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), 3C, 1C, INS_TT_NONE, Undefined_OF | Undefined_SF | Writes_ZF | Undefined_AF | Undefined_PF | Writes_CF | Encoding_REX2) // Count the Number of Trailing Zero Bits @@ -1176,9 +1192,6 @@ INST3(lzcnt_apx, 
"lzcnt", IUM_WR, BAD_CODE, BAD_CODE, // MOVBE INST3(movbe, "movbe", IUM_WR, PCKMVB(0xF1), BAD_CODE, PCKMVB(0xF0), ILLEGAL, ILLEGAL, INS_TT_NONE, INS_FLAGS_None) -#ifdef TARGET_AMD64 -INST3(movbe_apx, "movbe", IUM_WR, 0x000061, BAD_CODE, 0x000060, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_FLAGS_None) -#endif // POPCNT INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), 3C, 1C, INS_TT_NONE, Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Resets_CF | Encoding_REX2) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 2a8941f1421d09..f5d05b2a95aa66 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -436,6 +436,7 @@ RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining RELEASE_CONFIG_INTEGER(EnableApxPPX, "EnableApxPPX", 0) // Allows APX PPX feature to be disabled +RELEASE_CONFIG_INTEGER(EnableApxZU, "EnableApxZU", 0) // Allows APX ZU feature to be disabled // clang-format on