Skip to content

Commit c6b2e21

Browse files
committed
Fixup how xarch instructions check for embedded broadcast and masking support
1 parent d896e85 commit c6b2e21

22 files changed

+1986
-1565
lines changed

src/coreclr/jit/codegen.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,14 +58,14 @@ class CodeGen final : public CodeGenInterface
5858
// We use movaps when non-VEX because it is a smaller instruction;
5959
// however the VEX version vmovaps would be used which is the same size as vmovdqa;
6060
// also vmovdqa has more available CPU ports on older processors so we switch to that
61-
return compiler->canUseVexEncoding() ? INS_movdqa : INS_movaps;
61+
return compiler->canUseVexEncoding() ? INS_movdqa32 : INS_movaps;
6262
}
6363
instruction simdUnalignedMovIns()
6464
{
6565
// We use movups when non-VEX because it is a smaller instruction;
6666
// however the VEX version vmovups would be used which is the same size as vmovdqu;
6767
// but vmovdqu has more available CPU ports on older processors so we switch to that
68-
return compiler->canUseVexEncoding() ? INS_movdqu : INS_movups;
68+
return compiler->canUseVexEncoding() ? INS_movdqu32 : INS_movups;
6969
}
7070
#endif // defined(TARGET_XARCH)
7171

src/coreclr/jit/codegeninterface.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -179,8 +179,11 @@ class CodeGenInterface
179179
public:
180180
static bool instIsFP(instruction ins);
181181
#if defined(TARGET_XARCH)
182-
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
182+
static bool instIsEmbeddedBroadcastCompatible(instruction ins);
183+
static bool instIsEmbeddedMaskingCompatible(instruction ins);
184+
183185
static unsigned instInputSize(instruction ins);
186+
static unsigned instKMaskBaseSize(instruction ins);
184187
#endif // TARGET_XARCH
185188
//-------------------------------------------------------------------------
186189
// Liveness-related fields & methods

src/coreclr/jit/codegenxarch.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4001,7 +4001,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
40014001
// this probably needs to be changed.
40024002

40034003
// Load
4004-
genCodeForLoadOffset(INS_movdqu, EA_16BYTE, xmmTmpReg, src, offset);
4004+
genCodeForLoadOffset(INS_movdqu32, EA_16BYTE, xmmTmpReg, src, offset);
40054005
// Store
40064006
genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset);
40074007

@@ -5700,7 +5700,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
57005700
case NI_AVX2_ConvertToUInt32:
57015701
{
57025702
// These intrinsics are "ins reg/mem, xmm"
5703-
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
5703+
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
57045704
attr = emitActualTypeSize(baseType);
57055705
#if defined(TARGET_X86)
57065706
if (varTypeIsLong(baseType))
@@ -5731,7 +5731,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
57315731
case NI_AVX10v1_V512_ExtractVector256:
57325732
{
57335733
// These intrinsics are "ins reg/mem, xmm, imm8"
5734-
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
5734+
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
57355735
attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize()));
57365736

57375737
if (intrinsicId == NI_SSE2_Extract)
@@ -5808,7 +5808,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
58085808
case NI_AVX10v1_ConvertToVector128UInt16WithSaturation:
58095809
{
58105810
// These intrinsics are "ins reg/mem, xmm"
5811-
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
5811+
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
58125812
attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize()));
58135813
break;
58145814
}
@@ -8491,7 +8491,7 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset
84918491

84928492
if (type == TYP_STRUCT)
84938493
{
8494-
ins = INS_movdqu;
8494+
ins = INS_movdqu32;
84958495
// This should be changed!
84968496
attr = EA_16BYTE;
84978497
size = 16;
@@ -9360,7 +9360,7 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
93609360
// // Legacy instructions
93619361
theEmitter->emitIns_R_ARX(INS_add, EA_4BYTE, REG_R16, REG_R17, REG_R18, 1, 0);
93629362

9363-
theEmitter->emitIns_AR_R(INS_movnti, EA_8BYTE, REG_R17, REG_R16, 10);
9363+
theEmitter->emitIns_AR_R(INS_movnti64, EA_8BYTE, REG_R17, REG_R16, 10);
93649364
theEmitter->emitIns_R_R_R(INS_andn, EA_8BYTE, REG_R17, REG_R16, REG_R18);
93659365

93669366
theEmitter->emitIns_Mov(INS_kmovb_gpr, EA_4BYTE, REG_R16, REG_K0, false);
@@ -9388,8 +9388,8 @@ void CodeGen::genAmd64EmitterUnitTestsApx()
93889388
theEmitter->emitIns_R_R_R(INS_pext, EA_4BYTE, REG_R16, REG_R18, REG_R17);
93899389
theEmitter->emitIns_R_R_R(INS_pext, EA_8BYTE, REG_R16, REG_R18, REG_R17);
93909390

9391-
theEmitter->emitIns_Mov(INS_movd, EA_4BYTE, REG_R16, REG_XMM0, false);
9392-
theEmitter->emitIns_Mov(INS_movd, EA_4BYTE, REG_R16, REG_XMM16, false);
9391+
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM0, false);
9392+
theEmitter->emitIns_Mov(INS_movd32, EA_4BYTE, REG_R16, REG_XMM16, false);
93939393
theEmitter->emitIns_Mov(INS_movq, EA_8BYTE, REG_R16, REG_XMM0, false);
93949394
theEmitter->emitIns_Mov(INS_movq, EA_8BYTE, REG_R16, REG_XMM16, false);
93959395
}

src/coreclr/jit/emit.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8260,7 +8260,7 @@ void emitter::emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, reg
82608260
{
82618261
assert(emitComp->IsBaselineVector256IsaSupportedDebugOnly());
82628262
dataSize = 16;
8263-
ins = INS_vbroadcastf128;
8263+
ins = INS_vbroadcastf32x4;
82648264
}
82658265

82668266
if ((dataSize == 16) && (constValue->u64[1] == constValue->u64[0]))

0 commit comments

Comments
 (0)