Skip to content

Commit cd460db

Browse files
Add the barebones support for using embedded masking with AVX512 (#97675)
* Add the barebones support for using embedded masking with AVX512 * Applying formatting patch * Add some basic asserts to ensure _idCustom# isn't used incorrectly * Ensure that the instruction check is correct for TlsGD
1 parent b91ed70 commit cd460db

File tree

11 files changed

+501
-60
lines changed

11 files changed

+501
-60
lines changed

src/coreclr/jit/emit.h

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -768,12 +768,26 @@ class emitter
768768
unsigned _idLargeDsp : 1; // does a large displacement follow?
769769
unsigned _idLargeCall : 1; // large call descriptor used
770770

771-
unsigned _idBound : 1; // jump target / frame offset bound
772-
#ifndef TARGET_ARMARCH
773-
unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg
774-
#endif
775-
unsigned _idTlsGD : 1; // Used to store information related to TLS GD access on linux
776-
unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables
771+
// We have several pieces of information we need to encode but which are only applicable
772+
// to a subset of instrDescs. To accommodate that, we define several _idCustom# bitfields
773+
// and then some #defines to make accessing them simpler
774+
775+
unsigned _idCustom1 : 1;
776+
unsigned _idCustom2 : 1;
777+
unsigned _idCustom3 : 1;
778+
779+
#define _idBound _idCustom1 /* jump target / frame offset bound */
780+
#define _idTlsGD _idCustom2 /* Used to store information related to TLS GD access on linux */
781+
#define _idNoGC _idCustom3 /* Some helpers don't get recorded in GC tables */
782+
// Read-only 3-bit view combining _idCustom3:_idCustom2:_idCustom1 (most to least significant).
// The expansion is fully parenthesized so it composes safely with neighbouring operators
// such as `==` or `&`, which would otherwise bind to `_idCustom1` alone.
#define _idEvexAaaContext ((_idCustom3 << 2) | (_idCustom2 << 1) | _idCustom1) /* bits used for the EVEX.aaa context */
783+
784+
#if !defined(TARGET_ARMARCH)
785+
unsigned _idCustom4 : 1;
786+
787+
#define _idCallRegPtr _idCustom4 /* IL indirect calls : addr in reg */
788+
#define _idEvexZContext _idCustom4 /* bits used for the EVEX.z context */
789+
#endif // !TARGET_ARMARCH
790+
777791
#if defined(TARGET_XARCH)
778792
// EVEX.b can indicate several contexts: embedded broadcast, embedded rounding.
779793
// For normal and embedded broadcast intrinsics, EVEX.L'L has the same semantic, vector length.
@@ -1578,30 +1592,36 @@ class emitter
15781592

15791593
bool idIsBound() const
15801594
{
1595+
assert(!IsAvx512OrPriorInstruction(_idIns));
15811596
return _idBound != 0;
15821597
}
15831598
void idSetIsBound()
15841599
{
1600+
assert(!IsAvx512OrPriorInstruction(_idIns));
15851601
_idBound = 1;
15861602
}
15871603

15881604
#ifndef TARGET_ARMARCH
15891605
bool idIsCallRegPtr() const
15901606
{
1607+
assert(!IsAvx512OrPriorInstruction(_idIns));
15911608
return _idCallRegPtr != 0;
15921609
}
15931610
void idSetIsCallRegPtr()
15941611
{
1612+
assert(!IsAvx512OrPriorInstruction(_idIns));
15951613
_idCallRegPtr = 1;
15961614
}
1597-
#endif
1615+
#endif // !TARGET_ARMARCH
15981616

15991617
bool idIsTlsGD() const
16001618
{
1619+
assert(!IsAvx512OrPriorInstruction(_idIns));
16011620
return _idTlsGD != 0;
16021621
}
16031622
void idSetTlsGD()
16041623
{
1624+
assert(!IsAvx512OrPriorInstruction(_idIns));
16051625
_idTlsGD = 1;
16061626
}
16071627

@@ -1610,10 +1630,12 @@ class emitter
16101630
// code, it is not necessary to generate GC info for a call so labeled.
16111631
bool idIsNoGC() const
16121632
{
1633+
assert(!IsAvx512OrPriorInstruction(_idIns));
16131634
return _idNoGC != 0;
16141635
}
16151636
void idSetIsNoGC(bool val)
16161637
{
1638+
assert(!IsAvx512OrPriorInstruction(_idIns));
16171639
_idNoGC = val;
16181640
}
16191641

@@ -1625,7 +1647,8 @@ class emitter
16251647

16261648
void idSetEvexbContext(insOpts instOptions)
16271649
{
1628-
assert(_idEvexbContext == 0);
1650+
assert(!idIsEvexbContextSet());
1651+
16291652
if (instOptions == INS_OPTS_EVEX_eb_er_rd)
16301653
{
16311654
_idEvexbContext = 1;
@@ -1648,6 +1671,34 @@ class emitter
16481671
{
16491672
return _idEvexbContext;
16501673
}
1674+
1675+
unsigned idGetEvexAaaContext() const
1676+
{
1677+
assert(IsAvx512OrPriorInstruction(_idIns));
1678+
return _idEvexAaaContext;
1679+
}
1680+
1681+
void idSetEvexAaaContext(insOpts instOptions)
1682+
{
1683+
assert(idGetEvexAaaContext() == 0);
1684+
unsigned value = static_cast<unsigned>((instOptions & INS_OPTS_EVEX_aaa_MASK) >> 2);
1685+
1686+
_idCustom1 = ((value >> 0) & 1);
1687+
_idCustom2 = ((value >> 1) & 1);
1688+
_idCustom3 = ((value >> 2) & 1);
1689+
}
1690+
1691+
bool idIsEvexZContextSet() const
1692+
{
1693+
assert(IsAvx512OrPriorInstruction(_idIns));
1694+
return _idEvexZContext != 0;
1695+
}
1696+
1697+
void idSetEvexZContext()
1698+
{
1699+
assert(!idIsEvexZContextSet());
1700+
_idEvexZContext = 1;
1701+
}
16511702
#endif
16521703

16531704
#ifdef TARGET_ARMARCH
@@ -2222,6 +2273,7 @@ class emitter
22222273
void emitDispInsHex(instrDesc* id, BYTE* code, size_t sz);
22232274
void emitDispEmbBroadcastCount(instrDesc* id);
22242275
void emitDispEmbRounding(instrDesc* id);
2276+
void emitDispEmbMasking(instrDesc* id);
22252277
void emitDispIns(instrDesc* id,
22262278
bool isNew,
22272279
bool doffs,

src/coreclr/jit/emitxarch.cpp

Lines changed: 67 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -49,23 +49,6 @@ bool emitter::IsKInstruction(instruction ins)
4949
return (flags & KInstruction) != 0;
5050
}
5151

52-
//------------------------------------------------------------------------
53-
// IsAvx512OrPriorInstruction: Is this an Avx512 or Avx or Sse or K (opmask) instruction.
54-
// Technically, K instructions would be considered under the VEX encoding umbrella, but due to
55-
// the instruction table encoding had to be pulled out with the rest of the `INST5` definitions.
56-
//
57-
// Arguments:
58-
// ins - The instruction to check.
59-
//
60-
// Returns:
61-
// `true` if it is a sse or avx or avx512 instruction.
62-
//
63-
bool emitter::IsAvx512OrPriorInstruction(instruction ins)
64-
{
65-
// TODO-XArch-AVX512: Fix check once AVX512 instructions are added.
66-
return ((ins >= INS_FIRST_SSE_INSTRUCTION) && (ins <= INS_LAST_AVX512_INSTRUCTION));
67-
}
68-
6952
bool emitter::IsAVXOnlyInstruction(instruction ins)
7053
{
7154
return (ins >= INS_FIRST_AVX_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
@@ -1304,9 +1287,10 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
13041287
#define DEFAULT_BYTE_EVEX_PREFIX 0x62F07C0800000000ULL
13051288

13061289
#define DEFAULT_BYTE_EVEX_PREFIX_MASK 0xFFFFFFFF00000000ULL
1290+
#define BBIT_IN_BYTE_EVEX_PREFIX 0x0000001000000000ULL
13071291
#define LBIT_IN_BYTE_EVEX_PREFIX 0x0000002000000000ULL
13081292
#define LPRIMEBIT_IN_BYTE_EVEX_PREFIX 0x0000004000000000ULL
1309-
#define EVEX_B_BIT 0x0000001000000000ULL
1293+
#define ZBIT_IN_BYTE_EVEX_PREFIX 0x0000008000000000ULL
13101294

13111295
//------------------------------------------------------------------------
13121296
// AddEvexPrefix: Add default EVEX prefix with only LL' bits set.
@@ -1344,7 +1328,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
13441328

13451329
if (id->idIsEvexbContextSet())
13461330
{
1347-
code |= EVEX_B_BIT;
1331+
code |= BBIT_IN_BYTE_EVEX_PREFIX;
13481332

13491333
if (!id->idHasMem())
13501334
{
@@ -1385,6 +1369,8 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
13851369
{
13861370
case IF_RWR_RRD_ARD_RRD:
13871371
{
1372+
assert(id->idGetEvexAaaContext() == 0);
1373+
13881374
CnsVal cnsVal;
13891375
emitGetInsAmdCns(id, &cnsVal);
13901376

@@ -1394,6 +1380,8 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
13941380

13951381
case IF_RWR_RRD_MRD_RRD:
13961382
{
1383+
assert(id->idGetEvexAaaContext() == 0);
1384+
13971385
CnsVal cnsVal;
13981386
emitGetInsDcmCns(id, &cnsVal);
13991387

@@ -1403,6 +1391,8 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
14031391

14041392
case IF_RWR_RRD_SRD_RRD:
14051393
{
1394+
assert(id->idGetEvexAaaContext() == 0);
1395+
14061396
CnsVal cnsVal;
14071397
emitGetInsCns(id, &cnsVal);
14081398

@@ -1412,12 +1402,24 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt
14121402

14131403
case IF_RWR_RRD_RRD_RRD:
14141404
{
1405+
assert(id->idGetEvexAaaContext() == 0);
14151406
maskReg = id->idReg4();
14161407
break;
14171408
}
14181409

14191410
default:
14201411
{
1412+
unsigned aaaContext = id->idGetEvexAaaContext();
1413+
1414+
if (aaaContext != 0)
1415+
{
1416+
maskReg = static_cast<regNumber>(aaaContext + KBASE);
1417+
1418+
if (id->idIsEvexZContextSet())
1419+
{
1420+
code |= ZBIT_IN_BYTE_EVEX_PREFIX;
1421+
}
1422+
}
14211423
break;
14221424
}
14231425
}
@@ -4170,9 +4172,8 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
41704172
}
41714173

41724174
// If this is just "call reg", we're done.
4173-
if (id->idIsCallRegPtr())
4175+
if (((ins == INS_call) || (ins == INS_tail_i_jmp)) && id->idIsCallRegPtr())
41744176
{
4175-
assert(ins == INS_call || ins == INS_tail_i_jmp);
41764177
assert(dsp == 0);
41774178
return size;
41784179
}
@@ -6822,7 +6823,9 @@ void emitter::emitIns_R_R_A(
68226823
id->idIns(ins);
68236824
id->idReg1(reg1);
68246825
id->idReg2(reg2);
6826+
68256827
SetEvexBroadcastIfNeeded(id, instOptions);
6828+
SetEvexEmbMaskIfNeeded(id, instOptions);
68266829

68276830
emitHandleMemOp(indir, id, (ins == INS_mulx) ? IF_RWR_RWR_ARD : emitInsModeFormat(ins, IF_RRD_RRD_ARD), ins);
68286831

@@ -6947,7 +6950,9 @@ void emitter::emitIns_R_R_C(instruction ins,
69476950
id->idReg1(reg1);
69486951
id->idReg2(reg2);
69496952
id->idAddr()->iiaFieldHnd = fldHnd;
6953+
69506954
SetEvexBroadcastIfNeeded(id, instOptions);
6955+
SetEvexEmbMaskIfNeeded(id, instOptions);
69516956

69526957
UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
69536958
id->idCodeSize(sz);
@@ -6974,12 +6979,13 @@ void emitter::emitIns_R_R_R(
69746979
id->idReg2(reg1);
69756980
id->idReg3(reg2);
69766981

6977-
if ((instOptions & INS_OPTS_b_MASK) != INS_OPTS_NONE)
6982+
if ((instOptions & INS_OPTS_EVEX_b_MASK) != 0)
69786983
{
69796984
// if EVEX.b needs to be set in this path, then it should be embedded rounding.
69806985
assert(UseEvexEncoding());
69816986
id->idSetEvexbContext(instOptions);
69826987
}
6988+
SetEvexEmbMaskIfNeeded(id, instOptions);
69836989

69846990
UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins));
69856991
id->idCodeSize(sz);
@@ -7001,7 +7007,9 @@ void emitter::emitIns_R_R_S(
70017007
id->idReg1(reg1);
70027008
id->idReg2(reg2);
70037009
id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
7010+
70047011
SetEvexBroadcastIfNeeded(id, instOptions);
7012+
SetEvexEmbMaskIfNeeded(id, instOptions);
70057013

70067014
#ifdef DEBUG
70077015
id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
@@ -10785,6 +10793,28 @@ void emitter::emitDispEmbRounding(instrDesc* id)
1078510793
}
1078610794
}
1078710795

10796+
// emitDispEmbMasking: Display the tag where embedded masking is activated
10797+
//
10798+
// Arguments:
10799+
// id - The instruction descriptor
10800+
//
10801+
void emitter::emitDispEmbMasking(instrDesc* id)
10802+
{
10803+
regNumber maskReg = static_cast<regNumber>(id->idGetEvexAaaContext() + KBASE);
10804+
10805+
if (maskReg == REG_K0)
10806+
{
10807+
return;
10808+
}
10809+
10810+
printf(" {%s}", emitRegName(maskReg));
10811+
10812+
if (id->idIsEvexZContextSet())
10813+
{
10814+
printf(" {z}");
10815+
}
10816+
}
10817+
1078810818
//--------------------------------------------------------------------
1078910819
// emitDispIns: Dump the given instruction to jitstdout.
1079010820
//
@@ -11033,7 +11063,7 @@ void emitter::emitDispIns(
1103311063
case IF_AWR:
1103411064
case IF_ARW:
1103511065
{
11036-
if (id->idIsCallRegPtr())
11066+
if (((ins == INS_call) || (ins == INS_tail_i_jmp)) && id->idIsCallRegPtr())
1103711067
{
1103811068
printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
1103911069
}
@@ -11184,7 +11214,9 @@ void emitter::emitDispIns(
1118411214
case IF_RRW_RRD_ARD:
1118511215
case IF_RWR_RWR_ARD:
1118611216
{
11187-
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
11217+
printf("%s", emitRegName(id->idReg1(), attr));
11218+
emitDispEmbMasking(id);
11219+
printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr);
1118811220
emitDispAddrMode(id);
1118911221
emitDispEmbBroadcastCount(id);
1119011222
break;
@@ -11458,7 +11490,9 @@ void emitter::emitDispIns(
1145811490
case IF_RRW_RRD_SRD:
1145911491
case IF_RWR_RWR_SRD:
1146011492
{
11461-
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
11493+
printf("%s", emitRegName(id->idReg1(), attr));
11494+
emitDispEmbMasking(id);
11495+
printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr);
1146211496
emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
1146311497
id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
1146411498
emitDispEmbBroadcastCount(id);
@@ -11652,8 +11686,9 @@ void emitter::emitDispIns(
1165211686
reg2 = reg3;
1165311687
reg3 = tmp;
1165411688
}
11655-
printf("%s, ", emitRegName(id->idReg1(), attr));
11656-
printf("%s, ", emitRegName(reg2, attr));
11689+
printf("%s", emitRegName(id->idReg1(), attr));
11690+
emitDispEmbMasking(id);
11691+
printf(", %s, ", emitRegName(reg2, attr));
1165711692
printf("%s", emitRegName(reg3, attr));
1165811693
emitDispEmbRounding(id);
1165911694
break;
@@ -11964,7 +11999,9 @@ void emitter::emitDispIns(
1196411999
case IF_RRW_RRD_MRD:
1196512000
case IF_RWR_RWR_MRD:
1196612001
{
11967-
printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
12002+
printf("%s", emitRegName(id->idReg1(), attr));
12003+
emitDispEmbMasking(id);
12004+
printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr);
1196812005
offs = emitGetInsDsp(id);
1196912006
emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
1197012007
emitDispEmbBroadcastCount(id);
@@ -12918,7 +12955,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
1291812955
#else
1291912956
dst += emitOutputLong(dst, dsp);
1292012957
#endif
12921-
if (id->idIsTlsGD())
12958+
if (!IsAvx512OrPriorInstruction(ins) && id->idIsTlsGD())
1292212959
{
1292312960
addlDelta = -4;
1292412961
emitRecordRelocationWithAddlDelta((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_TLSGD,
@@ -16648,7 +16685,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1664816685
}
1664916686

1665016687
#ifdef DEBUG
16651-
if (ins == INS_call && !id->idIsTlsGD())
16688+
if ((ins == INS_call) && !id->idIsTlsGD())
1665216689
{
1665316690
emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
1665416691
(CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);

0 commit comments

Comments
 (0)