Skip to content

Commit 87312f7

Browse files
Improving the SIMD codegen for SIMD12 load/store (#80083)
* Improving the SIMD codegen for SIMD12 load/store * Apply formatting patch * Ensure the right index is used for insertps * Ensure emitIns_SIMD_* is used for insertps to handle the non-rmw form * Fix the input size flag for extractps * Fix an issue where the second half of a TYP_SIMD12 store used the wrong register * Ensure relocatable handles for TYP_SIMD12 load/stores are not contained * Ensure arm32 can build
1 parent 0bfa052 commit 87312f7

File tree

13 files changed

+357
-226
lines changed

13 files changed

+357
-226
lines changed

src/coreclr/jit/codegen.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1086,13 +1086,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
10861086
// values through an indirection. Note that Vector3 locals allocated on stack would have
10871087
// their size rounded to TARGET_POINTER_SIZE (which is 8 bytes on 64-bit targets) and hence
10881088
// Vector3 locals could be treated as TYP_SIMD16 while reading/writing.
1089-
void genStoreIndTypeSIMD12(GenTree* treeNode);
1090-
void genLoadIndTypeSIMD12(GenTree* treeNode);
1091-
void genStoreLclTypeSIMD12(GenTree* treeNode);
1092-
void genLoadLclTypeSIMD12(GenTree* treeNode);
1089+
void genStoreIndTypeSimd12(GenTreeStoreInd* treeNode);
1090+
void genLoadIndTypeSimd12(GenTreeIndir* treeNode);
1091+
void genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode);
1092+
void genLoadLclTypeSimd12(GenTreeLclVarCommon* treeNode);
10931093
#ifdef TARGET_X86
1094-
void genStoreSIMD12ToStack(regNumber operandReg, regNumber tmpReg);
1095-
void genPutArgStkSIMD12(GenTree* treeNode);
1094+
void genStoreSimd12ToStack(regNumber dataReg, regNumber tmpReg);
1095+
void genPutArgStkSimd12(GenTreePutArgStk* treeNode);
10961096
#endif // TARGET_X86
10971097
#endif // FEATURE_SIMD
10981098

src/coreclr/jit/codegenarm64.cpp

Lines changed: 50 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2798,19 +2798,19 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
27982798
void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
27992799
{
28002800
var_types targetType = tree->TypeGet();
2801-
regNumber targetReg = tree->GetRegNum();
2802-
emitter* emit = GetEmitter();
2803-
noway_assert(targetType != TYP_STRUCT);
28042801

28052802
#ifdef FEATURE_SIMD
2806-
// storing of TYP_SIMD12 (i.e. Vector3) field
2807-
if (tree->TypeGet() == TYP_SIMD12)
2803+
if (targetType == TYP_SIMD12)
28082804
{
2809-
genStoreLclTypeSIMD12(tree);
2805+
genStoreLclTypeSimd12(tree);
28102806
return;
28112807
}
28122808
#endif // FEATURE_SIMD
28132809

2810+
regNumber targetReg = tree->GetRegNum();
2811+
emitter* emit = GetEmitter();
2812+
noway_assert(targetType != TYP_STRUCT);
2813+
28142814
// record the offset
28152815
unsigned offset = tree->GetLclOffs();
28162816

@@ -2909,7 +2909,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
29092909
// storing of TYP_SIMD12 (i.e. Vector3) field
29102910
if (targetType == TYP_SIMD12)
29112911
{
2912-
genStoreLclTypeSIMD12(lclNode);
2912+
genStoreLclTypeSimd12(lclNode);
29132913
return;
29142914
}
29152915
#endif // FEATURE_SIMD
@@ -4172,7 +4172,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
41724172
// Storing Vector3 of size 12 bytes through indirection
41734173
if (tree->TypeGet() == TYP_SIMD12)
41744174
{
4175-
genStoreIndTypeSIMD12(tree);
4175+
genStoreIndTypeSimd12(tree);
41764176
return;
41774177
}
41784178
#endif // FEATURE_SIMD
@@ -5160,7 +5160,7 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
51605160
}
51615161

51625162
//-----------------------------------------------------------------------------
5163-
// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory.
5163+
// genStoreIndTypeSimd12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory.
51645164
// Since Vector3 is not a hardware supported write size, it is performed
51655165
// as two writes: 8 byte followed by 4-byte.
51665166
//
@@ -5171,41 +5171,39 @@ void CodeGen::genSimdUpperRestore(GenTreeIntrinsic* node)
51715171
// Return Value:
51725172
// None.
51735173
//
5174-
void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
5174+
void CodeGen::genStoreIndTypeSimd12(GenTreeStoreInd* treeNode)
51755175
{
5176-
assert(treeNode->OperGet() == GT_STOREIND);
5176+
assert(treeNode->OperIs(GT_STOREIND));
51775177

5178-
GenTree* addr = treeNode->AsOp()->gtOp1;
5179-
GenTree* data = treeNode->AsOp()->gtOp2;
5178+
// Should not require a write barrier
5179+
assert(gcInfo.gcIsWriteBarrierCandidate(treeNode) == GCInfo::WBF_NoBarrier);
51805180

51815181
// addr and data should not be contained.
5182-
assert(!data->isContained());
5182+
5183+
GenTree* addr = treeNode->Addr();
51835184
assert(!addr->isContained());
51845185

5185-
#ifdef DEBUG
5186-
// Should not require a write barrier
5187-
GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode->AsStoreInd());
5188-
assert(writeBarrierForm == GCInfo::WBF_NoBarrier);
5189-
#endif
5186+
GenTree* data = treeNode->Data();
5187+
assert(!data->isContained());
51905188

5191-
genConsumeOperands(treeNode->AsOp());
5189+
regNumber addrReg = genConsumeReg(addr);
5190+
regNumber dataReg = genConsumeReg(data);
51925191

51935192
// Need an additional integer register to extract upper 4 bytes from data.
51945193
regNumber tmpReg = treeNode->GetSingleTempReg();
5195-
assert(tmpReg != addr->GetRegNum());
51965194

51975195
// 8-byte write
5198-
GetEmitter()->emitIns_R_R(INS_str, EA_8BYTE, data->GetRegNum(), addr->GetRegNum());
5196+
GetEmitter()->emitIns_R_R(INS_str, EA_8BYTE, dataReg, addrReg);
51995197

52005198
// Extract upper 4-bytes from data
5201-
GetEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, data->GetRegNum(), 2);
5199+
GetEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);
52025200

52035201
// 4-byte write
5204-
GetEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, tmpReg, addr->GetRegNum(), 8);
5202+
GetEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, tmpReg, addrReg, 8);
52055203
}
52065204

52075205
//-----------------------------------------------------------------------------
5208-
// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value.
5206+
// genLoadIndTypeSimd12: load indirect a TYP_SIMD12 (i.e. Vector3) value.
52095207
// Since Vector3 is not a hardware supported write size, it is performed
52105208
// as two loads: 8 byte followed by 4-byte.
52115209
//
@@ -5216,34 +5214,33 @@ void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
52165214
// Return Value:
52175215
// None.
52185216
//
5219-
void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
5217+
void CodeGen::genLoadIndTypeSimd12(GenTreeIndir* treeNode)
52205218
{
5221-
assert(treeNode->OperGet() == GT_IND);
5222-
5223-
GenTree* addr = treeNode->AsOp()->gtOp1;
5224-
regNumber targetReg = treeNode->GetRegNum();
5219+
assert(treeNode->OperIs(GT_IND));
52255220

5221+
GenTree* addr = treeNode->Addr();
52265222
assert(!addr->isContained());
52275223

5228-
regNumber operandReg = genConsumeReg(addr);
5224+
regNumber tgtReg = treeNode->GetRegNum();
5225+
regNumber addrReg = genConsumeReg(addr);
52295226

52305227
// Need an additional int register to read upper 4 bytes, which is different from targetReg
52315228
regNumber tmpReg = treeNode->GetSingleTempReg();
52325229

52335230
// 8-byte read
5234-
GetEmitter()->emitIns_R_R(INS_ldr, EA_8BYTE, targetReg, addr->GetRegNum());
5231+
GetEmitter()->emitIns_R_R(INS_ldr, EA_8BYTE, tgtReg, addrReg);
52355232

52365233
// 4-byte read
5237-
GetEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, addr->GetRegNum(), 8);
5234+
GetEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, addrReg, 8);
52385235

52395236
// Insert upper 4-bytes into data
5240-
GetEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, targetReg, tmpReg, 2);
5237+
GetEmitter()->emitIns_R_R_I(INS_mov, EA_4BYTE, tgtReg, tmpReg, 2);
52415238

52425239
genProduceReg(treeNode);
52435240
}
52445241

52455242
//-----------------------------------------------------------------------------
5246-
// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
5243+
// genStoreLclTypeSimd12: store a TYP_SIMD12 (i.e. Vector3) type field.
52475244
// Since Vector3 is not a hardware supported write size, it is performed
52485245
// as two stores: 8 byte followed by 4-byte.
52495246
//
@@ -5253,23 +5250,20 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
52535250
// Return Value:
52545251
// None.
52555252
//
5256-
void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
5253+
void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode)
52575254
{
52585255
assert(treeNode->OperIs(GT_STORE_LCL_FLD, GT_STORE_LCL_VAR));
52595256

5260-
GenTreeLclVarCommon* lclVar = treeNode->AsLclVarCommon();
5261-
5262-
unsigned offs = lclVar->GetLclOffs();
5263-
unsigned varNum = lclVar->GetLclNum();
5264-
LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
5257+
unsigned offs = treeNode->GetLclOffs();
5258+
unsigned varNum = treeNode->GetLclNum();
52655259
assert(varNum < compiler->lvaCount);
52665260

5267-
GenTree* op1 = lclVar->gtGetOp1();
5261+
GenTree* data = treeNode->gtGetOp1();
52685262

5269-
if (op1->isContained())
5263+
if (data->isContained())
52705264
{
52715265
// This is only possible for a zero-init.
5272-
assert(op1->IsIntegralConst(0) || op1->IsVectorZero());
5266+
assert(data->IsIntegralConst(0) || data->IsVectorZero());
52735267

52745268
// store lower 8 bytes
52755269
GetEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, REG_ZR, varNum, offs);
@@ -5279,30 +5273,34 @@ void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
52795273

52805274
// Update life after instruction emitted
52815275
genUpdateLife(treeNode);
5276+
5277+
LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
52825278
varDsc->SetRegNum(REG_STK);
52835279

52845280
return;
52855281
}
52865282

5287-
regNumber targetReg = treeNode->GetRegNum();
5288-
regNumber operandReg = genConsumeReg(op1);
5283+
regNumber tgtReg = treeNode->GetRegNum();
5284+
regNumber dataReg = genConsumeReg(data);
52895285

5290-
if (targetReg != REG_NA)
5286+
if (tgtReg != REG_NA)
52915287
{
5292-
assert(GetEmitter()->isVectorRegister(targetReg));
5293-
52945288
// Simply use mov if we move a SIMD12 reg to another SIMD12 reg
5295-
inst_Mov(treeNode->TypeGet(), targetReg, operandReg, /* canSkip */ true);
5289+
assert(GetEmitter()->isVectorRegister(tgtReg));
5290+
5291+
inst_Mov(treeNode->TypeGet(), tgtReg, dataReg, /* canSkip */ true);
52965292
genProduceReg(treeNode);
52975293
}
52985294
else
52995295
{
53005296
// Need an additional integer register to extract upper 4 bytes from data.
5301-
regNumber tmpReg = lclVar->GetSingleTempReg();
5302-
GetEmitter()->emitStoreSIMD12ToLclOffset(varNum, offs, operandReg, tmpReg);
5297+
regNumber tmpReg = treeNode->GetSingleTempReg();
5298+
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, offs, dataReg, tmpReg);
53035299

53045300
// Update life after instruction emitted
53055301
genUpdateLife(treeNode);
5302+
5303+
LclVarDsc* varDsc = compiler->lvaGetDesc(varNum);
53065304
varDsc->SetRegNum(REG_STK);
53075305
}
53085306
}

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
807807
if (compMacOsArm64Abi() && (treeNode->GetStackByteSize() == 12))
808808
{
809809
regNumber tmpReg = treeNode->GetSingleTempReg();
810-
GetEmitter()->emitStoreSIMD12ToLclOffset(varNumOut, argOffsetOut, srcReg, tmpReg);
810+
GetEmitter()->emitStoreSimd12ToLclOffset(varNumOut, argOffsetOut, srcReg, tmpReg);
811811
argOffsetOut += 12;
812812
}
813813
else
@@ -1827,7 +1827,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
18271827
// Handling of Vector3 type values loaded through indirection.
18281828
if (tree->TypeGet() == TYP_SIMD12)
18291829
{
1830-
genLoadIndTypeSIMD12(tree);
1830+
genLoadIndTypeSimd12(tree);
18311831
return;
18321832
}
18331833
#endif // FEATURE_SIMD

src/coreclr/jit/codegenlinear.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1880,7 +1880,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg
18801880
{
18811881
// Need an additional integer register to extract upper 4 bytes from data.
18821882
regNumber tmpReg = nextArgNode->GetSingleTempReg();
1883-
GetEmitter()->emitStoreSIMD12ToLclOffset(outArgVarNum, thisFieldOffset, reg, tmpReg);
1883+
GetEmitter()->emitStoreSimd12ToLclOffset(outArgVarNum, thisFieldOffset, reg, tmpReg);
18841884
}
18851885
else
18861886
#endif // FEATURE_SIMD

src/coreclr/jit/codegenxarch.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4768,19 +4768,19 @@ void CodeGen::genCodeForLclFld(GenTreeLclFld* tree)
47684768
assert(tree->OperIs(GT_LCL_FLD));
47694769

47704770
var_types targetType = tree->TypeGet();
4771-
regNumber targetReg = tree->GetRegNum();
4772-
4773-
noway_assert(targetReg != REG_NA);
47744771

47754772
#ifdef FEATURE_SIMD
47764773
// Loading of TYP_SIMD12 (i.e. Vector3) field
47774774
if (targetType == TYP_SIMD12)
47784775
{
4779-
genLoadLclTypeSIMD12(tree);
4776+
genLoadLclTypeSimd12(tree);
47804777
return;
47814778
}
47824779
#endif
47834780

4781+
regNumber targetReg = tree->GetRegNum();
4782+
noway_assert(targetReg != REG_NA);
4783+
47844784
noway_assert(targetType != TYP_STRUCT);
47854785

47864786
emitAttr size = emitTypeSize(targetType);
@@ -4819,7 +4819,7 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
48194819
// Loading of TYP_SIMD12 (i.e. Vector3) variable
48204820
if (tree->TypeGet() == TYP_SIMD12)
48214821
{
4822-
genLoadLclTypeSIMD12(tree);
4822+
genLoadLclTypeSimd12(tree);
48234823
return;
48244824
}
48254825
#endif // defined(FEATURE_SIMD) && defined(TARGET_X86)
@@ -4848,7 +4848,7 @@ void CodeGen::genCodeForStoreLclFld(GenTreeLclFld* tree)
48484848
// storing of TYP_SIMD12 (i.e. Vector3) field
48494849
if (targetType == TYP_SIMD12)
48504850
{
4851-
genStoreLclTypeSIMD12(tree);
4851+
genStoreLclTypeSimd12(tree);
48524852
return;
48534853
}
48544854
#endif // FEATURE_SIMD
@@ -4949,7 +4949,7 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
49494949
// storing of TYP_SIMD12 (i.e. Vector3) field
49504950
if (targetType == TYP_SIMD12)
49514951
{
4952-
genStoreLclTypeSIMD12(lclNode);
4952+
genStoreLclTypeSimd12(lclNode);
49534953
return;
49544954
}
49554955
#endif // FEATURE_SIMD
@@ -5131,7 +5131,7 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
51315131
// Handling of Vector3 type values loaded through indirection.
51325132
if (tree->TypeGet() == TYP_SIMD12)
51335133
{
5134-
genLoadIndTypeSIMD12(tree);
5134+
genLoadIndTypeSimd12(tree);
51355135
return;
51365136
}
51375137
#endif // FEATURE_SIMD
@@ -5170,7 +5170,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
51705170
// Storing Vector3 of size 12 bytes through indirection
51715171
if (tree->TypeGet() == TYP_SIMD12)
51725172
{
5173-
genStoreIndTypeSIMD12(tree);
5173+
genStoreIndTypeSimd12(tree);
51745174
return;
51755175
}
51765176
#endif // FEATURE_SIMD
@@ -5336,6 +5336,12 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
53365336
break;
53375337
}
53385338

5339+
case NI_Vector128_GetElement:
5340+
{
5341+
assert(baseType == TYP_FLOAT);
5342+
FALLTHROUGH;
5343+
}
5344+
53395345
case NI_SSE2_Extract:
53405346
case NI_SSE41_Extract:
53415347
case NI_SSE41_X64_Extract:
@@ -8123,7 +8129,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
81238129
if (fieldType == TYP_SIMD12)
81248130
{
81258131
assert(genIsValidFloatReg(simdTmpReg));
8126-
genStoreSIMD12ToStack(argReg, simdTmpReg);
8132+
genStoreSimd12ToStack(argReg, simdTmpReg);
81278133
}
81288134
else
81298135
#endif // defined(FEATURE_SIMD)
@@ -8418,7 +8424,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
84188424
#if defined(TARGET_X86) && defined(FEATURE_SIMD)
84198425
if (putArgStk->isSIMD12())
84208426
{
8421-
genPutArgStkSIMD12(putArgStk);
8427+
genPutArgStkSimd12(putArgStk);
84228428
return;
84238429
}
84248430
#endif // defined(TARGET_X86) && defined(FEATURE_SIMD)

0 commit comments

Comments
 (0)