Skip to content

Commit c267ef9

Browse files
committed
JIT: Have lowering set up IR for post-indexed addressing
This adds a transformation in lowering that tries to set up the IR to be amenable to post-indexed addressing in the backend. It does so by looking for RMW additions/subtractions of a local that was also recently used as the address to an indirection.
1 parent fcb9b18 commit c267ef9

File tree

6 files changed

+245
-17
lines changed

6 files changed

+245
-17
lines changed

src/coreclr/jit/lower.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
632632
FALLTHROUGH;
633633

634634
case GT_STORE_LCL_FLD:
635-
LowerStoreLocCommon(node->AsLclVarCommon());
636-
break;
635+
return LowerStoreLocCommon(node->AsLclVarCommon());
637636

638637
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
639638
case GT_CMPXCHG:
@@ -4783,7 +4782,10 @@ void Lowering::LowerRet(GenTreeOp* ret)
47834782
// Arguments:
47844783
// lclStore - The store lcl node to lower.
47854784
//
4786-
void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
4785+
// Returns:
4786+
// Next node to lower.
4787+
//
4788+
GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
47874789
{
47884790
assert(lclStore->OperIs(GT_STORE_LCL_FLD, GT_STORE_LCL_VAR));
47894791
JITDUMP("lowering store lcl var/field (before):\n");
@@ -4870,8 +4872,7 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
48704872
lclStore->gtOp1 = spilledCall;
48714873
src = lclStore->gtOp1;
48724874
JITDUMP("lowering store lcl var/field has to spill call src.\n");
4873-
LowerStoreLocCommon(lclStore);
4874-
return;
4875+
return LowerStoreLocCommon(lclStore);
48754876
}
48764877
#endif // !WINDOWS_AMD64_ABI
48774878
convertToStoreObj = false;
@@ -4966,7 +4967,7 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
49664967
DISPTREERANGE(BlockRange(), objStore);
49674968
JITDUMP("\n");
49684969

4969-
return;
4970+
return objStore->gtNext;
49704971
}
49714972
}
49724973

@@ -4984,11 +4985,13 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
49844985
ContainCheckBitCast(bitcast);
49854986
}
49864987

4987-
LowerStoreLoc(lclStore);
4988+
GenTree* next = LowerStoreLoc(lclStore);
49884989

49894990
JITDUMP("lowering store lcl var/field (after):\n");
49904991
DISPTREERANGE(BlockRange(), lclStore);
49914992
JITDUMP("\n");
4993+
4994+
return next;
49924995
}
49934996

49944997
//----------------------------------------------------------------------------------------------

src/coreclr/jit/lower.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ class Lowering final : public Phase
157157
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
158158
void LowerJmpMethod(GenTree* jmp);
159159
void LowerRet(GenTreeOp* ret);
160-
void LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
160+
GenTree* LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
161161
void LowerRetStruct(GenTreeUnOp* ret);
162162
void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret);
163163
void LowerCallStruct(GenTreeCall* call);
@@ -353,6 +353,8 @@ class Lowering final : public Phase
353353
GenTree* LowerIndir(GenTreeIndir* ind);
354354
bool OptimizeForLdpStp(GenTreeIndir* ind);
355355
bool TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir);
356+
bool TryMoveAddSubRMWAfterIndir(GenTreeLclVarCommon* store);
357+
bool TryMakeIndirAndStoreAdjacent(GenTreeIndir* prevIndir, GenTreeLclVarCommon* store);
356358
void MarkTree(GenTree* root);
357359
void UnmarkTree(GenTree* root);
358360
GenTree* LowerStoreIndir(GenTreeStoreInd* node);
@@ -401,11 +403,11 @@ class Lowering final : public Phase
401403
bool LowerRMWMemOp(GenTreeIndir* storeInd);
402404
#endif
403405

404-
void WidenSIMD12IfNecessary(GenTreeLclVarCommon* node);
405-
bool CheckMultiRegLclVar(GenTreeLclVar* lclNode, int registerCount);
406-
void LowerStoreLoc(GenTreeLclVarCommon* tree);
407-
void LowerRotate(GenTree* tree);
408-
void LowerShift(GenTreeOp* shift);
406+
void WidenSIMD12IfNecessary(GenTreeLclVarCommon* node);
407+
bool CheckMultiRegLclVar(GenTreeLclVar* lclNode, int registerCount);
408+
GenTree* LowerStoreLoc(GenTreeLclVarCommon* tree);
409+
void LowerRotate(GenTree* tree);
410+
void LowerShift(GenTreeOp* shift);
409411
#ifdef FEATURE_HW_INTRINSICS
410412
GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
411413
void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 212 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,10 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
472472
// This involves:
473473
// - Widening small stores (on ARM).
474474
//
475-
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
475+
// Returns:
476+
// Next node to lower.
477+
//
478+
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
476479
{
477480
#ifdef TARGET_ARM
478481
// On ARM, small stores can cost a bit more in terms of code size so we try to widen them. This is legal
@@ -495,6 +498,17 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
495498
}
496499

497500
ContainCheckStoreLoc(storeLoc);
501+
502+
GenTree* next = storeLoc->gtNext;
503+
504+
#ifdef TARGET_ARM64
505+
if (comp->opts.OptimizationEnabled())
506+
{
507+
TryMoveAddSubRMWAfterIndir(storeLoc);
508+
}
509+
#endif
510+
511+
return next;
498512
}
499513

500514
//------------------------------------------------------------------------
@@ -1053,6 +1067,203 @@ void Lowering::LowerModPow2(GenTree* node)
10531067
ContainCheckNode(mod);
10541068
}
10551069

1070+
const int POST_INDEXED_ADDRESSING_MAX_DISTANCE = 16;
1071+
1072+
//------------------------------------------------------------------------
1073+
// TryMoveAddSubRMWAfterIndir: Try to move an RMW update of a local with an
1074+
// ADD/SUB operand earlier to happen right after an indirection on the same
1075+
// local, attempting to make these combinable intro post-indexed addressing.
1076+
//
1077+
// Arguments:
1078+
// store - The store to a local
1079+
//
1080+
// Return Value:
1081+
// True if the store was moved; otherwise false.
1082+
//
1083+
bool Lowering::TryMoveAddSubRMWAfterIndir(GenTreeLclVarCommon* store)
1084+
{
1085+
if (!store->OperIs(GT_STORE_LCL_VAR))
1086+
{
1087+
return false;
1088+
}
1089+
1090+
unsigned lclNum = store->GetLclNum();
1091+
if (comp->lvaGetDesc(lclNum)->lvDoNotEnregister)
1092+
{
1093+
return false;
1094+
}
1095+
1096+
GenTree* data = store->Data();
1097+
if (!data->OperIs(GT_ADD, GT_SUB) || data->gtOverflow())
1098+
{
1099+
return false;
1100+
}
1101+
1102+
GenTree* op1 = data->gtGetOp1();
1103+
GenTree* op2 = data->gtGetOp2();
1104+
if (!op1->OperIs(GT_LCL_VAR) || !op2->isContainedIntOrIImmed())
1105+
{
1106+
return false;
1107+
}
1108+
1109+
if (op1->AsLclVarCommon()->GetLclNum() != lclNum)
1110+
{
1111+
return false;
1112+
}
1113+
1114+
int maxCount = min(m_blockIndirs.Height(), POST_INDEXED_ADDRESSING_MAX_DISTANCE / 2);
1115+
for (int i = 0; i < maxCount; i++)
1116+
{
1117+
SavedIndir& prev = m_blockIndirs.TopRef(i);
1118+
if ((prev.AddrBase->GetLclNum() != lclNum) || (prev.Offset != 0))
1119+
{
1120+
continue;
1121+
}
1122+
1123+
GenTreeIndir* prevIndir = prev.Indir;
1124+
if ((prevIndir == nullptr) || (prevIndir->gtNext == nullptr))
1125+
{
1126+
continue;
1127+
}
1128+
1129+
JITDUMP(
1130+
"[%06u] is an an RMW ADD/SUB on local V%02u which is used as the address to [%06u]. Trying to make them adjacent.\n",
1131+
Compiler::dspTreeID(store), lclNum, Compiler::dspTreeID(prevIndir));
1132+
1133+
if (TryMakeIndirAndStoreAdjacent(prevIndir, store))
1134+
{
1135+
prev.Indir = nullptr;
1136+
return true;
1137+
}
1138+
}
1139+
1140+
return false;
1141+
}
1142+
1143+
//------------------------------------------------------------------------
1144+
// TryMakeIndirAndStoreAdjacent: Try to move a store earlier, right after the
1145+
// specified indirection.
1146+
//
1147+
// Arguments:
1148+
// prevIndir - Indirection that comes before "store"
1149+
// store - Store that we want to happen next to the indirection
1150+
//
1151+
// Return Value:
1152+
// True if the store was moved; otherwise false.
1153+
//
1154+
bool Lowering::TryMakeIndirAndStoreAdjacent(GenTreeIndir* prevIndir, GenTreeLclVarCommon* store)
1155+
{
1156+
GenTree* cur = prevIndir;
1157+
for (int i = 0; i < POST_INDEXED_ADDRESSING_MAX_DISTANCE; i++)
1158+
{
1159+
cur = cur->gtNext;
1160+
if (cur == store)
1161+
break;
1162+
}
1163+
1164+
if (cur != store)
1165+
{
1166+
JITDUMP(" Too far separated, giving up\n");
1167+
return false;
1168+
}
1169+
1170+
JITDUMP(" They are close. Trying to move the following range (where * are nodes part of the data flow):\n\n");
1171+
#ifdef DEBUG
1172+
bool isClosed;
1173+
GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode();
1174+
GenTree* endDumpNode = store->gtNext;
1175+
1176+
auto dumpWithMarks = [=]() {
1177+
if (!comp->verbose)
1178+
{
1179+
return;
1180+
}
1181+
1182+
for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext)
1183+
{
1184+
const char* prefix;
1185+
if (node == prevIndir)
1186+
prefix = "1. ";
1187+
else if (node == store)
1188+
prefix = "2. ";
1189+
else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
1190+
prefix = "* ";
1191+
else
1192+
prefix = " ";
1193+
1194+
comp->gtDispLIRNode(node, prefix);
1195+
}
1196+
};
1197+
1198+
#endif
1199+
1200+
MarkTree(store);
1201+
1202+
INDEBUG(dumpWithMarks());
1203+
JITDUMP("\n");
1204+
1205+
assert((prevIndir->gtLIRFlags & LIR::Flags::Mark) == 0);
1206+
m_scratchSideEffects.Clear();
1207+
1208+
for (GenTree* cur = prevIndir->gtNext; cur != store; cur = cur->gtNext)
1209+
{
1210+
if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
1211+
{
1212+
// 'cur' is part of data flow of 'store', so we will be moving the
1213+
// currently recorded effects past 'cur'.
1214+
if (m_scratchSideEffects.InterferesWith(comp, cur, true))
1215+
{
1216+
JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur));
1217+
UnmarkTree(store);
1218+
return false;
1219+
}
1220+
}
1221+
else
1222+
{
1223+
// Not part of dataflow; add its effects that will move past
1224+
// 'store'.
1225+
m_scratchSideEffects.AddNode(comp, cur);
1226+
}
1227+
}
1228+
1229+
if (m_scratchSideEffects.InterferesWith(comp, store, true))
1230+
{
1231+
JITDUMP("Have interference. Giving up.\n");
1232+
UnmarkTree(store);
1233+
return false;
1234+
}
1235+
1236+
JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
1237+
Compiler::dspTreeID(store));
1238+
1239+
GenTree* previous = prevIndir;
1240+
for (GenTree* node = prevIndir->gtNext;;)
1241+
{
1242+
GenTree* next = node->gtNext;
1243+
1244+
if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
1245+
{
1246+
// Part of data flow. Move it to happen right after 'previous'.
1247+
BlockRange().Remove(node);
1248+
BlockRange().InsertAfter(previous, node);
1249+
previous = node;
1250+
}
1251+
1252+
if (node == store)
1253+
{
1254+
break;
1255+
}
1256+
1257+
node = next;
1258+
}
1259+
1260+
JITDUMP("Result:\n\n");
1261+
INDEBUG(dumpWithMarks());
1262+
JITDUMP("\n");
1263+
UnmarkTree(store);
1264+
return true;
1265+
}
1266+
10561267
//------------------------------------------------------------------------
10571268
// LowerAddForPossibleContainment: Tries to lower GT_ADD in such a way
10581269
// that would allow one of its operands

src/coreclr/jit/lowerloongarch64.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,14 +249,18 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
249249
// This involves:
250250
// - Widening operations of unsigneds.
251251
//
252-
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
252+
// Returns:
253+
// Next node to lower.
254+
//
255+
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
253256
{
254257
if (storeLoc->OperIs(GT_STORE_LCL_FLD))
255258
{
256259
// We should only encounter this for lclVars that are lvDoNotEnregister.
257260
verifyLclFldDoNotEnregister(storeLoc->GetLclNum());
258261
}
259262
ContainCheckStoreLoc(storeLoc);
263+
return storeLoc->gtNext;
260264
}
261265

262266
//------------------------------------------------------------------------

src/coreclr/jit/lowerriscv64.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -198,14 +198,18 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
198198
// This involves:
199199
// - Widening operations of unsigneds.
200200
//
201-
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
201+
// Returns:
202+
// Next node to lower.
203+
//
204+
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
202205
{
203206
if (storeLoc->OperIs(GT_STORE_LCL_FLD))
204207
{
205208
// We should only encounter this for lclVars that are lvDoNotEnregister.
206209
verifyLclFldDoNotEnregister(storeLoc->GetLclNum());
207210
}
208211
ContainCheckStoreLoc(storeLoc);
212+
return storeLoc->gtNext;
209213
}
210214

211215
//------------------------------------------------------------------------

src/coreclr/jit/lowerxarch.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,10 @@ void Lowering::LowerRotate(GenTree* tree)
4343
// - Handling of contained immediates.
4444
// - Widening some small stores.
4545
//
46-
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
46+
// Returns:
47+
// Next tree to lower.
48+
//
49+
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
4750
{
4851
// Most small locals (the exception is dependently promoted fields) have 4 byte wide stack slots, so
4952
// we can widen the store, if profitable. The widening is only (largely) profitable for 2 byte stores.
@@ -64,6 +67,7 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
6467
}
6568

6669
ContainCheckStoreLoc(storeLoc);
70+
return storeLoc->gtNext;
6771
}
6872

6973
//------------------------------------------------------------------------

0 commit comments

Comments
 (0)