@@ -472,7 +472,10 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
472472// This involves:
473473// - Widening small stores (on ARM).
474474//
475- void Lowering::LowerStoreLoc (GenTreeLclVarCommon* storeLoc)
475+ // Returns:
476+ // Next node to lower.
477+ //
478+ GenTree* Lowering::LowerStoreLoc (GenTreeLclVarCommon* storeLoc)
476479{
477480#ifdef TARGET_ARM
478481 // On ARM, small stores can cost a bit more in terms of code size so we try to widen them. This is legal
@@ -495,6 +498,17 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
495498 }
496499
497500 ContainCheckStoreLoc (storeLoc);
501+
502+ GenTree* next = storeLoc->gtNext ;
503+
504+ #ifdef TARGET_ARM64
505+ if (comp->opts .OptimizationEnabled ())
506+ {
507+ TryMoveAddSubRMWAfterIndir (storeLoc);
508+ }
509+ #endif
510+
511+ return next;
498512}
499513
500514// ------------------------------------------------------------------------
@@ -1053,6 +1067,203 @@ void Lowering::LowerModPow2(GenTree* node)
10531067 ContainCheckNode (mod);
10541068}
10551069
1070+ const int POST_INDEXED_ADDRESSING_MAX_DISTANCE = 16 ;
1071+
1072+ // ------------------------------------------------------------------------
1073+ // TryMoveAddSubRMWAfterIndir: Try to move an RMW update of a local with an
1074+ // ADD/SUB operand earlier to happen right after an indirection on the same
1075+ // local, attempting to make these combinable intro post-indexed addressing.
1076+ //
1077+ // Arguments:
1078+ // store - The store to a local
1079+ //
1080+ // Return Value:
1081+ // True if the store was moved; otherwise false.
1082+ //
1083+ bool Lowering::TryMoveAddSubRMWAfterIndir (GenTreeLclVarCommon* store)
1084+ {
1085+ if (!store->OperIs (GT_STORE_LCL_VAR))
1086+ {
1087+ return false ;
1088+ }
1089+
1090+ unsigned lclNum = store->GetLclNum ();
1091+ if (comp->lvaGetDesc (lclNum)->lvDoNotEnregister )
1092+ {
1093+ return false ;
1094+ }
1095+
1096+ GenTree* data = store->Data ();
1097+ if (!data->OperIs (GT_ADD, GT_SUB) || data->gtOverflow ())
1098+ {
1099+ return false ;
1100+ }
1101+
1102+ GenTree* op1 = data->gtGetOp1 ();
1103+ GenTree* op2 = data->gtGetOp2 ();
1104+ if (!op1->OperIs (GT_LCL_VAR) || !op2->isContainedIntOrIImmed ())
1105+ {
1106+ return false ;
1107+ }
1108+
1109+ if (op1->AsLclVarCommon ()->GetLclNum () != lclNum)
1110+ {
1111+ return false ;
1112+ }
1113+
1114+ int maxCount = min (m_blockIndirs.Height (), POST_INDEXED_ADDRESSING_MAX_DISTANCE / 2 );
1115+ for (int i = 0 ; i < maxCount; i++)
1116+ {
1117+ SavedIndir& prev = m_blockIndirs.TopRef (i);
1118+ if ((prev.AddrBase ->GetLclNum () != lclNum) || (prev.Offset != 0 ))
1119+ {
1120+ continue ;
1121+ }
1122+
1123+ GenTreeIndir* prevIndir = prev.Indir ;
1124+ if ((prevIndir == nullptr ) || (prevIndir->gtNext == nullptr ))
1125+ {
1126+ continue ;
1127+ }
1128+
1129+ JITDUMP (
1130+ " [%06u] is an an RMW ADD/SUB on local V%02u which is used as the address to [%06u]. Trying to make them adjacent.\n " ,
1131+ Compiler::dspTreeID (store), lclNum, Compiler::dspTreeID (prevIndir));
1132+
1133+ if (TryMakeIndirAndStoreAdjacent (prevIndir, store))
1134+ {
1135+ prev.Indir = nullptr ;
1136+ return true ;
1137+ }
1138+ }
1139+
1140+ return false ;
1141+ }
1142+
1143+ // ------------------------------------------------------------------------
1144+ // TryMakeIndirAndStoreAdjacent: Try to move a store earlier, right after the
1145+ // specified indirection.
1146+ //
1147+ // Arguments:
1148+ // prevIndir - Indirection that comes before "store"
1149+ // store - Store that we want to happen next to the indirection
1150+ //
1151+ // Return Value:
1152+ // True if the store was moved; otherwise false.
1153+ //
1154+ bool Lowering::TryMakeIndirAndStoreAdjacent (GenTreeIndir* prevIndir, GenTreeLclVarCommon* store)
1155+ {
1156+ GenTree* cur = prevIndir;
1157+ for (int i = 0 ; i < POST_INDEXED_ADDRESSING_MAX_DISTANCE; i++)
1158+ {
1159+ cur = cur->gtNext ;
1160+ if (cur == store)
1161+ break ;
1162+ }
1163+
1164+ if (cur != store)
1165+ {
1166+ JITDUMP (" Too far separated, giving up\n " );
1167+ return false ;
1168+ }
1169+
1170+ JITDUMP (" They are close. Trying to move the following range (where * are nodes part of the data flow):\n\n " );
1171+ #ifdef DEBUG
1172+ bool isClosed;
1173+ GenTree* startDumpNode = BlockRange ().GetTreeRange (prevIndir, &isClosed).FirstNode ();
1174+ GenTree* endDumpNode = store->gtNext ;
1175+
1176+ auto dumpWithMarks = [=]() {
1177+ if (!comp->verbose )
1178+ {
1179+ return ;
1180+ }
1181+
1182+ for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext )
1183+ {
1184+ const char * prefix;
1185+ if (node == prevIndir)
1186+ prefix = " 1. " ;
1187+ else if (node == store)
1188+ prefix = " 2. " ;
1189+ else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0 )
1190+ prefix = " * " ;
1191+ else
1192+ prefix = " " ;
1193+
1194+ comp->gtDispLIRNode (node, prefix);
1195+ }
1196+ };
1197+
1198+ #endif
1199+
1200+ MarkTree (store);
1201+
1202+ INDEBUG (dumpWithMarks ());
1203+ JITDUMP (" \n " );
1204+
1205+ assert ((prevIndir->gtLIRFlags & LIR::Flags::Mark) == 0 );
1206+ m_scratchSideEffects.Clear ();
1207+
1208+ for (GenTree* cur = prevIndir->gtNext ; cur != store; cur = cur->gtNext )
1209+ {
1210+ if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0 )
1211+ {
1212+ // 'cur' is part of data flow of 'store', so we will be moving the
1213+ // currently recorded effects past 'cur'.
1214+ if (m_scratchSideEffects.InterferesWith (comp, cur, true ))
1215+ {
1216+ JITDUMP (" Giving up due to interference with [%06u]\n " , Compiler::dspTreeID (cur));
1217+ UnmarkTree (store);
1218+ return false ;
1219+ }
1220+ }
1221+ else
1222+ {
1223+ // Not part of dataflow; add its effects that will move past
1224+ // 'store'.
1225+ m_scratchSideEffects.AddNode (comp, cur);
1226+ }
1227+ }
1228+
1229+ if (m_scratchSideEffects.InterferesWith (comp, store, true ))
1230+ {
1231+ JITDUMP (" Have interference. Giving up.\n " );
1232+ UnmarkTree (store);
1233+ return false ;
1234+ }
1235+
1236+ JITDUMP (" Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n " ,
1237+ Compiler::dspTreeID (store));
1238+
1239+ GenTree* previous = prevIndir;
1240+ for (GenTree* node = prevIndir->gtNext ;;)
1241+ {
1242+ GenTree* next = node->gtNext ;
1243+
1244+ if ((node->gtLIRFlags & LIR::Flags::Mark) != 0 )
1245+ {
1246+ // Part of data flow. Move it to happen right after 'previous'.
1247+ BlockRange ().Remove (node);
1248+ BlockRange ().InsertAfter (previous, node);
1249+ previous = node;
1250+ }
1251+
1252+ if (node == store)
1253+ {
1254+ break ;
1255+ }
1256+
1257+ node = next;
1258+ }
1259+
1260+ JITDUMP (" Result:\n\n " );
1261+ INDEBUG (dumpWithMarks ());
1262+ JITDUMP (" \n " );
1263+ UnmarkTree (store);
1264+ return true ;
1265+ }
1266+
10561267// ------------------------------------------------------------------------
10571268// LowerAddForPossibleContainment: Tries to lower GT_ADD in such a way
10581269// that would allow one of its operands
0 commit comments