@@ -414,8 +414,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
         }

         case GT_STOREIND:
-            LowerStoreIndirCommon(node->AsStoreInd());
-            break;
+            return LowerStoreIndirCommon(node->AsStoreInd());

         case GT_ADD:
         {
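
For context: LowerNode's return value feeds the per-block lowering loop, so returning directly from the GT_STOREIND case lets LowerStoreIndirCommon choose which node gets lowered next, e.g. stepping past nodes it removed or created. A minimal sketch of that driver loop, with a hypothetical name and simplified relative to the real code in lowering.cpp:

    // Simplified sketch of the loop that consumes LowerNode's return value.
    void Lowering::LowerRangeSketch() // hypothetical name, for illustration
    {
        GenTree* node = BlockRange().FirstNode();
        while (node != nullptr)
        {
            // Each handler returns the next node to visit, so a case that
            // deletes or replaces nodes can steer the traversal past them.
            node = LowerNode(node);
        }
    }
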
@@ -8895,7 +8894,10 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
 // Arguments:
 //    ind - the store indirection node we are lowering.
 //
-void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
+// Return Value:
+//    Next node to lower.
+//
+GenTree* Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
 {
     assert(ind->TypeGet() != TYP_STRUCT);

@@ -8910,28 +8912,30 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
 #endif
     TryCreateAddrMode(ind->Addr(), isContainable, ind);

-    if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind))
+    if (comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind))
     {
+        return ind->gtNext;
+    }
+
 #ifndef TARGET_XARCH
-        if (ind->Data()->IsIconHandle(GTF_ICON_OBJ_HDL))
+    if (ind->Data()->IsIconHandle(GTF_ICON_OBJ_HDL))
+    {
+        const ssize_t handle = ind->Data()->AsIntCon()->IconValue();
+        if (!comp->info.compCompHnd->isObjectImmutable(reinterpret_cast<CORINFO_OBJECT_HANDLE>(handle)))
         {
-            const ssize_t handle = ind->Data()->AsIntCon()->IconValue();
-            if (!comp->info.compCompHnd->isObjectImmutable(reinterpret_cast<CORINFO_OBJECT_HANDLE>(handle)))
-            {
-                // On platforms with weaker memory model we need to make sure we use a store with the release semantic
-                // when we publish a potentially mutable object
-                // See relevant discussions https://github.com/dotnet/runtime/pull/76135#issuecomment-1257258310 and
-                // https://github.com/dotnet/runtime/pull/76112#discussion_r980639782
+            // On platforms with weaker memory model we need to make sure we use a store with the release semantic
+            // when we publish a potentially mutable object
+            // See relevant discussions https://github.com/dotnet/runtime/pull/76135#issuecomment-1257258310 and
+            // https://github.com/dotnet/runtime/pull/76112#discussion_r980639782

-                // This can be relaxed to "just make sure to use stlr/memory barrier" if needed
-                ind->gtFlags |= GTF_IND_VOLATILE;
-            }
+            // This can be relaxed to "just make sure to use stlr/memory barrier" if needed
+            ind->gtFlags |= GTF_IND_VOLATILE;
         }
+    }
 #endif

-        LowerStoreIndirCoalescing(ind);
-        LowerStoreIndir(ind);
-    }
+    LowerStoreIndirCoalescing(ind);
+    return LowerStoreIndir(ind);
 }

 //------------------------------------------------------------------------
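
The GTF_IND_VOLATILE flag set above guards object publication on weakly ordered targets. The standalone C++ sketch below models the hazard with std::atomic (an analogy, not JIT code): without release semantics, the store that makes the object reachable could become visible before the stores that initialized it.

    #include <atomic>

    struct Object
    {
        int field;
    };

    std::atomic<Object*> g_slot{nullptr};

    void Publish(Object* obj)
    {
        obj->field = 42; // initializing store

        // On a weak memory model, a plain store of the pointer could be
        // observed before the initializing store above, exposing a
        // partially constructed object to another thread. Marking the
        // StoreInd volatile makes the JIT emit a release store (stlr on
        // ARM64), which is what memory_order_release models here.
        g_slot.store(obj, std::memory_order_release);
    }
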
@@ -9014,7 +9018,7 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind)
 #ifdef TARGET_ARM64
     if (comp->opts.OptimizationEnabled() && ind->OperIs(GT_IND))
     {
-        OptimizeForLdp(ind);
+        OptimizeForLdpStp(ind);
     }
 #endif

@@ -9029,7 +9033,7 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind)
 // cases passing the distance check, but 82 out of these 112 extra cases were
 // then rejected due to interference. So 16 seems like a good number to balance
 // the throughput costs.
-const int LDP_REORDERING_MAX_DISTANCE = 16;
+const int LDP_STP_REORDERING_MAX_DISTANCE = 16;

 //------------------------------------------------------------------------
 // OptimizeForLdp: Record information about an indirection, and try to optimize
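
For a concrete picture of what the renamed constant gates: the reordering tries to bring two same-sized accesses off the same base next to each other so the ARM64 backend can fuse them into a paired instruction. A small standalone example (illustrative only; actual codegen depends on many other conditions):

    struct Pair
    {
        long first;  // offset +0
        long second; // offset +8
    };

    long Sum(Pair* p)
    {
        // Once the two loads are adjacent in LIR, the backend can emit
        //     ldp x1, x2, [x0]
        // instead of two separate ldr instructions; adjacent stores can
        // be fused into stp the same way.
        return p->first + p->second;
    }
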
@@ -9042,7 +9046,7 @@ const int LDP_REORDERING_MAX_DISTANCE = 16;
 // Returns:
 //    True if the optimization was successful.
 //
-bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
+bool Lowering::OptimizeForLdpStp(GenTreeIndir* ind)
 {
     if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_FLOAT, TYP_DOUBLE, TYP_SIMD8, TYP_SIMD16) || ind->IsVolatile())
     {
@@ -9060,7 +9064,7 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind)

     // Every indirection takes an expected 2+ nodes, so we only expect at most
     // half the reordering distance to be candidates for the optimization.
-    int maxCount = min(m_blockIndirs.Height(), LDP_REORDERING_MAX_DISTANCE / 2);
+    int maxCount = min(m_blockIndirs.Height(), LDP_STP_REORDERING_MAX_DISTANCE / 2);
     for (int i = 0; i < maxCount; i++)
     {
         SavedIndir& prev = m_blockIndirs.TopRef(i);
@@ -9075,11 +9079,22 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
             continue;
         }

+        if (prevIndir->gtNext == nullptr)
+        {
+            // Deleted by other optimization
+            continue;
+        }
+
+        if (prevIndir->OperIsStore() != ind->OperIsStore())
+        {
+            continue;
+        }
+
         JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n",
                 Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset);
         if (std::abs(offs - prev.Offset) == genTypeSize(ind))
         {
-            JITDUMP("  ..and they are amenable to ldp optimization\n");
+            JITDUMP("  ..and they are amenable to ldp/stp optimization\n");
             if (TryMakeIndirsAdjacent(prevIndir, ind))
             {
                 // Do not let the previous one participate in
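
The two new guards plus the existing offset check amount to a pairing predicate. Condensed into one hypothetical helper (the name and signature are illustrative, not from the source):

    bool IsLdpStpCandidate(GenTreeIndir* prev, GenTreeIndir* ind, ssize_t prevOffs, ssize_t offs)
    {
        // A node whose gtNext is null was unlinked by another optimization.
        if (prev->gtNext == nullptr)
        {
            return false;
        }

        // ldp/stp cannot pair a load with a store.
        if (prev->OperIsStore() != ind->OperIsStore())
        {
            return false;
        }

        // The accesses must be exactly adjacent: their offsets differ by
        // the size of one access, e.g. +0/+8 for TYP_LONG.
        return std::abs(offs - prevOffs) == (ssize_t)genTypeSize(ind);
    }
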
@@ -9115,7 +9130,7 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
 bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir)
 {
     GenTree* cur = prevIndir;
-    for (int i = 0; i < LDP_REORDERING_MAX_DISTANCE; i++)
+    for (int i = 0; i < LDP_STP_REORDERING_MAX_DISTANCE; i++)
     {
         cur = cur->gtNext;
         if (cur == indir)
@@ -9172,8 +9187,16 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
     INDEBUG(dumpWithMarks());
     JITDUMP("\n");

+    if ((prevIndir->gtLIRFlags & LIR::Flags::Mark) != 0)
+    {
+        JITDUMP("Previous indir is part of the data flow of current indir\n");
+        UnmarkTree(indir);
+        return false;
+    }
+
     m_scratchSideEffects.Clear();

+    bool sawData = false;
     for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext)
     {
         if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
@@ -9186,6 +9209,11 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
                 UnmarkTree(indir);
                 return false;
             }
+
+            if (indir->OperIsStore())
+            {
+                sawData |= cur == indir->Data();
+            }
         }
         else
         {
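
sawData records whether the second store's value is computed between the two indirections, which matters later when deciding whether prevIndir itself must move. A C++ shape whose LIR triggers it (illustrative):

    void StorePair(long* p, long x)
    {
        p[0] = x + 1; // prevIndir
        p[1] = x * x; // indir: the multiply that produces its data lies
                      // between the two stores in linear node order, so
                      // the scan above records sawData = true
    }
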
@@ -9197,6 +9225,13 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi

     if (m_scratchSideEffects.InterferesWith(comp, indir, true))
     {
+        if (!indir->OperIsLoad())
+        {
+            JITDUMP("Have conservative interference with last store. Giving up.\n");
+            UnmarkTree(indir);
+            return false;
+        }
+
         // Try a bit harder, making use of the following facts:
         //
         // 1. We know the indir is non-faulting, so we do not need to worry
@@ -9293,8 +9328,39 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
         }
     }

-    JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
-            Compiler::dspTreeID(indir));
+    JITDUMP("Interference checks passed: can move unrelated nodes past second indir.\n");
+
+    if (sawData)
+    {
+        // If the data node of 'indir' is between 'prevIndir' and 'indir' then
+        // try to move the previous indir up to happen after the data
+        // computation. We will be moving all nodes unrelated to the data flow
+        // past 'indir', so we only need to check interference between
+        // 'prevIndir' and all nodes that are part of 'indir's dataflow.
+        m_scratchSideEffects.Clear();
+        m_scratchSideEffects.AddNode(comp, prevIndir);
+
+        for (GenTree* cur = prevIndir->gtNext;; cur = cur->gtNext)
+        {
+            if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
+            {
+                if (m_scratchSideEffects.InterferesWith(comp, cur, true))
+                {
+                    JITDUMP("Cannot move prev indir [%06u] up past [%06u] to get it past the data computation\n",
+                            Compiler::dspTreeID(prevIndir), Compiler::dspTreeID(cur));
+                    UnmarkTree(indir);
+                    return false;
+                }
+            }
+
+            if (cur == indir->Data())
+            {
+                break;
+            }
+        }
+    }
+
+    JITDUMP("Moving nodes that are not part of data flow of [%06u]\n\n", Compiler::dspTreeID(indir));

     GenTree* previous = prevIndir;
     for (GenTree* node = prevIndir->gtNext;;)
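
When sawData is set, unrelated nodes still move below 'indir', but prevIndir additionally has to end up after the data computation (the relocation appears in the next hunk below). A compilable shape that exercises this path, with the intended net effect on linear order sketched in comments (illustrative):

    void StoreSquare(long* p, long x)
    {
        p[0] = x;     // prevIndir
        p[1] = x * x; // indir
        // Before: STOREIND(p+0) ; MUL(x, x) ; STOREIND(p+8)
        // After:  MUL(x, x) ; STOREIND(p+0) ; STOREIND(p+8)
        // prevIndir is re-inserted right after indir's data computation,
        // provided it does not interfere with any node in that dataflow.
    }
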
@@ -9317,6 +9383,22 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi
         node = next;
     }

+    if (sawData)
+    {
+        // For some reason LSRA is not able to reuse a constant if both LIR
+        // temps are live simultaneously, so skip moving in those cases and
+        // expect LSRA to reuse the constant instead.
+        if (indir->Data()->OperIs(GT_CNS_INT, GT_CNS_DBL) && GenTree::Compare(indir->Data(), prevIndir->Data()))
+        {
+            JITDUMP("Not moving previous indir since we are expecting constant reuse for the data\n");
+        }
+        else
+        {
+            BlockRange().Remove(prevIndir);
+            BlockRange().InsertAfter(indir->Data(), prevIndir);
+        }
+    }
+
     JITDUMP("Result:\n\n");
     INDEBUG(dumpWithMarks());
     JITDUMP("\n");
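
The constant-reuse exception above covers shapes like the following, where both stores write the same immediate (illustrative; exact codegen may differ):

    void Fill(long* p)
    {
        // Both data nodes are the same constant. Leaving prevIndir in
        // place lets LSRA materialize 42 once and feed both stores from
        // one register, which still pairs into a single stp.
        p[0] = 42;
        p[1] = 42;
    }
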