From ddf2b02678e7136d6ecad1c1582797f8c3dd2bb3 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Mon, 19 Aug 2024 16:52:47 +0200
Subject: [PATCH 01/25] JIT: Put all CSEs into SSA

This adds an SSA updater that can incrementally put locals that were added to
the IR into SSA form, and updates CSE to use this infrastructure to put all
CSEs into SSA. Previously only single-def CSEs were put into SSA, which can
cause e.g. IV opts to miss out on them.

The SSA updater requires all uses and definitions of the local to be supplied.
Based on the definitions it is then possible to compute the candidate blocks
for phi nodes in the usual way, as the iterated dominance frontier. Since we do
not have liveness for the local, we do not insert the phi definitions eagerly;
instead, we recursively compute the reaching def for each use by walking its
dominators. Walking the dominators, we expect either to find a real definition
or to hit a block that is part of the iterated dominance frontier of the
definitions, in which case we know we have a live phi definition that we can
insert. Inserting a phi definition needs to recursively do the same thing for
the phi arguments. To make this faster we could memoize the reaching SSA
numbers for each block, though this is not currently done.

This also requires computing the DFS tree, dominators, and dominance frontiers
after RBO (which invalidates them). The DFS tree and dominators should be
reusable by IV opts in most cases, though currently we invalidate them after
CSE because assertion prop will sometimes change the flow graph and does not
know how to invalidate the DFS tree. This can also be fixed to improve
throughput.

We currently do not get much use out of this because IV opts also needs
liveness for the IVs. More specifically, it needs to know whether the IVs are
live out of the loop; it should be possible to keep some breadcrumbs around for
inserted locals to cheaply know this, I think.
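As a rough illustration of the reaching-def walk described above, here is a
minimal standalone sketch. It is not the JIT's code: Block, SsaState and
FindReachingDef are made-up names, and intra-block ordering of defs and uses is
ignored. For each use we walk the immediate-dominator chain until we hit a
block with a real store, or a block in the iterated dominance frontier of the
stores, where a phi definition is created on demand.

// Standalone sketch only: these names do not exist in the JIT.
#include <cstdio>
#include <unordered_map>
#include <unordered_set>

struct Block
{
    int    id;
    Block* idom; // immediate dominator; nullptr for the entry block
};

struct SsaState
{
    std::unordered_map<Block*, int> realDefs; // block -> SSA num of its real store
    std::unordered_set<Block*>      idf;      // iterated dominance frontier of the def blocks
    std::unordered_map<Block*, int> phiDefs;  // lazily created phi definitions
    int                             nextSsaNum = 1;
};

// Walk the dominator chain of the use's block. The first block containing a
// real def, or lying in the IDF of the defs (where a phi is inserted on
// demand), supplies the reaching SSA number.
int FindReachingDef(Block* useBlock, SsaState& state)
{
    for (Block* dom = useBlock; dom != nullptr; dom = dom->idom)
    {
        auto realDef = state.realDefs.find(dom);
        if (realDef != state.realDefs.end())
        {
            return realDef->second;
        }

        if (state.idf.count(dom) != 0)
        {
            auto phi = state.phiDefs.find(dom);
            if (phi != state.phiDefs.end())
            {
                return phi->second; // phi already created here
            }

            // Create the phi def lazily. A full implementation would also
            // call FindReachingDef for each predecessor to fill in the phi
            // arguments, which is where the recursion comes in.
            int ssaNum = state.nextSsaNum++;
            state.phiDefs[dom] = ssaNum;
            return ssaNum;
        }
    }

    return -1; // the builder requires every use to be dominated by a def
}

int main()
{
    // Diamond CFG: A -> {B, C} -> D, stores in B and C, use in D.
    Block a{0, nullptr}, b{1, &a}, c{2, &a}, d{3, &a};
    SsaState state;
    state.realDefs[&b] = 1;
    state.realDefs[&c] = 2;
    state.nextSsaNum   = 3;
    state.idf.insert(&d); // IDF({B, C}) = {D}, so the use in D sees a phi
    std::printf("reaching def in D: SSA #%d\n", FindReachingDef(&d, state));
    return 0;
}

The real IncrementalSsaBuilder added below also has to respect the ordering of
defs and uses inside a single block, which this sketch glosses over.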
--- src/coreclr/jit/compiler.cpp | 1 + src/coreclr/jit/compiler.h | 26 +- src/coreclr/jit/fgdiagnostic.cpp | 1 + src/coreclr/jit/flowgraph.cpp | 142 ++++++ src/coreclr/jit/inductionvariableopts.cpp | 10 + src/coreclr/jit/optcse.cpp | 58 ++- src/coreclr/jit/optimizer.cpp | 2 +- src/coreclr/jit/ssabuilder.cpp | 579 ++++++++++++++-------- src/coreclr/jit/ssabuilder.h | 49 +- src/coreclr/jit/valuenum.cpp | 3 +- 10 files changed, 637 insertions(+), 234 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index ad8622e109cdd6..62da3591626d16 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1974,6 +1974,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc, #endif // DEBUG vnStore = nullptr; + vnState = nullptr; m_outlinedCompositeSsaNums = nullptr; m_nodeToLoopMemoryBlockMap = nullptr; m_signatureToLookupInfoMap = nullptr; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b748f3b81138a3..388dc87e47e0b9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2380,6 +2380,29 @@ class FlowGraphDominatorTree static FlowGraphDominatorTree* Build(const FlowGraphDfsTree* dfsTree); }; +class FlowGraphDominanceFrontiers +{ + FlowGraphDominatorTree* m_domTree; + BlkToBlkVectorMap m_map; + BitVecTraits m_poTraits; + BitVec m_visited; + + FlowGraphDominanceFrontiers(FlowGraphDominatorTree* domTree); + +#ifdef DEBUG + void Dump(); +#endif + +public: + FlowGraphDominatorTree* GetDomTree() + { + return m_domTree; + } + + static FlowGraphDominanceFrontiers* Build(FlowGraphDominatorTree* domTree); + void ComputeIteratedDominanceFrontier(BasicBlock* block, BlkVector* result); +}; + // Represents a reverse mapping from block back to its (most nested) containing loop. class BlockToNaturalLoopMap { @@ -5201,6 +5224,7 @@ class Compiler // Dominator tree used by SSA construction and copy propagation (the two are expected to use the same tree // in order to avoid the need for SSA reconstruction and an "out of SSA" phase). FlowGraphDominatorTree* m_domTree; + FlowGraphDominanceFrontiers* m_domFrontiers; BlockReachabilitySets* m_reachabilitySets; // Do we require loops to be in canonical form? The canonical form ensures that: @@ -12136,7 +12160,7 @@ class DomTreeVisitor public: //------------------------------------------------------------------------ - // WalkTree: Walk the dominator tree. + // WalkTree: Walk the dominator tree starting from the first BB. // // Parameter: // domTree - Dominator tree. 
diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index dbe1b4351b0c5f..853f6c821efa60 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -4745,6 +4745,7 @@ void Compiler::fgDebugCheckFlowGraphAnnotations() assert((m_loops == nullptr) || (m_loops->GetDfsTree() == m_dfsTree)); assert((m_domTree == nullptr) || (m_domTree->GetDfsTree() == m_dfsTree)); + assert((m_domFrontiers == nullptr) || (m_domFrontiers->GetDomTree() == m_domTree)); assert((m_reachabilitySets == nullptr) || (m_reachabilitySets->GetDfsTree() == m_dfsTree)); } diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 58e2f78185ab26..508501b5db9f85 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4130,6 +4130,7 @@ void Compiler::fgInvalidateDfsTree() m_dfsTree = nullptr; m_loops = nullptr; m_domTree = nullptr; + m_domFrontiers = nullptr; m_reachabilitySets = nullptr; fgSsaValid = false; } @@ -6341,6 +6342,147 @@ FlowGraphDominatorTree* FlowGraphDominatorTree::Build(const FlowGraphDfsTree* df return new (comp, CMK_DominatorMemory) FlowGraphDominatorTree(dfsTree, domTree, preorderNums, postorderNums); } +FlowGraphDominanceFrontiers::FlowGraphDominanceFrontiers(FlowGraphDominatorTree* domTree) + : m_domTree(domTree) + , m_map(domTree->GetDfsTree()->GetCompiler()->getAllocator(CMK_DominatorMemory)) + , m_poTraits(domTree->GetDfsTree()->PostOrderTraits()) + , m_visited(BitVecOps::MakeEmpty(&m_poTraits)) +{ +} + +FlowGraphDominanceFrontiers* FlowGraphDominanceFrontiers::Build(FlowGraphDominatorTree* domTree) +{ + const FlowGraphDfsTree* dfsTree = domTree->GetDfsTree(); + Compiler* comp = dfsTree->GetCompiler(); + + FlowGraphDominanceFrontiers* result = new (comp, CMK_DominatorMemory) FlowGraphDominanceFrontiers(domTree); + + for (unsigned i = 0; i < dfsTree->GetPostOrderCount(); i++) + { + BasicBlock* block = dfsTree->GetPostOrder(i); + + // Recall that B3 is in the dom frontier of B1 if there exists a B2 + // such that B1 dom B2, !(B1 dom B3), and B3 is an immediate successor + // of B2. (Note that B1 might be the same block as B2.) + // In that definition, we're considering "block" to be B3, and trying + // to find B1's. To do so, first we consider the predecessors of "block", + // searching for candidate B2's -- "block" is obviously an immediate successor + // of its immediate predecessors. If there are zero or one preds, then there + // is no pred, or else the single pred dominates "block", so no B2 exists. + FlowEdge* blockPreds = comp->BlockPredsWithEH(block); + + // If block has 0/1 predecessor, skip, apart from handler entry blocks + // that are always in the dominance frontier of its enclosed blocks. + if (!comp->bbIsHandlerBeg(block) && ((blockPreds == nullptr) || (blockPreds->getNextPredEdge() == nullptr))) + { + continue; + } + + // Otherwise, there are > 1 preds. Each is a candidate B2 in the definition -- + // *unless* it dominates "block"/B3. + + for (FlowEdge* pred = blockPreds; pred != nullptr; pred = pred->getNextPredEdge()) + { + BasicBlock* predBlock = pred->getSourceBlock(); + + if (!dfsTree->Contains(predBlock)) + { + continue; + } + + // If we've found a B2, then consider the possible B1's. We start with + // B2, since a block dominates itself, then traverse upwards in the dominator + // tree, stopping when we reach the root, or the immediate dominator of "block"/B3. 
+ // (Note that we are guaranteed to encounter this immediate dominator of "block"/B3: + // a predecessor must be dominated by B3's immediate dominator.) + // Along this way, make "block"/B3 part of the dom frontier of the B1. + // When we reach this immediate dominator, the definition no longer applies, since this + // potential B1 *does* dominate "block"/B3, so we stop. + for (BasicBlock* b1 = predBlock; (b1 != nullptr) && (b1 != block->bbIDom); // !root && !loop + b1 = b1->bbIDom) + { + BlkVector& b1DF = *result->m_map.Emplace(b1, comp->getAllocator(CMK_DominatorMemory)); + // It's possible to encounter the same DF multiple times, ensure that we don't add duplicates. + if (b1DF.empty() || (b1DF.back() != block)) + { + b1DF.push_back(block); + } + } + } + } + + return result; +} + +void FlowGraphDominanceFrontiers::ComputeIteratedDominanceFrontier(BasicBlock* block, BlkVector* result) +{ + assert(result->empty()); + + BlkVector* bDF = m_map.LookupPointer(block); + + if (bDF == nullptr) + { + return; + } + + // Compute IDF(b) - start by adding DF(b) to IDF(b). + result->reserve(bDF->size()); + BitVecOps::ClearD(&m_poTraits, m_visited); + + for (BasicBlock* f : *bDF) + { + BitVecOps::AddElemD(&m_poTraits, m_visited, f->bbPostorderNum); + result->push_back(f); + } + + // Now for each block f from IDF(b) add DF(f) to IDF(b). This may result in new + // blocks being added to IDF(b) and the process repeats until no more new blocks + // are added. Note that since we keep adding to bIDF we can't use iterators as + // they may get invalidated. This happens to be a convenient way to avoid having + // to track newly added blocks in a separate set. + for (size_t newIndex = 0; newIndex < result->size(); newIndex++) + { + BasicBlock* f = (*result)[newIndex]; + BlkVector* fDF = m_map.LookupPointer(f); + + if (fDF == nullptr) + { + continue; + } + + for (BasicBlock* ff : *fDF) + { + if (BitVecOps::TryAddElemD(&m_poTraits, m_visited, ff->bbPostorderNum)) + { + result->push_back(ff); + } + } + } +} + +#ifdef DEBUG +void FlowGraphDominanceFrontiers::Dump() +{ + printf("DF:\n"); + for (unsigned i = 0; i < m_domTree->GetDfsTree()->GetPostOrderCount(); ++i) + { + BasicBlock* b = m_domTree->GetDfsTree()->GetPostOrder(i); + printf("Block " FMT_BB " := {", b->bbNum); + + BlkVector* bDF = m_map.LookupPointer(b); + if (bDF != nullptr) + { + int index = 0; + for (BasicBlock* f : *bDF) + { + printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum); + } + } + printf("}\n"); + } +} +#endif + //------------------------------------------------------------------------ // BlockToNaturalLoopMap::GetLoop: Map a block back to its most nested // containing loop. 
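The ComputeIteratedDominanceFrontier method added above is a closure
computation over the per-block DF map. The following standalone sketch shows
just that closure, using the result vector as its own worklist in the same
index-based way; the Block and DominanceFrontiers aliases are illustrative
only and are not the JIT's types.

// Standalone sketch of the DF -> IDF closure; names are illustrative only.
#include <cstdio>
#include <unordered_map>
#include <unordered_set>
#include <vector>

using Block              = int;
using DominanceFrontiers = std::unordered_map<Block, std::vector<Block>>;

// IDF(b) is the smallest set that contains DF(b) and is closed under taking
// dominance frontiers of its own members.
std::vector<Block> ComputeIdf(const DominanceFrontiers& df, Block b)
{
    std::vector<Block>        idf;
    std::unordered_set<Block> visited;

    auto it = df.find(b);
    if (it == df.end())
    {
        return idf;
    }

    // Seed with DF(b).
    for (Block f : it->second)
    {
        if (visited.insert(f).second)
        {
            idf.push_back(f);
        }
    }

    // The result vector doubles as the worklist; indexing (rather than
    // iterators) stays valid while we keep appending.
    for (size_t i = 0; i < idf.size(); i++)
    {
        auto next = df.find(idf[i]);
        if (next == df.end())
        {
            continue;
        }

        for (Block f : next->second)
        {
            if (visited.insert(f).second)
            {
                idf.push_back(f);
            }
        }
    }

    return idf;
}

int main()
{
    // DF(1) = {2}, DF(2) = {3}, DF(3) = {}  ==>  IDF(1) = {2, 3}.
    DominanceFrontiers df = {{1, {2}}, {2, {3}}};
    for (Block f : ComputeIdf(df, 1))
    {
        std::printf("BB%02d ", f);
    }
    std::printf("\n");
    return 0;
}

Reusing the output vector as the worklist is the same trick the JIT code above
uses to avoid tracking newly added blocks in a separate set.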
diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 2ea9cee6b9f211..277b18dfc268ae 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -391,6 +391,11 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); + if (!dsc->lvTracked) + { + return false; + } + BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) { if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) { @@ -1285,6 +1290,11 @@ bool Compiler::optPrimaryIVHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* return true; } + if (!varDsc->lvTracked) + { + return true; + } + BasicBlockVisit visitResult = loop->VisitRegularExitBlocks([=](BasicBlock* block) { if (VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex)) { diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 0d67ffd7a956a5..ffa1c58e2eca83 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -17,6 +17,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #endif #include "optcse.h" +#include "ssabuilder.h" #ifdef DEBUG #define RLDUMP(...) \ @@ -4841,20 +4842,20 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) unsigned cseSsaNum = SsaConfig::RESERVED_SSA_NUM; LclSsaVarDsc* ssaVarDsc = nullptr; - if (dsc->csdDefCount == 1) - { - JITDUMP(FMT_CSE " is single-def, so associated CSE temp V%02u will be in SSA\n", dsc->csdIndex, cseLclVarNum); - lclDsc->lvInSsa = true; - - // Allocate the ssa num - CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA); - cseSsaNum = lclDsc->lvPerSsaData.AllocSsaNum(allocator); - ssaVarDsc = lclDsc->GetPerSsaData(cseSsaNum); - } - else - { - INDEBUG(lclDsc->lvIsMultiDefCSE = 1); - } + // if (dsc->csdDefCount == 1) + //{ + // JITDUMP(FMT_CSE " is single-def, so associated CSE temp V%02u will be in SSA\n", dsc->csdIndex, + // cseLclVarNum); lclDsc->lvInSsa = true; + + // // Allocate the ssa num + // CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA); + // cseSsaNum = lclDsc->lvPerSsaData.AllocSsaNum(allocator); + // ssaVarDsc = lclDsc->GetPerSsaData(cseSsaNum); + //} + // else + //{ + // INDEBUG(lclDsc->lvIsMultiDefCSE = 1); + //} // Verify that all of the ValueNumbers in this list are correct as // Morph will change them when it performs a mutating operation. @@ -5037,7 +5038,7 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) // // Create a reference to the CSE temp - GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); + GenTreeLclVar* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); // Assign the ssa num for the lclvar use. Note it may be the reserved num. 
@@ -5302,8 +5303,34 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) /* re-morph the statement */ m_pCompiler->fgMorphBlockStmt(blk, stmt DEBUGARG("optValnumCSE")); + } while (lst != nullptr); + + ArrayStack defs(m_pCompiler->getAllocator(CMK_CSE)); + ArrayStack uses(m_pCompiler->getAllocator(CMK_CSE)); + + lst = dsc->csdTreeList; + do + { + Statement* lstStmt = lst->tslStmt; + for (GenTree* tree : lstStmt->TreeList()) + { + if (tree->OperIs(GT_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == cseLclVarNum)) + { + uses.Push(UseDefLocation(lst->tslBlock, lstStmt, tree->AsLclVar())); + } + if (tree->OperIs(GT_STORE_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == cseLclVarNum)) + { + defs.Push(UseDefLocation(lst->tslBlock, lstStmt, tree->AsLclVar())); + } + } + do + { + lst = lst->tslNext; + } while ((lst != nullptr) && (lst->tslStmt == lstStmt)); } while (lst != nullptr); + + SsaBuilder::InsertInSsa(m_pCompiler, cseLclVarNum, defs, uses); } void CSE_Heuristic::AdjustHeuristic(CSE_Candidate* successfulCandidate) @@ -5687,6 +5714,7 @@ PhaseStatus Compiler::optOptimizeValnumCSEs() } optValnumCSE_phase = false; + fgInvalidateDfsTree(); return heuristic->MadeChanges() ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 816862ec091be3..6996109e2468a0 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6186,7 +6186,7 @@ PhaseStatus Compiler::optVNBasedDeadStoreRemoval() LclSsaVarDsc* defDsc = varDsc->lvPerSsaData.GetSsaDefByIndex(defIndex); GenTreeLclVarCommon* store = defDsc->GetDefNode(); - if (store != nullptr) + if (store != nullptr && defDsc->m_vnPair.BothDefined()) { assert(store->OperIsLocalStore() && defDsc->m_vnPair.BothDefined()); diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index a0b018b3078b48..9f5e74abc31b1a 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -86,192 +86,10 @@ void Compiler::fgResetForSsa() SsaBuilder::SsaBuilder(Compiler* pCompiler) : m_pCompiler(pCompiler) , m_allocator(pCompiler->getAllocator(CMK_SSA)) - , m_visitedTraits(0, pCompiler) // at this point we do not know the size, SetupBBRoot can add a block , m_renameStack(m_allocator, pCompiler->lvaCount) { } -//------------------------------------------------------------------------ -// ComputeDominanceFrontiers: Compute flow graph dominance frontiers -// -// Arguments: -// postOrder - an array containing all flow graph blocks -// count - the number of blocks in the postOrder array -// mapDF - a caller provided hashtable that will be populated -// with blocks and their dominance frontiers (only those -// blocks that have non-empty frontiers will be included) -// -// Notes: -// Recall that the dominance frontier of a block B is the set of blocks -// B3 such that there exists some B2 s.t. B3 is a successor of B2, and -// B dominates B2. Note that this dominance need not be strict -- B2 -// and B may be the same node. -// See "A simple, fast dominance algorithm", by Cooper, Harvey, and Kennedy. 
-// -void SsaBuilder::ComputeDominanceFrontiers(BasicBlock** postOrder, int count, BlkToBlkVectorMap* mapDF) -{ - DBG_SSA_JITDUMP("Computing DF:\n"); - - for (int i = 0; i < count; ++i) - { - BasicBlock* block = postOrder[i]; - - DBG_SSA_JITDUMP("Considering block " FMT_BB ".\n", block->bbNum); - - // Recall that B3 is in the dom frontier of B1 if there exists a B2 - // such that B1 dom B2, !(B1 dom B3), and B3 is an immediate successor - // of B2. (Note that B1 might be the same block as B2.) - // In that definition, we're considering "block" to be B3, and trying - // to find B1's. To do so, first we consider the predecessors of "block", - // searching for candidate B2's -- "block" is obviously an immediate successor - // of its immediate predecessors. If there are zero or one preds, then there - // is no pred, or else the single pred dominates "block", so no B2 exists. - - FlowEdge* blockPreds = m_pCompiler->BlockPredsWithEH(block); - - // If block has 0/1 predecessor, skip, apart from handler entry blocks - // that are always in the dominance frontier of its enclosed blocks. - if (!m_pCompiler->bbIsHandlerBeg(block) && - ((blockPreds == nullptr) || (blockPreds->getNextPredEdge() == nullptr))) - { - DBG_SSA_JITDUMP(" Has %d preds; skipping.\n", blockPreds == nullptr ? 0 : 1); - continue; - } - - // Otherwise, there are > 1 preds. Each is a candidate B2 in the definition -- - // *unless* it dominates "block"/B3. - - FlowGraphDfsTree* dfsTree = m_pCompiler->m_dfsTree; - FlowGraphDominatorTree* domTree = m_pCompiler->m_domTree; - - for (FlowEdge* pred = blockPreds; pred != nullptr; pred = pred->getNextPredEdge()) - { - BasicBlock* predBlock = pred->getSourceBlock(); - DBG_SSA_JITDUMP(" Considering predecessor " FMT_BB ".\n", predBlock->bbNum); - - if (!dfsTree->Contains(predBlock)) - { - DBG_SSA_JITDUMP(" Unreachable node\n"); - continue; - } - - // If we've found a B2, then consider the possible B1's. We start with - // B2, since a block dominates itself, then traverse upwards in the dominator - // tree, stopping when we reach the root, or the immediate dominator of "block"/B3. - // (Note that we are guaranteed to encounter this immediate dominator of "block"/B3: - // a predecessor must be dominated by B3's immediate dominator.) - // Along this way, make "block"/B3 part of the dom frontier of the B1. - // When we reach this immediate dominator, the definition no longer applies, since this - // potential B1 *does* dominate "block"/B3, so we stop. - for (BasicBlock* b1 = predBlock; (b1 != nullptr) && (b1 != block->bbIDom); // !root && !loop - b1 = b1->bbIDom) - { - DBG_SSA_JITDUMP(" Adding " FMT_BB " to dom frontier of pred dom " FMT_BB ".\n", block->bbNum, - b1->bbNum); - - BlkVector& b1DF = *mapDF->Emplace(b1, m_allocator); - // It's possible to encounter the same DF multiple times, ensure that we don't add duplicates. - if (b1DF.empty() || (b1DF.back() != block)) - { - b1DF.push_back(block); - } - } - } - } - -#ifdef DEBUG - if (m_pCompiler->verboseSsa) - { - printf("\nComputed DF:\n"); - for (int i = 0; i < count; ++i) - { - BasicBlock* b = postOrder[i]; - printf("Block " FMT_BB " := {", b->bbNum); - - BlkVector* bDF = mapDF->LookupPointer(b); - if (bDF != nullptr) - { - int index = 0; - for (BasicBlock* f : *bDF) - { - printf("%s" FMT_BB, (index++ == 0) ? 
"" : ",", f->bbNum); - } - } - printf("}\n"); - } - } -#endif -} - -//------------------------------------------------------------------------ -// ComputeIteratedDominanceFrontier: Compute the iterated dominance frontier -// for the specified block. -// -// Arguments: -// b - the block to computed the frontier for -// mapDF - a map containing the dominance frontiers of all blocks -// bIDF - a caller provided vector where the IDF is to be stored -// -// Notes: -// The iterated dominance frontier is formed by a closure operation: -// the IDF of B is the smallest set that includes B's dominance frontier, -// and also includes the dominance frontier of all elements of the set. -// -void SsaBuilder::ComputeIteratedDominanceFrontier(BasicBlock* b, const BlkToBlkVectorMap* mapDF, BlkVector* bIDF) -{ - assert(bIDF->empty()); - - BlkVector* bDF = mapDF->LookupPointer(b); - - if (bDF != nullptr) - { - // Compute IDF(b) - start by adding DF(b) to IDF(b). - bIDF->reserve(bDF->size()); - BitVecOps::ClearD(&m_visitedTraits, m_visited); - - for (BasicBlock* f : *bDF) - { - BitVecOps::AddElemD(&m_visitedTraits, m_visited, f->bbPostorderNum); - bIDF->push_back(f); - } - - // Now for each block f from IDF(b) add DF(f) to IDF(b). This may result in new - // blocks being added to IDF(b) and the process repeats until no more new blocks - // are added. Note that since we keep adding to bIDF we can't use iterators as - // they may get invalidated. This happens to be a convenient way to avoid having - // to track newly added blocks in a separate set. - for (size_t newIndex = 0; newIndex < bIDF->size(); newIndex++) - { - BasicBlock* f = (*bIDF)[newIndex]; - BlkVector* fDF = mapDF->LookupPointer(f); - - if (fDF != nullptr) - { - for (BasicBlock* ff : *fDF) - { - if (BitVecOps::TryAddElemD(&m_visitedTraits, m_visited, ff->bbPostorderNum)) - { - bIDF->push_back(ff); - } - } - } - } - } - -#ifdef DEBUG - if (m_pCompiler->verboseSsa) - { - printf("IDF(" FMT_BB ") := {", b->bbNum); - int index = 0; - for (BasicBlock* f : *bIDF) - { - printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum); - } - printf("}\n"); - } -#endif -} - /** * Returns the phi GT_PHI node if the variable already has a phi node. * @@ -280,7 +98,7 @@ void SsaBuilder::ComputeIteratedDominanceFrontier(BasicBlock* b, const BlkToBlkV * * @return If there is a phi node for the lclNum, returns the GT_PHI tree, else NULL. */ -static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum) +static Statement* GetPhiNode(BasicBlock* block, unsigned lclNum) { // Walk the statements for phi nodes. for (Statement* const stmt : block->Statements()) @@ -295,7 +113,7 @@ static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum) GenTree* tree = stmt->GetRootNode(); if (tree->AsLclVar()->GetLclNum() == lclNum) { - return tree->AsLclVar()->Data(); + return stmt; } } @@ -309,19 +127,22 @@ static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum) // block - The block where to insert the statement // lclNum - The variable number // -void SsaBuilder::InsertPhi(BasicBlock* block, unsigned lclNum) +// Returns: +// Inserted phi definition. 
+// +Statement* SsaBuilder::InsertPhi(Compiler* comp, BasicBlock* block, unsigned lclNum) { - var_types type = m_pCompiler->lvaGetDesc(lclNum)->TypeGet(); + var_types type = comp->lvaGetDesc(lclNum)->TypeGet(); // PHIs and all the associated nodes do not generate any code so the costs are always 0 - GenTree* phi = new (m_pCompiler, GT_PHI) GenTreePhi(type); + GenTree* phi = new (comp, GT_PHI) GenTreePhi(type); phi->SetCosts(0, 0); - GenTree* store = m_pCompiler->gtNewStoreLclVarNode(lclNum, phi); + GenTreeLclVar* store = comp->gtNewStoreLclVarNode(lclNum, phi); store->SetCosts(0, 0); store->gtType = type; // TODO-ASG-Cleanup: delete. This quirk avoided diffs from costing-induced tail dup. // Create the statement and chain everything in linear order - PHI, STORE_LCL_VAR. - Statement* stmt = m_pCompiler->gtNewStmt(store); + Statement* stmt = comp->gtNewStmt(store); stmt->SetTreeList(phi); phi->gtNext = store; store->gtPrev = phi; @@ -334,9 +155,10 @@ void SsaBuilder::InsertPhi(BasicBlock* block, unsigned lclNum) } #endif // DEBUG - m_pCompiler->fgInsertStmtAtBeg(block, stmt); + comp->fgInsertStmtAtBeg(block, stmt); JITDUMP("Added PHI definition for V%02u at start of " FMT_BB ".\n", lclNum, block->bbNum); + return stmt; } //------------------------------------------------------------------------ @@ -378,16 +200,26 @@ void SsaBuilder::AddPhiArg( } // Didn't find a match, add a new phi arg - // - var_types type = m_pCompiler->lvaGetDesc(lclNum)->TypeGet(); + AddNewPhiArg(m_pCompiler, block, stmt, phi, lclNum, ssaNum, pred); +} - GenTree* phiArg = new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(type, lclNum, ssaNum, pred); +void SsaBuilder::AddNewPhiArg(Compiler* comp, + BasicBlock* block, + Statement* stmt, + GenTreePhi* phi, + unsigned lclNum, + unsigned ssaNum, + BasicBlock* pred) +{ + var_types type = comp->lvaGetDesc(lclNum)->TypeGet(); + + GenTree* phiArg = new (comp, GT_PHI_ARG) GenTreePhiArg(type, lclNum, ssaNum, pred); // Costs are not relevant for PHI args. phiArg->SetCosts(0, 0); // The argument order doesn't matter so just insert at the front of the list because // it's easier. It's also easier to insert in linear order since the first argument // will be first in linear order as well. - phi->gtUses = new (m_pCompiler, CMK_ASTNode) GenTreePhi::Use(phiArg, phi->gtUses); + phi->gtUses = new (comp, CMK_ASTNode) GenTreePhi::Use(phiArg, phi->gtUses); GenTree* head = stmt->GetTreeList(); assert(head->OperIs(GT_PHI, GT_PHI_ARG)); @@ -395,7 +227,7 @@ void SsaBuilder::AddPhiArg( phiArg->gtNext = head; head->gtPrev = phiArg; - LclVarDsc* const varDsc = m_pCompiler->lvaGetDesc(lclNum); + LclVarDsc* const varDsc = comp->lvaGetDesc(lclNum); LclSsaVarDsc* const ssaDesc = varDsc->GetPerSsaData(ssaNum); ssaDesc->AddPhiUse(block); @@ -427,10 +259,11 @@ void SsaBuilder::InsertPhiFunctions() unsigned count = dfsTree->GetPostOrderCount(); // Compute dominance frontier. 
- BlkToBlkVectorMap mapDF(m_allocator); - ComputeDominanceFrontiers(postOrder, count, &mapDF); + m_pCompiler->m_domFrontiers = FlowGraphDominanceFrontiers::Build(m_pCompiler->m_domTree); EndPhase(PHASE_BUILD_SSA_DF); + DBEXEC(m_pCompiler->verboseSsa, m_pCompiler->m_domTree->Dump()); + // Use the same IDF vector for all blocks to avoid unnecessary memory allocations BlkVector blockIDF(m_allocator); @@ -442,7 +275,20 @@ void SsaBuilder::InsertPhiFunctions() DBG_SSA_JITDUMP("Considering dominance frontier of block " FMT_BB ":\n", block->bbNum); blockIDF.clear(); - ComputeIteratedDominanceFrontier(block, &mapDF, &blockIDF); + m_pCompiler->m_domFrontiers->ComputeIteratedDominanceFrontier(block, &blockIDF); + +#ifdef DEBUG + if (m_pCompiler->verboseSsa) + { + printf("IDF(" FMT_BB ") := {", block->bbNum); + int index = 0; + for (BasicBlock* f : blockIDF) + { + printf("%s" FMT_BB, (index++ == 0) ? "" : ",", f->bbNum); + } + printf("}\n"); + } +#endif if (blockIDF.empty()) { @@ -480,7 +326,7 @@ void SsaBuilder::InsertPhiFunctions() { // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of // j. So insert a phi node at l. - InsertPhi(bbInDomFront, lclNum); + InsertPhi(m_pCompiler, bbInDomFront, lclNum); } } } @@ -1300,9 +1146,6 @@ void SsaBuilder::Build() { JITDUMP("*************** In SsaBuilder::Build()\n"); - m_visitedTraits = m_pCompiler->m_dfsTree->PostOrderTraits(); - m_visited = BitVecOps::MakeEmpty(&m_visitedTraits); - // Compute liveness on the graph. m_pCompiler->fgLocalVarLiveness(); EndPhase(PHASE_BUILD_SSA_LIVENESS); @@ -1505,3 +1348,335 @@ void Compiler::JitTestCheckSSA() } } #endif // DEBUG + +class IncrementalSsaBuilder +{ + Compiler* m_comp; + unsigned m_lclNum; + ArrayStack& m_defs; + ArrayStack& m_uses; + BitVecTraits m_poTraits; + BitVec m_defBlocks; + BitVec m_iteratedDominanceFrontiers; + + UseDefLocation FindOrCreateReachingDef(const UseDefLocation& use); + bool FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def); + bool FindReachingDefInSameStatement(const UseDefLocation& use, UseDefLocation* def); + Statement* LatestStatement(Statement* stmt1, Statement* stmt2); +public: + IncrementalSsaBuilder(Compiler* comp, + unsigned lclNum, + ArrayStack& defs, + ArrayStack& uses) + : m_comp(comp) + , m_lclNum(lclNum) + , m_defs(defs) + , m_uses(uses) + , m_poTraits(comp->m_dfsTree->PostOrderTraits()) + , m_defBlocks(BitVecOps::MakeEmpty(&m_poTraits)) + , m_iteratedDominanceFrontiers(BitVecOps::MakeEmpty(&m_poTraits)) + { + } + + void Insert(); +}; + +//------------------------------------------------------------------------ +// FindOrCreateReachingDef: Given a use indicated by a block and potentially a +// statement and tree, find the reaching definition for it, potentially +// creating it if the reaching definition is a phi that has not been created +// yet. +// +// Parameters: +// use - The use. The block must be non-null. The statement and tree can be +// null, meaning that the use is happening after the last statement in the +// block. +// +// Returns: +// Location of a definition node that is the reaching def. 
+// +UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& use) +{ + for (BasicBlock* dom = use.Block; dom != nullptr; dom = dom->bbIDom) + { + UseDefLocation reachingDef; + if (BitVecOps::IsMember(&m_poTraits, m_defBlocks, dom->bbPostorderNum) && + FindReachingDefInBlock(use, dom, &reachingDef)) + { + return reachingDef; + } + + if (BitVecOps::IsMember(&m_poTraits, m_iteratedDominanceFrontiers, dom->bbPostorderNum)) + { + Statement* phiDef = GetPhiNode(dom, m_lclNum); + if (phiDef == nullptr) + { + phiDef = SsaBuilder::InsertPhi(m_comp, dom, m_lclNum); + + LclVarDsc* dsc = m_comp->lvaGetDesc(m_lclNum); + unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(m_comp->getAllocator(CMK_SSA), dom, + phiDef->GetRootNode()->AsLclVarCommon()); + phiDef->GetRootNode()->AsLclVar()->SetSsaNum(ssaNum); + + GenTreePhi* phi = phiDef->GetRootNode()->AsLclVar()->Data()->AsPhi(); + + for (FlowEdge* predEdge = m_comp->BlockPredsWithEH(dom); predEdge != nullptr; + predEdge = predEdge->getNextPredEdge()) + { + BasicBlock* pred = predEdge->getSourceBlock(); + if (!m_comp->m_dfsTree->Contains(pred)) + { + continue; + } + + // TODO: This cannot be recursive + UseDefLocation phiArgDef = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgDef.Tree->GetSsaNum(), pred); + } + + m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); + } + + return UseDefLocation(dom, phiDef, phiDef->GetRootNode()->AsLclVar()); + } + } + + assert(!"Found use without any def"); + unreached(); +} + +//------------------------------------------------------------------------ +// FindReachingDefInBlock: Given a use, try to find a definition in the +// specified block. +// +// Parameters: +// use - The use. The block must be non-null. The statement and tree can be +// null, meaning that the use is happening after the last statement in the +// block. +// block - The block to look for a definition in. +// def - [out] The found definition, if any. +// +// Returns: +// True if a reaching definition was found in "block". +// +// Remarks: +// If the use occurs in "block", then this function takes care to find the +// latest definition before the use. 
+// +bool IncrementalSsaBuilder::FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def) +{ + Statement* latestDefStmt = nullptr; + GenTreeLclVar* latestTree = nullptr; + + for (int i = 0; i < m_defs.Height(); i++) + { + UseDefLocation& candidate = m_defs.BottomRef(i); + if (candidate.Block != block) + { + continue; + } + + if (candidate.Stmt == use.Stmt) + { + if (FindReachingDefInSameStatement(use, def)) + { + return true; + } + + continue; + } + + if ((candidate.Block == use.Block) && (use.Stmt != nullptr) && + (LatestStatement(use.Stmt, candidate.Stmt) != use.Stmt)) + { + // Def is after use + continue; + } + + if (candidate.Stmt == latestDefStmt) + { + latestTree = nullptr; + } + else if ((latestDefStmt == nullptr) || (LatestStatement(candidate.Stmt, latestDefStmt) == candidate.Stmt)) + { + latestDefStmt = candidate.Stmt; + latestTree = candidate.Tree; + } + } + + if (latestDefStmt == nullptr) + { + return false; + } + + if (latestTree == nullptr) + { + for (GenTree* tree : latestDefStmt->TreeList()) + { + if (tree->OperIs(GT_STORE_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == m_lclNum)) + { + latestTree = tree->AsLclVar(); + } + } + + assert(latestTree != nullptr); + } + + *def = UseDefLocation(use.Block, latestDefStmt, latestTree); + return true; +} + +//------------------------------------------------------------------------ +// FindReachingDefInSameStatement: Given a use, try to find a definition within +// the same statement as that use. +// +// Parameters: +// use - The use. +// def - [out] The found definition, if any. +// +// Returns: +// True if a reaching definition was found in the same statement as the use. +// +bool IncrementalSsaBuilder::FindReachingDefInSameStatement(const UseDefLocation& use, UseDefLocation* def) +{ + for (GenTree* tree = use.Tree->gtPrev; tree != nullptr; tree = tree->gtPrev) + { + if (tree->OperIs(GT_STORE_LCL_VAR) && (tree->AsLclVar()->GetLclNum() == m_lclNum)) + { + *def = UseDefLocation(use.Block, use.Stmt, tree->AsLclVar()); + return true; + } + } + + return false; +} + +//------------------------------------------------------------------------ +// LatestStatement: Given two statements in the same block, find the latest one +// of them. +// +// Parameters: +// stmt1 - The first statement +// stmt2 - The second statement +// +// Returns: +// Latest of the two statements. +// +Statement* IncrementalSsaBuilder::LatestStatement(Statement* stmt1, Statement* stmt2) +{ + if (stmt1 == stmt2) + { + return stmt1; + } + + Statement* cursor1 = stmt1->GetNextStmt(); + Statement* cursor2 = stmt2->GetNextStmt(); + + while (true) + { + if ((cursor1 == stmt2) || (cursor2 == nullptr)) + { + return stmt2; + } + + if ((cursor2 == stmt1) || (cursor1 == nullptr)) + { + return stmt1; + } + + cursor1 = cursor1->GetNextStmt(); + cursor2 = cursor2->GetNextStmt(); + } +} + +//------------------------------------------------------------------------ +// Insert: Insert the uses and definitions in SSA. +// +void IncrementalSsaBuilder::Insert() +{ + FlowGraphDfsTree* dfsTree = m_comp->m_dfsTree; + + // Alloc SSA numbers for all real definitions. 
+ for (int i = 0; i < m_defs.Height(); i++) + { + UseDefLocation& def = m_defs.BottomRef(i); + if (!dfsTree->Contains(def.Block)) + { + continue; + } + + BitVecOps::AddElemD(&m_poTraits, m_defBlocks, def.Block->bbPostorderNum); + + LclVarDsc* dsc = m_comp->lvaGetDesc(m_lclNum); + unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(m_comp->getAllocator(CMK_SSA), def.Block, def.Tree); + def.Tree->SetSsaNum(ssaNum); + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum); + ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair; + } + + // Compute iterated dominance frontiers of all real definitions. These are + // the blocks that unpruned phi definitions would be inserted into. We + // insert the phis lazily to end up with pruned SSA, but we still need to + // know which blocks are candidates for phis. + BlkVector idf(m_comp->getAllocator(CMK_SSA)); + + for (int i = 0; i < m_defs.Height(); i++) + { + BasicBlock* block = m_defs.BottomRef(i).Block; + idf.clear(); + m_comp->m_domFrontiers->ComputeIteratedDominanceFrontier(block, &idf); + + for (BasicBlock* idfBlock : idf) + { + BitVecOps::AddElemD(&m_poTraits, m_iteratedDominanceFrontiers, idfBlock->bbPostorderNum); + } + } + + // Finally compute all the reaching defs for the uses. + for (int i = 0; i < m_uses.Height(); i++) + { + UseDefLocation& use = m_uses.BottomRef(i); + if (!dfsTree->Contains(use.Block)) + { + continue; + } + + UseDefLocation def = FindOrCreateReachingDef(use); + use.Tree->SetSsaNum(def.Tree->GetSsaNum()); + } +} + +//------------------------------------------------------------------------ +// InsertInSsa: Insert a specified local in SSA given its local number and all +// of its definitions and uses in the IR. +// +// Parameters: +// comp - Compiler instance +// lclNum - The local that is being inserted into SSA +// defs - All STORE_LCL_VAR definitions of the local +// uses - All LCL_VAR uses of the local +// +void SsaBuilder::InsertInSsa(Compiler* comp, + unsigned lclNum, + ArrayStack& defs, + ArrayStack& uses) +{ + if (comp->m_dfsTree == nullptr) + { + comp->m_dfsTree = comp->fgComputeDfs(); + } + + if (comp->m_domTree == nullptr) + { + comp->m_domTree = FlowGraphDominatorTree::Build(comp->m_dfsTree); + } + + if (comp->m_domFrontiers == nullptr) + { + comp->m_domFrontiers = FlowGraphDominanceFrontiers::Build(comp->m_domTree); + } + + IncrementalSsaBuilder builder(comp, lclNum, defs, uses); + builder.Insert(); + comp->lvaGetDesc(lclNum)->lvInSsa = true; +} diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h index 014edb94f1e01e..20363669acba1f 100644 --- a/src/coreclr/jit/ssabuilder.h +++ b/src/coreclr/jit/ssabuilder.h @@ -12,8 +12,28 @@ typedef int LclVarNum; // Pair of a local var name eg: V01 and Ssa number; eg: V01_01 typedef std::pair SsaVarName; +struct UseDefLocation +{ + BasicBlock* Block = nullptr; + Statement* Stmt = nullptr; + GenTreeLclVar* Tree = nullptr; + + UseDefLocation() + { + } + + UseDefLocation(BasicBlock* block, Statement* stmt, GenTreeLclVar* tree) + : Block(block) + , Stmt(stmt) + , Tree(tree) + { + } +}; + class SsaBuilder { + friend class IncrementalSsaBuilder; + private: inline void EndPhase(Phases phase) { @@ -33,20 +53,26 @@ class SsaBuilder // variable are stored in the "per SSA data" on the local descriptor. void Build(); + static void InsertInSsa(Compiler* comp, + unsigned lclNum, + ArrayStack& defs, + ArrayStack& uses); private: - // Compute flow graph dominance frontiers. 
- void ComputeDominanceFrontiers(BasicBlock** postOrder, int count, BlkToBlkVectorMap* mapDF); - - // Compute the iterated dominance frontier for the specified block. - void ComputeIteratedDominanceFrontier(BasicBlock* b, const BlkToBlkVectorMap* mapDF, BlkVector* bIDF); - // Insert a new GT_PHI statement. - void InsertPhi(BasicBlock* block, unsigned lclNum); + static Statement* InsertPhi(Compiler* comp, BasicBlock* block, unsigned lclNum); // Add a new GT_PHI_ARG node to an existing GT_PHI node void AddPhiArg( BasicBlock* block, Statement* stmt, GenTreePhi* phi, unsigned lclNum, unsigned ssaNum, BasicBlock* pred); + static void AddNewPhiArg(Compiler* comp, + BasicBlock* block, + Statement* stmt, + GenTreePhi* phi, + unsigned lclNum, + unsigned ssaNum, + BasicBlock* pred); + // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires // count to be the valid entries in the "postOrder" array. Inserts GT_PHI nodes at the beginning // of basic blocks that require them. @@ -79,12 +105,7 @@ class SsaBuilder // the handlers of a newly entered block based on one entering block. void AddPhiArgsToNewlyEnteredHandler(BasicBlock* predEnterBlock, BasicBlock* enterBlock, BasicBlock* handlerStart); - Compiler* m_pCompiler; - CompAllocator m_allocator; - - // Bit vector used by ComputeImmediateDom to track already visited blocks. - BitVecTraits m_visitedTraits; - BitVec m_visited; - + Compiler* m_pCompiler; + CompAllocator m_allocator; SsaRenameState m_renameStack; }; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index afbaeeb931bffa..5cccf9f24debfa 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -10866,6 +10866,7 @@ PhaseStatus Compiler::fgValueNumber() #endif // DEBUG fgVNPassesCompleted++; + vnState = nullptr; return PhaseStatus::MODIFIED_EVERYTHING; } @@ -11152,7 +11153,7 @@ void Compiler::fgValueNumberPhiDef(GenTreeLclVar* newSsaDef, BasicBlock* blk, bo for (GenTreePhi::Use& use : phiNode->Uses()) { GenTreePhiArg* phiArg = use.GetNode()->AsPhiArg(); - if (!vnState->IsReachableThroughPred(blk, phiArg->gtPredBB)) + if ((vnState != nullptr) && !vnState->IsReachableThroughPred(blk, phiArg->gtPredBB)) { JITDUMP(" Phi arg [%06u] is unnecessary; path through pred " FMT_BB " cannot be taken\n", dspTreeID(phiArg), phiArg->gtPredBB->bbNum); From 8c8b9a02902d5d5326f0309e32d3701683fc3112 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Aug 2024 09:15:55 +0200 Subject: [PATCH 02/25] Revert unnecessary change --- src/coreclr/jit/optimizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 6996109e2468a0..816862ec091be3 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -6186,7 +6186,7 @@ PhaseStatus Compiler::optVNBasedDeadStoreRemoval() LclSsaVarDsc* defDsc = varDsc->lvPerSsaData.GetSsaDefByIndex(defIndex); GenTreeLclVarCommon* store = defDsc->GetDefNode(); - if (store != nullptr && defDsc->m_vnPair.BothDefined()) + if (store != nullptr) { assert(store->OperIsLocalStore() && defDsc->m_vnPair.BothDefined()); From 658b3ce28e6d6d59ed8a62fc93cdb3f1d4edeefa Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Aug 2024 09:52:13 +0200 Subject: [PATCH 03/25] Avoid unnecessary flowgraph annotation recomputations --- src/coreclr/jit/compiler.cpp | 2 ++ src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/helperexpansion.cpp | 9 +++++++-- src/coreclr/jit/inductionvariableopts.cpp | 
18 +++++++++++++++--- src/coreclr/jit/morph.cpp | 23 ++++++++++++++++++----- src/coreclr/jit/optcse.cpp | 1 - src/coreclr/jit/redundantbranchopts.cpp | 2 +- 7 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 62da3591626d16..83014990d7aa7d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -5165,6 +5165,8 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_OPTIMIZE_INDUCTION_VARIABLES, &Compiler::optInductionVariables); } + fgInvalidateDfsTree(); + if (doVNBasedDeadStoreRemoval) { // Note: this invalidates SSA and value numbers on tree nodes. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 388dc87e47e0b9..2059157291545e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5485,7 +5485,7 @@ class Compiler void fgMergeBlockReturn(BasicBlock* block); - bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg)); + bool fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg), bool invalidateDFSTreeOnFGChange = true); void fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt); bool gtRemoveTreesAfterNoReturnCall(BasicBlock* block, Statement* stmt); diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 3d5678d2743b42..e93b872c5455e3 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -1206,9 +1206,14 @@ PhaseStatus Compiler::fgExpandHelper(bool skipRarelyRunBlocks) } } - if ((result == PhaseStatus::MODIFIED_EVERYTHING) && opts.OptimizationEnabled()) + if (result == PhaseStatus::MODIFIED_EVERYTHING) { - fgRenumberBlocks(); + fgInvalidateDfsTree(); + + if (opts.OptimizationEnabled()) + { + fgRenumberBlocks(); + } } return result; diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 277b18dfc268ae..d18fe3289cc8ca 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -2424,9 +2424,21 @@ PhaseStatus Compiler::optInductionVariables() bool changed = false; optReachableBitVecTraits = nullptr; - m_dfsTree = fgComputeDfs(); - m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); - m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + + if (m_dfsTree == nullptr) + { + m_dfsTree = fgComputeDfs(); + } + + if (m_domTree == nullptr) + { + m_domTree = FlowGraphDominatorTree::Build(m_dfsTree); + } + + if (m_loops == nullptr) + { + m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); + } LoopLocalOccurrences loopLocals(m_loops); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index d037282e447ab5..7cff7e19f27760 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12936,9 +12936,11 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // fgMorphBlockStmt: morph a single statement in a block. // // Arguments: -// block - block containing the statement -// stmt - statement to morph -// msg - string to identify caller in a dump +// block - block containing the statement +// stmt - statement to morph +// msg - string to identify caller in a dump +// invalidateDFSTreeOnFGChange - whether or not the DFS tree should be invalidated +// by this function if it makes a flow graph change // // Returns: // true if 'stmt' was removed from the block. 
@@ -12947,7 +12949,9 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // Notes: // Can be called anytime, unlike fgMorphStmts() which should only be called once. // -bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg)) +bool Compiler::fgMorphBlockStmt(BasicBlock* block, + Statement* stmt DEBUGARG(const char* msg), + bool invalidateDFSTreeOnFGChange) { assert(block != nullptr); assert(stmt != nullptr); @@ -13013,7 +13017,11 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(cons if (!removedStmt && (stmt->GetNextStmt() == nullptr) && !fgRemoveRestOfBlock) { FoldResult const fr = fgFoldConditional(block); - removedStmt = (fr == FoldResult::FOLD_REMOVED_LAST_STMT); + if (invalidateDFSTreeOnFGChange && (fr != FoldResult::FOLD_DID_NOTHING)) + { + fgInvalidateDfsTree(); + } + removedStmt = (fr == FoldResult::FOLD_REMOVED_LAST_STMT); } if (!removedStmt) @@ -13053,6 +13061,11 @@ bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(cons { // Convert block to a throw bb fgConvertBBToThrowBB(block); + + if (invalidateDFSTreeOnFGChange) + { + fgInvalidateDfsTree(); + } } #ifdef DEBUG diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index ffa1c58e2eca83..862a21806eb4d4 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -5714,7 +5714,6 @@ PhaseStatus Compiler::optOptimizeValnumCSEs() } optValnumCSE_phase = false; - fgInvalidateDfsTree(); return heuristic->MadeChanges() ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index f074fab8032f6a..5a26ee1f5aca6a 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -958,7 +958,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) JITDUMP("\nRedundant branch opt in " FMT_BB ":\n", block->bbNum); - fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__)); + fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__), /* invalidateDFSTreeOnFGChange */ false); Metrics.RedundantBranchesEliminated++; return true; } From 6e2385668dbf5b6cfa539649a7ba78eca278e476 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Aug 2024 10:28:22 +0200 Subject: [PATCH 04/25] Speed up single-def case --- src/coreclr/jit/optcse.cpp | 57 ++-------------------------------- src/coreclr/jit/ssabuilder.cpp | 57 ++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 58 deletions(-) diff --git a/src/coreclr/jit/optcse.cpp b/src/coreclr/jit/optcse.cpp index 862a21806eb4d4..f418baef7ba14f 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -4836,26 +4836,8 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) // // Later we will unmark any nested CSE's for the CSE uses. // - // If there's just a single def for the CSE, we'll put this - // CSE into SSA form on the fly. We won't need any PHIs. 
- // - unsigned cseSsaNum = SsaConfig::RESERVED_SSA_NUM; - LclSsaVarDsc* ssaVarDsc = nullptr; - - // if (dsc->csdDefCount == 1) - //{ - // JITDUMP(FMT_CSE " is single-def, so associated CSE temp V%02u will be in SSA\n", dsc->csdIndex, - // cseLclVarNum); lclDsc->lvInSsa = true; - - // // Allocate the ssa num - // CompAllocator allocator = m_pCompiler->getAllocator(CMK_SSA); - // cseSsaNum = lclDsc->lvPerSsaData.AllocSsaNum(allocator); - // ssaVarDsc = lclDsc->GetPerSsaData(cseSsaNum); - //} - // else - //{ - // INDEBUG(lclDsc->lvIsMultiDefCSE = 1); - //} + + INDEBUG(lclDsc->lvIsMultiDefCSE = dsc->csdDefCount > 1); // Verify that all of the ValueNumbers in this list are correct as // Morph will change them when it performs a mutating operation. @@ -5041,15 +5023,6 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) GenTreeLclVar* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); - // Assign the ssa num for the lclvar use. Note it may be the reserved num. - cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - - // If this local is in ssa, notify ssa there's a new use. - if (ssaVarDsc != nullptr) - { - ssaVarDsc->AddUse(blk); - } - cse = cseLclVar; if (isSharedConst) { @@ -5209,42 +5182,16 @@ void CSE_HeuristicCommon::PerformCSE(CSE_Candidate* successfulCandidate) store->gtVNPair = ValueNumStore::VNPForVoid(); // The store node itself is $VN.Void. noway_assert(store->OperIs(GT_STORE_LCL_VAR)); - // Backpatch the SSA def, if we're putting this CSE temp into ssa. - store->AsLclVar()->SetSsaNum(cseSsaNum); - // Move the information about the CSE def to the store; it now indicates a completed // CSE def instead of just a candidate. optCSE_canSwap uses this information to reason // about evaluation order in between substitutions of CSE defs/uses. store->gtCSEnum = exp->gtCSEnum; exp->gtCSEnum = NO_CSE; - if (cseSsaNum != SsaConfig::RESERVED_SSA_NUM) - { - LclSsaVarDsc* ssaVarDsc = m_pCompiler->lvaTable[cseLclVarNum].GetPerSsaData(cseSsaNum); - - // These should not have been set yet, since this is the first and - // only def for this CSE. - assert(ssaVarDsc->GetBlock() == nullptr); - assert(ssaVarDsc->GetDefNode() == nullptr); - - ssaVarDsc->m_vnPair = val->gtVNPair; - ssaVarDsc->SetBlock(blk); - ssaVarDsc->SetDefNode(store->AsLclVarCommon()); - } - /* Create a reference to the CSE temp */ GenTree* cseLclVar = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp); cseLclVar->gtVNPair.SetBoth(dsc->csdConstDefVN); - // Assign the ssa num for the lclvar use. Note it may be the reserved num. - cseLclVar->AsLclVarCommon()->SetSsaNum(cseSsaNum); - - // If this local is in ssa, notify ssa there's a new use. - if (ssaVarDsc != nullptr) - { - ssaVarDsc->AddUse(blk); - } - GenTree* cseUse = cseLclVar; if (isSharedConst) { diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 9f5e74abc31b1a..bdf7d0f237549c 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1435,6 +1435,9 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati } m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); + + JITDUMP(" New phi def:\n"); + DISPSTMT(phiDef); } return UseDefLocation(dom, phiDef, phiDef->GetRootNode()->AsLclVar()); @@ -1596,6 +1599,7 @@ void IncrementalSsaBuilder::Insert() { FlowGraphDfsTree* dfsTree = m_comp->m_dfsTree; + LclVarDsc* dsc = m_comp->lvaGetDesc(m_lclNum); // Alloc SSA numbers for all real definitions. 
for (int i = 0; i < m_defs.Height(); i++) { @@ -1607,11 +1611,11 @@ void IncrementalSsaBuilder::Insert() BitVecOps::AddElemD(&m_poTraits, m_defBlocks, def.Block->bbPostorderNum); - LclVarDsc* dsc = m_comp->lvaGetDesc(m_lclNum); - unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(m_comp->getAllocator(CMK_SSA), def.Block, def.Tree); + unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(m_comp->getAllocator(CMK_SSA), def.Block, def.Tree); def.Tree->SetSsaNum(ssaNum); LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum); ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair; + JITDUMP(" [%06u] d:%u\n", Compiler::dspTreeID(def.Tree), ssaNum); } // Compute iterated dominance frontiers of all real definitions. These are @@ -1643,6 +1647,8 @@ void IncrementalSsaBuilder::Insert() UseDefLocation def = FindOrCreateReachingDef(use); use.Tree->SetSsaNum(def.Tree->GetSsaNum()); + dsc->GetPerSsaData(def.Tree->GetSsaNum())->AddUse(use.Block); + JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), def.Tree->GetSsaNum()); } } @@ -1656,11 +1662,56 @@ void IncrementalSsaBuilder::Insert() // defs - All STORE_LCL_VAR definitions of the local // uses - All LCL_VAR uses of the local // +// Remarks: +// All uses are required to never read an uninitialized value of the local. +// That is, this function requires that all paths through the function go +// through one of the defs in "defs" before any use in "uses". +// void SsaBuilder::InsertInSsa(Compiler* comp, unsigned lclNum, ArrayStack& defs, ArrayStack& uses) { + LclVarDsc* dsc = comp->lvaGetDesc(lclNum); + assert(!dsc->lvInSsa); + + JITDUMP("Putting V%02u into SSA form\n", lclNum); + JITDUMP(" %d defs:", defs.Height()); + for (int i = 0; i < defs.Height(); i++) + { + JITDUMP(" [%06u]", Compiler::dspTreeID(defs.Bottom(i).Tree)); + } + + JITDUMP("\n %d uses:", uses.Height()); + for (int i = 0; i < uses.Height(); i++) + { + JITDUMP(" [%06u]", Compiler::dspTreeID(uses.Bottom(i).Tree)); + } + + if (defs.Height() == 1) + { + JITDUMP(" Single-def local; putting into SSA directly\n"); + + UseDefLocation& def = defs.BottomRef(0); + + unsigned ssaNum = dsc->lvPerSsaData.AllocSsaNum(comp->getAllocator(CMK_SSA), def.Block, def.Tree); + def.Tree->SetSsaNum(ssaNum); + JITDUMP(" [%06u] d:%u\n", Compiler::dspTreeID(def.Tree), ssaNum); + + LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum); + ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair; + + for (int i = 0; i < uses.Height(); i++) + { + UseDefLocation& use = uses.BottomRef(i); + use.Tree->SetSsaNum(ssaNum); + ssaDsc->AddUse(use.Block); + JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum); + } + + return; + } + if (comp->m_dfsTree == nullptr) { comp->m_dfsTree = comp->fgComputeDfs(); @@ -1678,5 +1729,5 @@ void SsaBuilder::InsertInSsa(Compiler* comp, IncrementalSsaBuilder builder(comp, lclNum, defs, uses); builder.Insert(); - comp->lvaGetDesc(lclNum)->lvInSsa = true; + dsc->lvInSsa = true; } From 115291ee2d81e5e9259f60b14a410f51c510ea05 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Aug 2024 10:42:56 +0200 Subject: [PATCH 05/25] Add newline --- src/coreclr/jit/ssabuilder.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index bdf7d0f237549c..7b3c48d9baab4f 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1688,6 +1688,8 @@ void SsaBuilder::InsertInSsa(Compiler* comp, JITDUMP(" [%06u]", Compiler::dspTreeID(uses.Bottom(i).Tree)); } + JITDUMP("\n"); + if (defs.Height() == 1) { JITDUMP(" Single-def local; 
putting into SSA directly\n"); From 91f23522941cbea867bc048805f1e1c51c1bbe48 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Aug 2024 10:43:02 +0200 Subject: [PATCH 06/25] Add cache --- src/coreclr/jit/ssabuilder.cpp | 45 +++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 7b3c48d9baab4f..2a4ccb372bef85 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1349,6 +1349,8 @@ void Compiler::JitTestCheckSSA() } #endif // DEBUG +typedef JitHashTable, unsigned> BlockToReachingDefMap; + class IncrementalSsaBuilder { Compiler* m_comp; @@ -1358,8 +1360,9 @@ class IncrementalSsaBuilder BitVecTraits m_poTraits; BitVec m_defBlocks; BitVec m_iteratedDominanceFrontiers; + BlockToReachingDefMap m_reachingDefCache; - UseDefLocation FindOrCreateReachingDef(const UseDefLocation& use); + unsigned FindOrCreateReachingDef(const UseDefLocation& use); bool FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def); bool FindReachingDefInSameStatement(const UseDefLocation& use, UseDefLocation* def); Statement* LatestStatement(Statement* stmt1, Statement* stmt2); @@ -1375,6 +1378,7 @@ class IncrementalSsaBuilder , m_poTraits(comp->m_dfsTree->PostOrderTraits()) , m_defBlocks(BitVecOps::MakeEmpty(&m_poTraits)) , m_iteratedDominanceFrontiers(BitVecOps::MakeEmpty(&m_poTraits)) + , m_reachingDefCache(comp->getAllocator(CMK_SSA)) { } @@ -1393,17 +1397,23 @@ class IncrementalSsaBuilder // block. // // Returns: -// Location of a definition node that is the reaching def. +// SSA number of reaching def. // -UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& use) +unsigned IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& use) { + unsigned cachedSsaNum = SsaConfig::RESERVED_SSA_NUM; + if ((use.Stmt == nullptr) && (use.Tree == nullptr) && m_reachingDefCache.Lookup(use.Block, &cachedSsaNum)) + { + return cachedSsaNum; + } + + UseDefLocation reachingDef; for (BasicBlock* dom = use.Block; dom != nullptr; dom = dom->bbIDom) { - UseDefLocation reachingDef; if (BitVecOps::IsMember(&m_poTraits, m_defBlocks, dom->bbPostorderNum) && FindReachingDefInBlock(use, dom, &reachingDef)) { - return reachingDef; + break; } if (BitVecOps::IsMember(&m_poTraits, m_iteratedDominanceFrontiers, dom->bbPostorderNum)) @@ -1430,8 +1440,8 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati } // TODO: This cannot be recursive - UseDefLocation phiArgDef = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); - SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgDef.Tree->GetSsaNum(), pred); + unsigned phiArgSsaNum = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgSsaNum, pred); } m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); @@ -1440,12 +1450,19 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati DISPSTMT(phiDef); } - return UseDefLocation(dom, phiDef, phiDef->GetRootNode()->AsLclVar()); + reachingDef = UseDefLocation(dom, phiDef, phiDef->GetRootNode()->AsLclVar()); + break; } } - assert(!"Found use without any def"); - unreached(); + assert((reachingDef.Tree != nullptr) && !"Found use without any def"); + + if ((reachingDef.Block != use.Block) || ((use.Stmt == nullptr) && (use.Tree == nullptr))) + { + 
m_reachingDefCache.Set(use.Block, reachingDef.Tree->GetSsaNum()); + } + + return reachingDef.Tree->GetSsaNum(); } //------------------------------------------------------------------------ @@ -1645,10 +1662,10 @@ void IncrementalSsaBuilder::Insert() continue; } - UseDefLocation def = FindOrCreateReachingDef(use); - use.Tree->SetSsaNum(def.Tree->GetSsaNum()); - dsc->GetPerSsaData(def.Tree->GetSsaNum())->AddUse(use.Block); - JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), def.Tree->GetSsaNum()); + unsigned ssaNum = FindOrCreateReachingDef(use); + use.Tree->SetSsaNum(ssaNum); + dsc->GetPerSsaData(ssaNum)->AddUse(use.Block); + JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum); } } From 2de17a2666d7b058fbecc8e22fa98ec945829a9d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Aug 2024 10:45:08 +0200 Subject: [PATCH 07/25] Revert "Add cache" This reverts commit 91f23522941cbea867bc048805f1e1c51c1bbe48. --- src/coreclr/jit/ssabuilder.cpp | 45 +++++++++++----------------------- 1 file changed, 14 insertions(+), 31 deletions(-) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 2a4ccb372bef85..7b3c48d9baab4f 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1349,8 +1349,6 @@ void Compiler::JitTestCheckSSA() } #endif // DEBUG -typedef JitHashTable, unsigned> BlockToReachingDefMap; - class IncrementalSsaBuilder { Compiler* m_comp; @@ -1360,9 +1358,8 @@ class IncrementalSsaBuilder BitVecTraits m_poTraits; BitVec m_defBlocks; BitVec m_iteratedDominanceFrontiers; - BlockToReachingDefMap m_reachingDefCache; - unsigned FindOrCreateReachingDef(const UseDefLocation& use); + UseDefLocation FindOrCreateReachingDef(const UseDefLocation& use); bool FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def); bool FindReachingDefInSameStatement(const UseDefLocation& use, UseDefLocation* def); Statement* LatestStatement(Statement* stmt1, Statement* stmt2); @@ -1378,7 +1375,6 @@ class IncrementalSsaBuilder , m_poTraits(comp->m_dfsTree->PostOrderTraits()) , m_defBlocks(BitVecOps::MakeEmpty(&m_poTraits)) , m_iteratedDominanceFrontiers(BitVecOps::MakeEmpty(&m_poTraits)) - , m_reachingDefCache(comp->getAllocator(CMK_SSA)) { } @@ -1397,23 +1393,17 @@ class IncrementalSsaBuilder // block. // // Returns: -// SSA number of reaching def. +// Location of a definition node that is the reaching def. 
// -unsigned IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& use) +UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& use) { - unsigned cachedSsaNum = SsaConfig::RESERVED_SSA_NUM; - if ((use.Stmt == nullptr) && (use.Tree == nullptr) && m_reachingDefCache.Lookup(use.Block, &cachedSsaNum)) - { - return cachedSsaNum; - } - - UseDefLocation reachingDef; for (BasicBlock* dom = use.Block; dom != nullptr; dom = dom->bbIDom) { + UseDefLocation reachingDef; if (BitVecOps::IsMember(&m_poTraits, m_defBlocks, dom->bbPostorderNum) && FindReachingDefInBlock(use, dom, &reachingDef)) { - break; + return reachingDef; } if (BitVecOps::IsMember(&m_poTraits, m_iteratedDominanceFrontiers, dom->bbPostorderNum)) @@ -1440,8 +1430,8 @@ unsigned IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& us } // TODO: This cannot be recursive - unsigned phiArgSsaNum = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); - SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgSsaNum, pred); + UseDefLocation phiArgDef = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgDef.Tree->GetSsaNum(), pred); } m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); @@ -1450,19 +1440,12 @@ unsigned IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocation& us DISPSTMT(phiDef); } - reachingDef = UseDefLocation(dom, phiDef, phiDef->GetRootNode()->AsLclVar()); - break; + return UseDefLocation(dom, phiDef, phiDef->GetRootNode()->AsLclVar()); } } - assert((reachingDef.Tree != nullptr) && !"Found use without any def"); - - if ((reachingDef.Block != use.Block) || ((use.Stmt == nullptr) && (use.Tree == nullptr))) - { - m_reachingDefCache.Set(use.Block, reachingDef.Tree->GetSsaNum()); - } - - return reachingDef.Tree->GetSsaNum(); + assert(!"Found use without any def"); + unreached(); } //------------------------------------------------------------------------ @@ -1662,10 +1645,10 @@ void IncrementalSsaBuilder::Insert() continue; } - unsigned ssaNum = FindOrCreateReachingDef(use); - use.Tree->SetSsaNum(ssaNum); - dsc->GetPerSsaData(ssaNum)->AddUse(use.Block); - JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum); + UseDefLocation def = FindOrCreateReachingDef(use); + use.Tree->SetSsaNum(def.Tree->GetSsaNum()); + dsc->GetPerSsaData(def.Tree->GetSsaNum())->AddUse(use.Block); + JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), def.Tree->GetSsaNum()); } } From 21f4d9548c9edb3556497e79215e055663d8753e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 20 Aug 2024 11:21:02 +0200 Subject: [PATCH 08/25] Fix assert, write some more docs --- src/coreclr/jit/flowgraph.cpp | 42 ++++++++++++++++++++++++++++++++++ src/coreclr/jit/ssabuilder.cpp | 1 + 2 files changed, 43 insertions(+) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 508501b5db9f85..739f3eca74e810 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -6350,6 +6350,30 @@ FlowGraphDominanceFrontiers::FlowGraphDominanceFrontiers(FlowGraphDominatorTree* { } +//------------------------------------------------------------------------ +// FlowGraphDominanceFrontiers::Build: Build the dominance frontiers for all +// blocks. +// +// Parameters: +// domTree - Dominator tree to build dominance frontiers for +// +// Returns: +// Data structure representing dominance frontiers. 
+// +// Remarks: +// Recall that the dominance frontier of a block B is the set of blocks +// B3 such that there exists some B2 s.t. B3 is a successor of B2, and +// B dominates B2. Note that this dominance need not be strict -- B2 +// and B may be the same node. +// +// In other words, a block B' is in DF(B) if B dominates an immediate +// predecessor of B', but does not dominate B'. Intuitively, these blocks are +// the "first" blocks that are no longer dominated by B; these are the places +// we are interested in inserting phi definitions that may refer to defs in +// B. +// +// See "A simple, fast dominance algorithm", by Cooper, Harvey, and Kennedy. +// FlowGraphDominanceFrontiers* FlowGraphDominanceFrontiers::Build(FlowGraphDominatorTree* domTree) { const FlowGraphDfsTree* dfsTree = domTree->GetDfsTree(); @@ -6414,6 +6438,20 @@ FlowGraphDominanceFrontiers* FlowGraphDominanceFrontiers::Build(FlowGraphDominat return result; } +//------------------------------------------------------------------------ +// ComputeIteratedDominanceFrontier: Compute the iterated dominance frontier of +// a block. This is the transitive closure of taking dominance frontiers. +// +// Parameters: +// block - Block to compute iterated dominance frontier for. +// result - Vector to add blocks of IDF into. +// +// Remarks: +// When we create phi definitions we are creating new definitions that +// themselves induce the creation of more phi nodes. Thus, the transitive +// closure of DF(B) contains all blocks that may have phi definitions +// referring to defs in B, or referring to other phis referring to defs in B. +// void FlowGraphDominanceFrontiers::ComputeIteratedDominanceFrontier(BasicBlock* block, BlkVector* result) { assert(result->empty()); @@ -6461,6 +6499,10 @@ void FlowGraphDominanceFrontiers::ComputeIteratedDominanceFrontier(BasicBlock* b } #ifdef DEBUG +//------------------------------------------------------------------------ +// FlowGraphDominanceFrontiers::Dump: Dump a textual representation of the +// dominance frontiers to jitstdout. 
+// void FlowGraphDominanceFrontiers::Dump() { printf("DF:\n"); diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 7b3c48d9baab4f..3a5ed07a399a06 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1711,6 +1711,7 @@ void SsaBuilder::InsertInSsa(Compiler* comp, JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum); } + dsc->lvInSsa = true; return; } From 64eb25eef9ac8e89ea94705b1201018ca9a0707b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 21 Aug 2024 12:11:32 +0200 Subject: [PATCH 09/25] Add incremental liveness updating as well --- src/coreclr/jit/block.cpp | 20 ++++++ src/coreclr/jit/compiler.cpp | 1 + src/coreclr/jit/compiler.h | 29 +++++++++ src/coreclr/jit/inductionvariableopts.cpp | 14 +---- src/coreclr/jit/ssabuilder.cpp | 74 ++++++++++++++++++++++- 5 files changed, 124 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index be2ba15e254690..8e86dcad1f6b03 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -328,6 +328,26 @@ FlowEdge* Compiler::BlockDominancePreds(BasicBlock* blk) return res; } +bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) +{ + if (m_insertedSsaLocalsLiveIn == nullptr) + { + return false; + } + + return m_insertedSsaLocalsLiveIn->Lookup(BasicBlockLocalPair(block, lclNum)); +} + +bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) +{ + if (m_insertedSsaLocalsLiveIn == nullptr) + { + m_insertedSsaLocalsLiveIn = new (this, CMK_SSA) BasicBlockLocalPairSet(getAllocator(CMK_SSA)); + } + + return !m_insertedSsaLocalsLiveIn->Set(BasicBlockLocalPair(block, lclNum), true, BasicBlockLocalPairSet::Overwrite); +} + //------------------------------------------------------------------------ // IsLastHotBlock: see if this is the last block before the cold section // diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 83014990d7aa7d..81916e4a4d6f7c 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1959,6 +1959,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc, m_switchDescMap = nullptr; m_blockToEHPreds = nullptr; m_dominancePreds = nullptr; + m_insertedSsaLocalsLiveIn = nullptr; m_fieldSeqStore = nullptr; m_refAnyClass = nullptr; for (MemoryKind memoryKind : allMemoryKinds()) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2059157291545e..bc7b94f4045bd1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2944,6 +2944,35 @@ class Compiler return m_dominancePreds; } + struct BasicBlockLocalPair + { + BasicBlock* Block; + unsigned LclNum; + + BasicBlockLocalPair(BasicBlock* block, unsigned lclNum) + : Block(block) + , LclNum(lclNum) + { + } + + static bool Equals(const BasicBlockLocalPair& x, const BasicBlockLocalPair& y) + { + return (x.Block == y.Block) && (x.LclNum == y.LclNum); + } + static unsigned GetHashCode(const BasicBlockLocalPair& val) + { + unsigned hash = static_cast(reinterpret_cast(&val)); + hash ^= val.LclNum + 0x9e3779b9 + (hash << 19) + (hash >> 13); + return hash; + } + }; + + typedef JitHashTable BasicBlockLocalPairSet; + + BasicBlockLocalPairSet* m_insertedSsaLocalsLiveIn; + bool IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); + bool AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); + void* ehEmitCookie(BasicBlock* block); UNATIVE_OFFSET ehCodeOffset(BasicBlock* block); diff --git a/src/coreclr/jit/inductionvariableopts.cpp 
b/src/coreclr/jit/inductionvariableopts.cpp index d18fe3289cc8ca..e1ba823b97d99d 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -391,13 +391,8 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); - if (!dsc->lvTracked) - { - return false; - } - BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) { - if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + if (dsc->lvTracked ? !VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex) : IsInsertedSsaLiveIn(exit, lclNum)) { JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum); return BasicBlockVisit::Continue; @@ -1290,13 +1285,8 @@ bool Compiler::optPrimaryIVHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* return true; } - if (!varDsc->lvTracked) - { - return true; - } - BasicBlockVisit visitResult = loop->VisitRegularExitBlocks([=](BasicBlock* block) { - if (VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex)) + if (varDsc->lvTracked ? VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex) : IsInsertedSsaLiveIn(block, lclNum)) { return BasicBlockVisit::Abort; } diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 3a5ed07a399a06..552a563c438101 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1349,6 +1349,58 @@ void Compiler::JitTestCheckSSA() } #endif // DEBUG +class IncrementalLiveInBuilder +{ + Compiler* m_comp; + ArrayStack m_queue; + BitVecTraits m_traits; + BitVec m_visited; + +public: + IncrementalLiveInBuilder(Compiler* comp) + : m_comp(comp) + , m_queue(comp->getAllocator(CMK_SSA)) + , m_traits(comp->fgBBNumMax + 1, comp) + , m_visited(BitVecOps::MakeEmpty(&m_traits)) + { + } + + void MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef); +}; + +void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef) +{ + BitVecOps::ClearD(&m_traits, m_visited); + m_queue.Reset(); + + m_queue.Push(use.Block); + BitVecOps::AddElemD(&m_traits, m_visited, use.Block->bbNum); + + while (!m_queue.Empty()) + { + BasicBlock* block = m_queue.Pop(); + if (block == reachingDef.Block) + { + continue; + } + + if (!m_comp->AddInsertedSsaLiveIn(block, lclNum)) + { + // Already marked live-in here; expect to have been marked live in + // to this reaching def. 
+ continue; + } + + for (FlowEdge* edge = m_comp->BlockPredsWithEH(block); edge != nullptr; edge = edge->getNextPredEdge()) + { + if (BitVecOps::TryAddElemD(&m_traits, m_visited, edge->getSourceBlock()->bbNum)) + { + m_queue.Push(edge->getSourceBlock()); + } + } + } +} + class IncrementalSsaBuilder { Compiler* m_comp; @@ -1358,6 +1410,7 @@ class IncrementalSsaBuilder BitVecTraits m_poTraits; BitVec m_defBlocks; BitVec m_iteratedDominanceFrontiers; + IncrementalLiveInBuilder m_liveInBuilder; UseDefLocation FindOrCreateReachingDef(const UseDefLocation& use); bool FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def); @@ -1375,10 +1428,12 @@ class IncrementalSsaBuilder , m_poTraits(comp->m_dfsTree->PostOrderTraits()) , m_defBlocks(BitVecOps::MakeEmpty(&m_poTraits)) , m_iteratedDominanceFrontiers(BitVecOps::MakeEmpty(&m_poTraits)) + , m_liveInBuilder(comp) { } void Insert(); + static void MarkLiveInBackwards(Compiler* comp, unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef, BitVec& visitedSet); }; //------------------------------------------------------------------------ @@ -1420,6 +1475,10 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati GenTreePhi* phi = phiDef->GetRootNode()->AsLclVar()->Data()->AsPhi(); + // The local is always live into blocks with phi defs. + bool marked = m_comp->AddInsertedSsaLiveIn(dom, m_lclNum); + assert(marked); + for (FlowEdge* predEdge = m_comp->BlockPredsWithEH(dom); predEdge != nullptr; predEdge = predEdge->getNextPredEdge()) { @@ -1430,8 +1489,13 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati } // TODO: This cannot be recursive - UseDefLocation phiArgDef = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); - SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgDef.Tree->GetSsaNum(), pred); + UseDefLocation phiArgUse = UseDefLocation(pred, nullptr, nullptr); + UseDefLocation phiArgReachingDef = FindOrCreateReachingDef(phiArgUse); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgReachingDef.Tree->GetSsaNum(), pred); + + // The phi arg is modelled at the end of the pred block; + // mark liveness for it. + m_liveInBuilder.MarkLiveInBackwards(m_lclNum, phiArgUse, phiArgReachingDef); } m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); @@ -1649,6 +1713,8 @@ void IncrementalSsaBuilder::Insert() use.Tree->SetSsaNum(def.Tree->GetSsaNum()); dsc->GetPerSsaData(def.Tree->GetSsaNum())->AddUse(use.Block); JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), def.Tree->GetSsaNum()); + + m_liveInBuilder.MarkLiveInBackwards(m_lclNum, use, def); } } @@ -1703,12 +1769,16 @@ void SsaBuilder::InsertInSsa(Compiler* comp, LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum); ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair; + IncrementalLiveInBuilder liveIn(comp); + for (int i = 0; i < uses.Height(); i++) { UseDefLocation& use = uses.BottomRef(i); use.Tree->SetSsaNum(ssaNum); ssaDsc->AddUse(use.Block); JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum); + + liveIn.MarkLiveInBackwards(lclNum, use, def); } dsc->lvInSsa = true; From caa8dd36480787726cadc5a6dbe3b3bd91f34ca9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 21 Aug 2024 12:11:36 +0200 Subject: [PATCH 10/25] Revert "Add incremental liveness updating as well" This reverts commit 64eb25eef9ac8e89ea94705b1201018ca9a0707b. 
--- src/coreclr/jit/block.cpp | 20 ------ src/coreclr/jit/compiler.cpp | 1 - src/coreclr/jit/compiler.h | 29 --------- src/coreclr/jit/inductionvariableopts.cpp | 14 ++++- src/coreclr/jit/ssabuilder.cpp | 74 +---------------------- 5 files changed, 14 insertions(+), 124 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 8e86dcad1f6b03..be2ba15e254690 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -328,26 +328,6 @@ FlowEdge* Compiler::BlockDominancePreds(BasicBlock* blk) return res; } -bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) -{ - if (m_insertedSsaLocalsLiveIn == nullptr) - { - return false; - } - - return m_insertedSsaLocalsLiveIn->Lookup(BasicBlockLocalPair(block, lclNum)); -} - -bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) -{ - if (m_insertedSsaLocalsLiveIn == nullptr) - { - m_insertedSsaLocalsLiveIn = new (this, CMK_SSA) BasicBlockLocalPairSet(getAllocator(CMK_SSA)); - } - - return !m_insertedSsaLocalsLiveIn->Set(BasicBlockLocalPair(block, lclNum), true, BasicBlockLocalPairSet::Overwrite); -} - //------------------------------------------------------------------------ // IsLastHotBlock: see if this is the last block before the cold section // diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 81916e4a4d6f7c..83014990d7aa7d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -1959,7 +1959,6 @@ void Compiler::compInit(ArenaAllocator* pAlloc, m_switchDescMap = nullptr; m_blockToEHPreds = nullptr; m_dominancePreds = nullptr; - m_insertedSsaLocalsLiveIn = nullptr; m_fieldSeqStore = nullptr; m_refAnyClass = nullptr; for (MemoryKind memoryKind : allMemoryKinds()) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index bc7b94f4045bd1..2059157291545e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2944,35 +2944,6 @@ class Compiler return m_dominancePreds; } - struct BasicBlockLocalPair - { - BasicBlock* Block; - unsigned LclNum; - - BasicBlockLocalPair(BasicBlock* block, unsigned lclNum) - : Block(block) - , LclNum(lclNum) - { - } - - static bool Equals(const BasicBlockLocalPair& x, const BasicBlockLocalPair& y) - { - return (x.Block == y.Block) && (x.LclNum == y.LclNum); - } - static unsigned GetHashCode(const BasicBlockLocalPair& val) - { - unsigned hash = static_cast(reinterpret_cast(&val)); - hash ^= val.LclNum + 0x9e3779b9 + (hash << 19) + (hash >> 13); - return hash; - } - }; - - typedef JitHashTable BasicBlockLocalPairSet; - - BasicBlockLocalPairSet* m_insertedSsaLocalsLiveIn; - bool IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); - bool AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); - void* ehEmitCookie(BasicBlock* block); UNATIVE_OFFSET ehCodeOffset(BasicBlock* block); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index e1ba823b97d99d..d18fe3289cc8ca 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -391,8 +391,13 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); + if (!dsc->lvTracked) + { + return false; + } + BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) { - if (dsc->lvTracked ? 
!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex) : IsInsertedSsaLiveIn(exit, lclNum)) + if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) { JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum); return BasicBlockVisit::Continue; @@ -1285,8 +1290,13 @@ bool Compiler::optPrimaryIVHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* return true; } + if (!varDsc->lvTracked) + { + return true; + } + BasicBlockVisit visitResult = loop->VisitRegularExitBlocks([=](BasicBlock* block) { - if (varDsc->lvTracked ? VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex) : IsInsertedSsaLiveIn(block, lclNum)) + if (VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex)) { return BasicBlockVisit::Abort; } diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 552a563c438101..3a5ed07a399a06 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1349,58 +1349,6 @@ void Compiler::JitTestCheckSSA() } #endif // DEBUG -class IncrementalLiveInBuilder -{ - Compiler* m_comp; - ArrayStack m_queue; - BitVecTraits m_traits; - BitVec m_visited; - -public: - IncrementalLiveInBuilder(Compiler* comp) - : m_comp(comp) - , m_queue(comp->getAllocator(CMK_SSA)) - , m_traits(comp->fgBBNumMax + 1, comp) - , m_visited(BitVecOps::MakeEmpty(&m_traits)) - { - } - - void MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef); -}; - -void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef) -{ - BitVecOps::ClearD(&m_traits, m_visited); - m_queue.Reset(); - - m_queue.Push(use.Block); - BitVecOps::AddElemD(&m_traits, m_visited, use.Block->bbNum); - - while (!m_queue.Empty()) - { - BasicBlock* block = m_queue.Pop(); - if (block == reachingDef.Block) - { - continue; - } - - if (!m_comp->AddInsertedSsaLiveIn(block, lclNum)) - { - // Already marked live-in here; expect to have been marked live in - // to this reaching def. - continue; - } - - for (FlowEdge* edge = m_comp->BlockPredsWithEH(block); edge != nullptr; edge = edge->getNextPredEdge()) - { - if (BitVecOps::TryAddElemD(&m_traits, m_visited, edge->getSourceBlock()->bbNum)) - { - m_queue.Push(edge->getSourceBlock()); - } - } - } -} - class IncrementalSsaBuilder { Compiler* m_comp; @@ -1410,7 +1358,6 @@ class IncrementalSsaBuilder BitVecTraits m_poTraits; BitVec m_defBlocks; BitVec m_iteratedDominanceFrontiers; - IncrementalLiveInBuilder m_liveInBuilder; UseDefLocation FindOrCreateReachingDef(const UseDefLocation& use); bool FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def); @@ -1428,12 +1375,10 @@ class IncrementalSsaBuilder , m_poTraits(comp->m_dfsTree->PostOrderTraits()) , m_defBlocks(BitVecOps::MakeEmpty(&m_poTraits)) , m_iteratedDominanceFrontiers(BitVecOps::MakeEmpty(&m_poTraits)) - , m_liveInBuilder(comp) { } void Insert(); - static void MarkLiveInBackwards(Compiler* comp, unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef, BitVec& visitedSet); }; //------------------------------------------------------------------------ @@ -1475,10 +1420,6 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati GenTreePhi* phi = phiDef->GetRootNode()->AsLclVar()->Data()->AsPhi(); - // The local is always live into blocks with phi defs. 
- bool marked = m_comp->AddInsertedSsaLiveIn(dom, m_lclNum); - assert(marked); - for (FlowEdge* predEdge = m_comp->BlockPredsWithEH(dom); predEdge != nullptr; predEdge = predEdge->getNextPredEdge()) { @@ -1489,13 +1430,8 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati } // TODO: This cannot be recursive - UseDefLocation phiArgUse = UseDefLocation(pred, nullptr, nullptr); - UseDefLocation phiArgReachingDef = FindOrCreateReachingDef(phiArgUse); - SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgReachingDef.Tree->GetSsaNum(), pred); - - // The phi arg is modelled at the end of the pred block; - // mark liveness for it. - m_liveInBuilder.MarkLiveInBackwards(m_lclNum, phiArgUse, phiArgReachingDef); + UseDefLocation phiArgDef = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgDef.Tree->GetSsaNum(), pred); } m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); @@ -1713,8 +1649,6 @@ void IncrementalSsaBuilder::Insert() use.Tree->SetSsaNum(def.Tree->GetSsaNum()); dsc->GetPerSsaData(def.Tree->GetSsaNum())->AddUse(use.Block); JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), def.Tree->GetSsaNum()); - - m_liveInBuilder.MarkLiveInBackwards(m_lclNum, use, def); } } @@ -1769,16 +1703,12 @@ void SsaBuilder::InsertInSsa(Compiler* comp, LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum); ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair; - IncrementalLiveInBuilder liveIn(comp); - for (int i = 0; i < uses.Height(); i++) { UseDefLocation& use = uses.BottomRef(i); use.Tree->SetSsaNum(ssaNum); ssaDsc->AddUse(use.Block); JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum); - - liveIn.MarkLiveInBackwards(lclNum, use, def); } dsc->lvInSsa = true; From 0869ba6d0e0fa5c93754cd61d71ff0cd784f81fd Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 31 Oct 2024 12:10:33 +0100 Subject: [PATCH 11/25] Remove comment --- src/coreclr/jit/ssabuilder.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 3a5ed07a399a06..7a0c0ffabd5020 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1429,7 +1429,6 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati continue; } - // TODO: This cannot be recursive UseDefLocation phiArgDef = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgDef.Tree->GetSsaNum(), pred); } From f5d2685f528ef79db54d54e61d8c9b4ecc91cb47 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 31 Oct 2024 12:14:00 +0100 Subject: [PATCH 12/25] Add function header --- src/coreclr/jit/ssabuilder.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 7a0c0ffabd5020..14d4b21bb399bb 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -203,6 +203,19 @@ void SsaBuilder::AddPhiArg( AddNewPhiArg(m_pCompiler, block, stmt, phi, lclNum, ssaNum, pred); } +//------------------------------------------------------------------------ +// AddNewPhiArg: Do the IR manipulations to add a new phi arg to a GenTreePhi +// node. 
+// +// Arguments: +// comp - Compiler instance +// block - Block containing the phi node +// stmt - The statement that contains the GT_PHI node +// phi - The phi node +// lclNum - The local +// ssaNum - SSA number of the phi arg +// pred - The predecessor block corresponding to the phi arg +// void SsaBuilder::AddNewPhiArg(Compiler* comp, BasicBlock* block, Statement* stmt, From 96ac716cf1291217517f2e5acc5da5399795ab73 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 12:31:25 +0100 Subject: [PATCH 13/25] Reapply "Add incremental liveness updating as well" This reverts commit caa8dd36480787726cadc5a6dbe3b3bd91f34ca9. --- src/coreclr/jit/block.cpp | 20 ++++++ src/coreclr/jit/compiler.h | 29 +++++++++ src/coreclr/jit/inductionvariableopts.cpp | 14 +---- src/coreclr/jit/ssabuilder.cpp | 74 ++++++++++++++++++++++- 4 files changed, 123 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 5fe74c76fa1c8a..22dc87f4d77c61 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -328,6 +328,26 @@ FlowEdge* Compiler::BlockDominancePreds(BasicBlock* blk) return res; } +bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) +{ + if (m_insertedSsaLocalsLiveIn == nullptr) + { + return false; + } + + return m_insertedSsaLocalsLiveIn->Lookup(BasicBlockLocalPair(block, lclNum)); +} + +bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) +{ + if (m_insertedSsaLocalsLiveIn == nullptr) + { + m_insertedSsaLocalsLiveIn = new (this, CMK_SSA) BasicBlockLocalPairSet(getAllocator(CMK_SSA)); + } + + return !m_insertedSsaLocalsLiveIn->Set(BasicBlockLocalPair(block, lclNum), true, BasicBlockLocalPairSet::Overwrite); +} + //------------------------------------------------------------------------ // IsLastHotBlock: see if this is the last block before the cold section // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c79f74538b633d..650fc5fba4f6cd 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2949,6 +2949,35 @@ class Compiler return m_dominancePreds; } + struct BasicBlockLocalPair + { + BasicBlock* Block; + unsigned LclNum; + + BasicBlockLocalPair(BasicBlock* block, unsigned lclNum) + : Block(block) + , LclNum(lclNum) + { + } + + static bool Equals(const BasicBlockLocalPair& x, const BasicBlockLocalPair& y) + { + return (x.Block == y.Block) && (x.LclNum == y.LclNum); + } + static unsigned GetHashCode(const BasicBlockLocalPair& val) + { + unsigned hash = static_cast(reinterpret_cast(&val)); + hash ^= val.LclNum + 0x9e3779b9 + (hash << 19) + (hash >> 13); + return hash; + } + }; + + typedef JitHashTable BasicBlockLocalPairSet; + + BasicBlockLocalPairSet* m_insertedSsaLocalsLiveIn; + bool IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); + bool AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); + void* ehEmitCookie(BasicBlock* block); UNATIVE_OFFSET ehCodeOffset(BasicBlock* block); diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index f631c0b5f7889b..498250891a357a 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -391,13 +391,8 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); - if (!dsc->lvTracked) - { - return false; - } - BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) { - if (!VarSetOps::IsMember(this, exit->bbLiveIn, 
dsc->lvVarIndex)) + if (dsc->lvTracked ? !VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex) : IsInsertedSsaLiveIn(exit, lclNum)) { JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum); return BasicBlockVisit::Continue; @@ -1289,13 +1284,8 @@ bool Compiler::optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loo return true; } - if (!varDsc->lvTracked) - { - return true; - } - BasicBlockVisit visitResult = loop->VisitRegularExitBlocks([=](BasicBlock* block) { - if (VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex)) + if (varDsc->lvTracked ? VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex) : IsInsertedSsaLiveIn(block, lclNum)) { return BasicBlockVisit::Abort; } diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 14d4b21bb399bb..ab66ce9072ef7e 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1362,6 +1362,58 @@ void Compiler::JitTestCheckSSA() } #endif // DEBUG +class IncrementalLiveInBuilder +{ + Compiler* m_comp; + ArrayStack m_queue; + BitVecTraits m_traits; + BitVec m_visited; + +public: + IncrementalLiveInBuilder(Compiler* comp) + : m_comp(comp) + , m_queue(comp->getAllocator(CMK_SSA)) + , m_traits(comp->fgBBNumMax + 1, comp) + , m_visited(BitVecOps::MakeEmpty(&m_traits)) + { + } + + void MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef); +}; + +void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef) +{ + BitVecOps::ClearD(&m_traits, m_visited); + m_queue.Reset(); + + m_queue.Push(use.Block); + BitVecOps::AddElemD(&m_traits, m_visited, use.Block->bbNum); + + while (!m_queue.Empty()) + { + BasicBlock* block = m_queue.Pop(); + if (block == reachingDef.Block) + { + continue; + } + + if (!m_comp->AddInsertedSsaLiveIn(block, lclNum)) + { + // Already marked live-in here; expect to have been marked live in + // to this reaching def. + continue; + } + + for (FlowEdge* edge = m_comp->BlockPredsWithEH(block); edge != nullptr; edge = edge->getNextPredEdge()) + { + if (BitVecOps::TryAddElemD(&m_traits, m_visited, edge->getSourceBlock()->bbNum)) + { + m_queue.Push(edge->getSourceBlock()); + } + } + } +} + class IncrementalSsaBuilder { Compiler* m_comp; @@ -1371,6 +1423,7 @@ class IncrementalSsaBuilder BitVecTraits m_poTraits; BitVec m_defBlocks; BitVec m_iteratedDominanceFrontiers; + IncrementalLiveInBuilder m_liveInBuilder; UseDefLocation FindOrCreateReachingDef(const UseDefLocation& use); bool FindReachingDefInBlock(const UseDefLocation& use, BasicBlock* block, UseDefLocation* def); @@ -1388,10 +1441,12 @@ class IncrementalSsaBuilder , m_poTraits(comp->m_dfsTree->PostOrderTraits()) , m_defBlocks(BitVecOps::MakeEmpty(&m_poTraits)) , m_iteratedDominanceFrontiers(BitVecOps::MakeEmpty(&m_poTraits)) + , m_liveInBuilder(comp) { } void Insert(); + static void MarkLiveInBackwards(Compiler* comp, unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef, BitVec& visitedSet); }; //------------------------------------------------------------------------ @@ -1433,6 +1488,10 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati GenTreePhi* phi = phiDef->GetRootNode()->AsLclVar()->Data()->AsPhi(); + // The local is always live into blocks with phi defs. 
+ bool marked = m_comp->AddInsertedSsaLiveIn(dom, m_lclNum); + assert(marked); + for (FlowEdge* predEdge = m_comp->BlockPredsWithEH(dom); predEdge != nullptr; predEdge = predEdge->getNextPredEdge()) { @@ -1442,8 +1501,13 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati continue; } - UseDefLocation phiArgDef = FindOrCreateReachingDef(UseDefLocation(pred, nullptr, nullptr)); - SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgDef.Tree->GetSsaNum(), pred); + UseDefLocation phiArgUse = UseDefLocation(pred, nullptr, nullptr); + UseDefLocation phiArgReachingDef = FindOrCreateReachingDef(phiArgUse); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgReachingDef.Tree->GetSsaNum(), pred); + + // The phi arg is modelled at the end of the pred block; + // mark liveness for it. + m_liveInBuilder.MarkLiveInBackwards(m_lclNum, phiArgUse, phiArgReachingDef); } m_comp->fgValueNumberPhiDef(phiDef->GetRootNode()->AsLclVar(), dom); @@ -1661,6 +1725,8 @@ void IncrementalSsaBuilder::Insert() use.Tree->SetSsaNum(def.Tree->GetSsaNum()); dsc->GetPerSsaData(def.Tree->GetSsaNum())->AddUse(use.Block); JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), def.Tree->GetSsaNum()); + + m_liveInBuilder.MarkLiveInBackwards(m_lclNum, use, def); } } @@ -1715,12 +1781,16 @@ void SsaBuilder::InsertInSsa(Compiler* comp, LclSsaVarDsc* ssaDsc = dsc->GetPerSsaData(ssaNum); ssaDsc->m_vnPair = def.Tree->Data()->gtVNPair; + IncrementalLiveInBuilder liveIn(comp); + for (int i = 0; i < uses.Height(); i++) { UseDefLocation& use = uses.BottomRef(i); use.Tree->SetSsaNum(ssaNum); ssaDsc->AddUse(use.Block); JITDUMP(" [%06u] u:%u\n", Compiler::dspTreeID(use.Tree), ssaNum); + + liveIn.MarkLiveInBackwards(lclNum, use, def); } dsc->lvInSsa = true; From a65ff9e8a6ce9a5f52cceed927012a11ae355349 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 12:56:00 +0100 Subject: [PATCH 14/25] Compute liveness for SSA-inserted locals --- src/coreclr/jit/block.cpp | 24 ++++++++++++ src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/inductionvariableopts.cpp | 47 ++++++++++++++++++++--- src/coreclr/jit/ssabuilder.cpp | 31 ++++++++------- 4 files changed, 84 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 22dc87f4d77c61..dafba6004bf07b 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -328,8 +328,21 @@ FlowEdge* Compiler::BlockDominancePreds(BasicBlock* blk) return res; } +//------------------------------------------------------------------------ +// IsInsertedSsaLiveIn: See if a local is marked as being live-in to a block in +// the side table with locals inserted into SSA. +// +// Arguments: +// block - The block +// lclNum - The local +// +// Returns: +// True if the local is marked as live-in to that block +// bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) { + assert(lvaGetDesc(lclNum)->lvInSsa); + if (m_insertedSsaLocalsLiveIn == nullptr) { return false; @@ -338,6 +351,17 @@ bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) return m_insertedSsaLocalsLiveIn->Lookup(BasicBlockLocalPair(block, lclNum)); } +//------------------------------------------------------------------------ +// AddInsertedSsaLiveIn: Mark as local that was inserted into SSA as being +// live-in to a block. +// +// Arguments: +// block - The block +// lclNum - The local +// +// Returns: +// True if this was added anew; false if the local was already marked as such. 
+// bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum) { if (m_insertedSsaLocalsLiveIn == nullptr) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 650fc5fba4f6cd..780e956b388fa1 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2974,7 +2974,7 @@ class Compiler typedef JitHashTable BasicBlockLocalPairSet; - BasicBlockLocalPairSet* m_insertedSsaLocalsLiveIn; + BasicBlockLocalPairSet* m_insertedSsaLocalsLiveIn = nullptr; bool IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); bool AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum); @@ -7749,6 +7749,7 @@ class Compiler LoopLocalOccurrences* loopLocals); bool optCanAndShouldChangeExitTest(GenTree* cond, bool dump); bool optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals); + bool optLocalIsLiveIntoBlock(unsigned lclNum, BasicBlock* block); bool optWidenIVs(ScalarEvolutionContext& scevContext, FlowGraphNaturalLoop* loop, LoopLocalOccurrences* loopLocals); bool optWidenPrimaryIV(FlowGraphNaturalLoop* loop, diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index 498250891a357a..c922a8467b4920 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -391,8 +391,10 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); + assert(dsc->lvInSsa); + BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) { - if (dsc->lvTracked ? !VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex) : IsInsertedSsaLiveIn(exit, lclNum)) + if (optLocalIsLiveIntoBlock(lclNum, exit)) { JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum); return BasicBlockVisit::Continue; @@ -422,8 +424,7 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) block->VisitAllSuccs(this, [=](BasicBlock* succ) { if (!loop->ContainsBlock(succ) && bbIsHandlerBeg(succ)) { - assert(!VarSetOps::IsMember(this, succ->bbLiveIn, dsc->lvVarIndex) && - "Candidate IV for widening is live into exceptional exit"); + assert(!optLocalIsLiveIntoBlock(lclNum, succ) && "Candidate IV for widening is live into exceptional exit"); } return BasicBlockVisit::Continue; @@ -534,8 +535,10 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, // Now account for the cost of sinks. LclVarDsc* dsc = lvaGetDesc(lclNum); + assert(dsc->lvInSsa); + loop->VisitRegularExitBlocks([&](BasicBlock* exit) { - if (VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + if (optLocalIsLiveIntoBlock(lclNum, exit)) { savedSize -= ExtensionSize; savedCost -= exit->getBBWeight(this) * ExtensionCost; @@ -583,8 +586,10 @@ bool Compiler::optIsIVWideningProfitable(unsigned lclNum, void Compiler::optSinkWidenedIV(unsigned lclNum, unsigned newLclNum, FlowGraphNaturalLoop* loop) { LclVarDsc* dsc = lvaGetDesc(lclNum); + assert(dsc->lvInSsa); + loop->VisitRegularExitBlocks([=](BasicBlock* exit) { - if (!VarSetOps::IsMember(this, exit->bbLiveIn, dsc->lvVarIndex)) + if (!optLocalIsLiveIntoBlock(lclNum, exit)) { return BasicBlockVisit::Continue; } @@ -1284,8 +1289,14 @@ bool Compiler::optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loo return true; } + if (!varDsc->lvTracked && !varDsc->lvInSsa) + { + // We do not have liveness we can use for this untracked local. 
+ return true; + } + BasicBlockVisit visitResult = loop->VisitRegularExitBlocks([=](BasicBlock* block) { - if (varDsc->lvTracked ? VarSetOps::IsMember(this, block->bbLiveIn, varDsc->lvVarIndex) : IsInsertedSsaLiveIn(block, lclNum)) + if (optLocalIsLiveIntoBlock(lclNum, block)) { return BasicBlockVisit::Abort; } @@ -1305,6 +1316,30 @@ bool Compiler::optLocalHasNonLoopUses(unsigned lclNum, FlowGraphNaturalLoop* loo return false; } +//------------------------------------------------------------------------ +// optLocalIsLiveIntoBlock: +// Check if a local is live into a block. Required liveness information for the local to be present +// (either because of it being tracked, or from being an SSA-inserted local). +// +// Parameters: +// lclNum - The local +// block - The block +// +// Returns: +// True if the local is live into that block. +// +bool Compiler::optLocalIsLiveIntoBlock(unsigned lclNum, BasicBlock* block) +{ + LclVarDsc* dsc = lvaGetDesc(lclNum); + if (dsc->lvTracked) + { + return VarSetOps::IsMember(this, block->bbLiveIn, dsc->lvVarIndex); + } + + assert(dsc->lvInSsa); + return IsInsertedSsaLiveIn(block, lclNum); +} + struct CursorInfo { BasicBlock* Block; diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index ab66ce9072ef7e..3973fc590e9d7f 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1366,28 +1366,38 @@ class IncrementalLiveInBuilder { Compiler* m_comp; ArrayStack m_queue; - BitVecTraits m_traits; - BitVec m_visited; public: IncrementalLiveInBuilder(Compiler* comp) : m_comp(comp) , m_queue(comp->getAllocator(CMK_SSA)) - , m_traits(comp->fgBBNumMax + 1, comp) - , m_visited(BitVecOps::MakeEmpty(&m_traits)) { } void MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef); }; +//------------------------------------------------------------------------ +// MarkLiveInBackwards: Given a use and its reaching definition, mark that +// local as live-in into all blocks on the path from the reaching definition to +// the use. +// +// Parameters: +// lclNum - The local +// use - The use +// reachingDef - The reaching definition of the use +// void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef) { - BitVecOps::ClearD(&m_traits, m_visited); m_queue.Reset(); m_queue.Push(use.Block); - BitVecOps::AddElemD(&m_traits, m_visited, use.Block->bbNum); + if (!m_comp->AddInsertedSsaLiveIn(use.Block, lclNum)) + { + // We've already marked this block as live-in before -- no need to + // repeat that work (everyone should agree on reaching defs) + return; + } while (!m_queue.Empty()) { @@ -1397,16 +1407,9 @@ void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, const UseDef continue; } - if (!m_comp->AddInsertedSsaLiveIn(block, lclNum)) - { - // Already marked live-in here; expect to have been marked live in - // to this reaching def. 
- continue; - } - for (FlowEdge* edge = m_comp->BlockPredsWithEH(block); edge != nullptr; edge = edge->getNextPredEdge()) { - if (BitVecOps::TryAddElemD(&m_traits, m_visited, edge->getSourceBlock()->bbNum)) + if (m_comp->AddInsertedSsaLiveIn(edge->getSourceBlock(), lclNum)) { m_queue.Push(edge->getSourceBlock()); } From c9c22fc94593a473fc8d174075884ee2e76d52a3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 12:56:38 +0100 Subject: [PATCH 15/25] Run jit-format --- src/coreclr/jit/inductionvariableopts.cpp | 3 ++- src/coreclr/jit/ssabuilder.cpp | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp index c922a8467b4920..f854217b186f75 100644 --- a/src/coreclr/jit/inductionvariableopts.cpp +++ b/src/coreclr/jit/inductionvariableopts.cpp @@ -424,7 +424,8 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop) block->VisitAllSuccs(this, [=](BasicBlock* succ) { if (!loop->ContainsBlock(succ) && bbIsHandlerBeg(succ)) { - assert(!optLocalIsLiveIntoBlock(lclNum, succ) && "Candidate IV for widening is live into exceptional exit"); + assert(!optLocalIsLiveIntoBlock(lclNum, succ) && + "Candidate IV for widening is live into exceptional exit"); } return BasicBlockVisit::Continue; diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 3973fc590e9d7f..1b2d97f190562d 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1364,7 +1364,7 @@ void Compiler::JitTestCheckSSA() class IncrementalLiveInBuilder { - Compiler* m_comp; + Compiler* m_comp; ArrayStack m_queue; public: @@ -1387,7 +1387,9 @@ class IncrementalLiveInBuilder // use - The use // reachingDef - The reaching definition of the use // -void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef) +void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, + const UseDefLocation& use, + const UseDefLocation& reachingDef) { m_queue.Reset(); @@ -1448,8 +1450,12 @@ class IncrementalSsaBuilder { } - void Insert(); - static void MarkLiveInBackwards(Compiler* comp, unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef, BitVec& visitedSet); + void Insert(); + static void MarkLiveInBackwards(Compiler* comp, + unsigned lclNum, + const UseDefLocation& use, + const UseDefLocation& reachingDef, + BitVec& visitedSet); }; //------------------------------------------------------------------------ @@ -1504,9 +1510,10 @@ UseDefLocation IncrementalSsaBuilder::FindOrCreateReachingDef(const UseDefLocati continue; } - UseDefLocation phiArgUse = UseDefLocation(pred, nullptr, nullptr); + UseDefLocation phiArgUse = UseDefLocation(pred, nullptr, nullptr); UseDefLocation phiArgReachingDef = FindOrCreateReachingDef(phiArgUse); - SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgReachingDef.Tree->GetSsaNum(), pred); + SsaBuilder::AddNewPhiArg(m_comp, dom, phiDef, phi, m_lclNum, phiArgReachingDef.Tree->GetSsaNum(), + pred); // The phi arg is modelled at the end of the pred block; // mark liveness for it. 
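The backwards live-in marking introduced in the patches above is, at its core, a reverse reachability walk: start at the block containing the use, walk predecessor edges, stop whenever the walk reaches the block containing the reaching definition, and record every block entered along the way as live-in. A minimal standalone sketch of that idea follows; the Block type, its preds list, and the std::set used as the live-in side table are stand-ins for illustration only, not the JIT's BasicBlock, FlowEdge, or BasicBlockLocalPairSet.

// Minimal sketch of "mark live-in backwards", assuming a hypothetical Block
// type with an integer id and a predecessor list. Not the JIT's types.
#include <queue>
#include <set>
#include <utility>
#include <vector>

struct Block
{
    unsigned            id;
    std::vector<Block*> preds;
};

// Mark lclNum as live-in to every block on a path from reachingDefBlock to
// useBlock (the use's block included, the def's block excluded), by walking
// predecessor edges backwards from the use.
void MarkLiveInBackwards(unsigned lclNum, Block* useBlock, Block* reachingDefBlock,
                         std::set<std::pair<unsigned, unsigned>>& liveIn)
{
    if (useBlock == reachingDefBlock)
    {
        // Def and use are in the same block: nothing new becomes live-in.
        return;
    }

    std::queue<Block*> queue;
    if (liveIn.insert({useBlock->id, lclNum}).second)
    {
        queue.push(useBlock);
    }

    while (!queue.empty())
    {
        Block* block = queue.front();
        queue.pop();

        for (Block* pred : block->preds)
        {
            if (pred == reachingDefBlock)
            {
                // The reaching def covers this path; stop the walk here.
                continue;
            }

            if (liveIn.insert({pred->id, lclNum}).second)
            {
                queue.push(pred);
            }
        }
    }
}

Because a block is enqueued only when its (block, local) pair is newly inserted, the live-in side table doubles as the visited set, which is why MarkLiveInBackwards above can do without a separate BitVec of visited blocks.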
From e811f1add0f432e552da39af986143996652bc71 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 12:58:47 +0100 Subject: [PATCH 16/25] Switch bbNum -> bbID --- src/coreclr/jit/block.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 87fc23a519a1c1..99e5e3e4bf2499 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1026,10 +1026,10 @@ unsigned JitPtrKeyFuncs::GetHashCode(const BasicBlock* ptr) unsigned hash = SsaStressHashHelper(); if (hash != 0) { - return (hash ^ (ptr->bbNum << 16) ^ ptr->bbNum); + return (hash ^ (ptr->bbID << 16) ^ ptr->bbID); } #endif - return ptr->bbNum; + return ptr->bbID; } //------------------------------------------------------------------------ From cd0cd446eab214248d2425386c793d8dfc15bd19 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 14:03:37 +0100 Subject: [PATCH 17/25] Fix liveness marking --- src/coreclr/jit/ssabuilder.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 1b2d97f190562d..a955c041d18c88 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1391,9 +1391,12 @@ void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, const UseDefLocation& use, const UseDefLocation& reachingDef) { - m_queue.Reset(); + if (use.Block == reachingDef.Block) + { + // No work to be done + return; + } - m_queue.Push(use.Block); if (!m_comp->AddInsertedSsaLiveIn(use.Block, lclNum)) { // We've already marked this block as live-in before -- no need to @@ -1401,19 +1404,24 @@ void IncrementalLiveInBuilder::MarkLiveInBackwards(unsigned lclNum, return; } + m_queue.Reset(); + m_queue.Push(use.Block); + while (!m_queue.Empty()) { BasicBlock* block = m_queue.Pop(); - if (block == reachingDef.Block) - { - continue; - } for (FlowEdge* edge = m_comp->BlockPredsWithEH(block); edge != nullptr; edge = edge->getNextPredEdge()) { - if (m_comp->AddInsertedSsaLiveIn(edge->getSourceBlock(), lclNum)) + BasicBlock* pred = edge->getSourceBlock(); + if (pred == reachingDef.Block) + { + continue; + } + + if (m_comp->AddInsertedSsaLiveIn(pred, lclNum)) { - m_queue.Push(edge->getSourceBlock()); + m_queue.Push(pred); } } } From 2b7f8215373662e0ce2c69847f80afa172174ef1 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 14:39:05 +0100 Subject: [PATCH 18/25] Fix hash code --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a97f5cb3131072..8e2485610aa38f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2965,7 +2965,7 @@ class Compiler } static unsigned GetHashCode(const BasicBlockLocalPair& val) { - unsigned hash = static_cast(reinterpret_cast(&val)); + unsigned hash = val.Block->bbID; hash ^= val.LclNum + 0x9e3779b9 + (hash << 19) + (hash >> 13); return hash; } From 69bbc003854268ea930c76442c7c70184003a2e3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 14:39:14 +0100 Subject: [PATCH 19/25] Revert unnecessary change --- src/coreclr/jit/block.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 99e5e3e4bf2499..87fc23a519a1c1 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1026,10 +1026,10 @@ unsigned 
JitPtrKeyFuncs::GetHashCode(const BasicBlock* ptr) unsigned hash = SsaStressHashHelper(); if (hash != 0) { - return (hash ^ (ptr->bbID << 16) ^ ptr->bbID); + return (hash ^ (ptr->bbNum << 16) ^ ptr->bbNum); } #endif - return ptr->bbID; + return ptr->bbNum; } //------------------------------------------------------------------------ From e0d1442dfe0fd9c56225e7df888f5e7fb8d39002 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 14:57:01 +0100 Subject: [PATCH 20/25] Skip SSA insertion for large IDFs --- src/coreclr/jit/ssabuilder.cpp | 68 ++++++++++++++++++++++------------ src/coreclr/jit/ssabuilder.h | 2 +- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index a955c041d18c88..3d0845c2ed896c 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1458,7 +1458,7 @@ class IncrementalSsaBuilder { } - void Insert(); + bool Insert(); static void MarkLiveInBackwards(Compiler* comp, unsigned lclNum, const UseDefLocation& use, @@ -1689,10 +1689,39 @@ Statement* IncrementalSsaBuilder::LatestStatement(Statement* stmt1, Statement* s //------------------------------------------------------------------------ // Insert: Insert the uses and definitions in SSA. // -void IncrementalSsaBuilder::Insert() +// Returns: +// True if we were able to insert the local into SSA. False if we gave up +// (due to hitting internal limits). +// +bool IncrementalSsaBuilder::Insert() { FlowGraphDfsTree* dfsTree = m_comp->m_dfsTree; + // Compute iterated dominance frontiers of all real definitions. These are + // the blocks that unpruned phi definitions would be inserted into. We + // insert the phis lazily to end up with pruned SSA, but we still need to + // know which blocks are candidates for phis. + BlkVector idf(m_comp->getAllocator(CMK_SSA)); + + for (int i = 0; i < m_defs.Height(); i++) + { + BasicBlock* block = m_defs.BottomRef(i).Block; + idf.clear(); + m_comp->m_domFrontiers->ComputeIteratedDominanceFrontier(block, &idf); + + for (BasicBlock* idfBlock : idf) + { + BitVecOps::AddElemD(&m_poTraits, m_iteratedDominanceFrontiers, idfBlock->bbPostorderNum); + } + } + + // The IDF gives a bound on the potential recursion depth of + // FindOrCreateReachingDef. Limit this to a value we know won't overflow. + if (BitVecOps::Count(&m_poTraits, m_iteratedDominanceFrontiers) > 100) + { + return false; + } + LclVarDsc* dsc = m_comp->lvaGetDesc(m_lclNum); // Alloc SSA numbers for all real definitions. for (int i = 0; i < m_defs.Height(); i++) @@ -1712,24 +1741,6 @@ void IncrementalSsaBuilder::Insert() JITDUMP(" [%06u] d:%u\n", Compiler::dspTreeID(def.Tree), ssaNum); } - // Compute iterated dominance frontiers of all real definitions. These are - // the blocks that unpruned phi definitions would be inserted into. We - // insert the phis lazily to end up with pruned SSA, but we still need to - // know which blocks are candidates for phis. - BlkVector idf(m_comp->getAllocator(CMK_SSA)); - - for (int i = 0; i < m_defs.Height(); i++) - { - BasicBlock* block = m_defs.BottomRef(i).Block; - idf.clear(); - m_comp->m_domFrontiers->ComputeIteratedDominanceFrontier(block, &idf); - - for (BasicBlock* idfBlock : idf) - { - BitVecOps::AddElemD(&m_poTraits, m_iteratedDominanceFrontiers, idfBlock->bbPostorderNum); - } - } - // Finally compute all the reaching defs for the uses. 
for (int i = 0; i < m_uses.Height(); i++) { @@ -1746,6 +1757,8 @@ void IncrementalSsaBuilder::Insert() m_liveInBuilder.MarkLiveInBackwards(m_lclNum, use, def); } + + return true; } //------------------------------------------------------------------------ @@ -1758,12 +1771,16 @@ void IncrementalSsaBuilder::Insert() // defs - All STORE_LCL_VAR definitions of the local // uses - All LCL_VAR uses of the local // +// Returns: +// True if we were able to insert the local into SSA. False if we gave up +// (due to hitting internal limits). +// // Remarks: // All uses are required to never read an uninitialized value of the local. // That is, this function requires that all paths through the function go // through one of the defs in "defs" before any use in "uses". // -void SsaBuilder::InsertInSsa(Compiler* comp, +bool SsaBuilder::InsertInSsa(Compiler* comp, unsigned lclNum, ArrayStack& defs, ArrayStack& uses) @@ -1831,6 +1848,11 @@ void SsaBuilder::InsertInSsa(Compiler* comp, } IncrementalSsaBuilder builder(comp, lclNum, defs, uses); - builder.Insert(); - dsc->lvInSsa = true; + if (builder.Insert()) + { + dsc->lvInSsa = true; + return true; + } + + return false; } diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h index 20363669acba1f..446f7894e1da4a 100644 --- a/src/coreclr/jit/ssabuilder.h +++ b/src/coreclr/jit/ssabuilder.h @@ -53,7 +53,7 @@ class SsaBuilder // variable are stored in the "per SSA data" on the local descriptor. void Build(); - static void InsertInSsa(Compiler* comp, + static bool InsertInSsa(Compiler* comp, unsigned lclNum, ArrayStack& defs, ArrayStack& uses); From 55fa27b55cdb568b98d9a40d2cb8d02f5eeacb1b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 6 Nov 2024 14:57:40 +0100 Subject: [PATCH 21/25] Nit --- src/coreclr/jit/ssabuilder.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 3d0845c2ed896c..f100cd2cfbb0c7 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -1716,7 +1716,8 @@ bool IncrementalSsaBuilder::Insert() } // The IDF gives a bound on the potential recursion depth of - // FindOrCreateReachingDef. Limit this to a value we know won't overflow. + // FindOrCreateReachingDef. Limit this to a value we know won't stack + // overflow. 
From 55fa27b55cdb568b98d9a40d2cb8d02f5eeacb1b Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 6 Nov 2024 14:57:40 +0100
Subject: [PATCH 21/25] Nit

---
 src/coreclr/jit/ssabuilder.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp
index 3d0845c2ed896c..f100cd2cfbb0c7 100644
--- a/src/coreclr/jit/ssabuilder.cpp
+++ b/src/coreclr/jit/ssabuilder.cpp
@@ -1716,7 +1716,8 @@ bool IncrementalSsaBuilder::Insert()
     }
 
     // The IDF gives a bound on the potential recursion depth of
-    // FindOrCreateReachingDef. Limit this to a value we know won't overflow.
+    // FindOrCreateReachingDef. Limit this to a value we know won't stack
+    // overflow.
     if (BitVecOps::Count(&m_poTraits, m_iteratedDominanceFrontiers) > 100)
     {
         return false;

From 0e6d44f35aab7819e80d2b1180de8bfd43c6709e Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 6 Nov 2024 15:03:10 +0100
Subject: [PATCH 22/25] Fix build

---
 src/coreclr/jit/ssabuilder.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp
index f100cd2cfbb0c7..40d9562e30249a 100644
--- a/src/coreclr/jit/ssabuilder.cpp
+++ b/src/coreclr/jit/ssabuilder.cpp
@@ -1830,7 +1830,7 @@ bool SsaBuilder::InsertInSsa(Compiler* comp,
         }
 
         dsc->lvInSsa = true;
-        return;
+        return true;
     }
 
     if (comp->m_dfsTree == nullptr)
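
The next patch sharpens the dominance-frontier remark in flowgraph.cpp: B3 is in DF(B) when B dominates some predecessor B2 of B3 but does not dominate B3 itself (and B2 may be B). As a concrete example, in the diamond A -> B, A -> C, B -> D, C -> D we have DF(B) = {D}, because B dominates B, a predecessor of D, yet does not dominate D, while DF(A) is empty because A dominates every block. For reference, here is a minimal stand-alone sketch of the classic Cooper-Harvey-Kennedy computation; the Node type and its idom/preds fields are hypothetical stand-ins for the JIT's BasicBlock and FlowGraphDominatorTree.

// Minimal illustration only; assumes every block is reachable and the entry
// block has no predecessors. Not the JIT's representation.
#include <set>
#include <vector>

struct Node
{
    Node*              idom = nullptr; // immediate dominator (nullptr for entry)
    std::vector<Node*> preds;          // flow-graph predecessors
    std::set<Node*>    domFront;       // DF(node), filled in below
};

// A join node b (two or more predecessors) lands in DF(runner) for every
// runner on the dominator-tree path from each predecessor up to, but not
// including, idom(b): each such runner dominates a predecessor of b without
// dominating b itself, which matches the corrected wording in the patch.
void ComputeDominanceFrontiers(const std::vector<Node*>& blocks)
{
    for (Node* b : blocks)
    {
        if (b->preds.size() < 2)
        {
            continue; // only join nodes can appear in a dominance frontier
        }

        for (Node* pred : b->preds)
        {
            for (Node* runner = pred; runner != b->idom; runner = runner->idom)
            {
                runner->domFront.insert(b);
            }
        }
    }
}
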
From 2e1ba4ca83d58bee5d53885b762ee8be4ee3023d Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 6 Nov 2024 15:14:39 +0100
Subject: [PATCH 23/25] Feedback on remark

---
 src/coreclr/jit/flowgraph.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index ea31a3e3db65f0..dd4d95ab8356dd 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -6623,9 +6623,9 @@ FlowGraphDominanceFrontiers::FlowGraphDominanceFrontiers(FlowGraphDominatorTree*
 //   Data structure representing dominance frontiers.
 //
 // Remarks:
-//   Recall that the dominance frontier of a block B is the set of blocks
-//   B3 such that there exists some B2 s.t. B3 is a successor of B2, and
-//   B dominates B2. Note that this dominance need not be strict -- B2
+//   Recall that the dominance frontier of a block B is the set of blocks B3
+//   such that there exists some B2 s.t. B3 is a successor of B2, and B
+//   dominates B2 but not B3. Note that this dominance need not be strict -- B2
 //   and B may be the same node.
 //
 //   In other words, a block B' is in DF(B) if B dominates an immediate

From f8e5a039fb9d7828fdc74aad6a81cb2f39aa359a Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Wed, 6 Nov 2024 15:34:18 +0100
Subject: [PATCH 24/25] Fix live-in check

---
 src/coreclr/jit/block.cpp                 | 8 +++++++-
 src/coreclr/jit/inductionvariableopts.cpp | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp
index 87fc23a519a1c1..efc923b5e16b71 100644
--- a/src/coreclr/jit/block.cpp
+++ b/src/coreclr/jit/block.cpp
@@ -369,7 +369,13 @@ bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum)
         m_insertedSsaLocalsLiveIn = new (this, CMK_SSA) BasicBlockLocalPairSet(getAllocator(CMK_SSA));
     }
 
-    return !m_insertedSsaLocalsLiveIn->Set(BasicBlockLocalPair(block, lclNum), true, BasicBlockLocalPairSet::Overwrite);
+    if (m_insertedSsaLocalsLiveIn->Set(BasicBlockLocalPair(block, lclNum), true, BasicBlockLocalPairSet::Overwrite))
+    {
+        return false;
+    }
+
+    JITDUMP("Marked V%02u as live into " FMT_BB "\n", lclNum, block->bbNum);
+    return true;
 }
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/inductionvariableopts.cpp b/src/coreclr/jit/inductionvariableopts.cpp
index f854217b186f75..06cae5799a0da6 100644
--- a/src/coreclr/jit/inductionvariableopts.cpp
+++ b/src/coreclr/jit/inductionvariableopts.cpp
@@ -394,7 +394,7 @@ bool Compiler::optCanSinkWidenedIV(unsigned lclNum, FlowGraphNaturalLoop* loop)
     assert(dsc->lvInSsa);
 
     BasicBlockVisit result = loop->VisitRegularExitBlocks([=](BasicBlock* exit) {
-        if (optLocalIsLiveIntoBlock(lclNum, exit))
+        if (!optLocalIsLiveIntoBlock(lclNum, exit))
         {
             JITDUMP(" Exit " FMT_BB " does not need a sink; V%02u is not live-in\n", exit->bbNum, lclNum);
             return BasicBlockVisit::Continue;

From 63fca5d7992c2a7c0f887f131a47f7f6d9631448 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Thu, 7 Nov 2024 01:09:42 +0100
Subject: [PATCH 25/25] Add assert that fgFirstBB does not have live-in locals
 added

---
 src/coreclr/jit/block.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp
index efc923b5e16b71..69017ea2dfb544 100644
--- a/src/coreclr/jit/block.cpp
+++ b/src/coreclr/jit/block.cpp
@@ -364,6 +364,10 @@ bool Compiler::IsInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum)
 //
 bool Compiler::AddInsertedSsaLiveIn(BasicBlock* block, unsigned lclNum)
 {
+    // SSA-inserted locals always have explicit reaching defs for all uses, so
+    // it never makes sense for them to be live into the first block.
+    assert(block != fgFirstBB);
+
     if (m_insertedSsaLocalsLiveIn == nullptr)
     {
         m_insertedSsaLocalsLiveIn = new (this, CMK_SSA) BasicBlockLocalPairSet(getAllocator(CMK_SSA));
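
The last two patches adjust the live-in bookkeeping for locals that are put into SSA after liveness has been computed: AddInsertedSsaLiveIn keeps its "true only when the pair is newly recorded" contract but now also JITDUMPs the fact, the exit check in optCanSinkWidenedIV is inverted so that exits where the local is not live in are skipped, and the new assert records that such locals can never be live into the first block because every use has an explicit reaching def. A stand-alone sketch of this kind of (block, local) "breadcrumb" set follows; the class and method names are hypothetical, not the JIT's BasicBlockLocalPairSet.

// Minimal illustration only; names are hypothetical, not the JIT's
// BasicBlockLocalPairSet.
#include <cassert>
#include <set>
#include <utility>

class InsertedSsaLiveInSet
{
    std::set<std::pair<unsigned, unsigned>> m_pairs; // (block number, local number)
    unsigned                                m_firstBlockNum;

public:
    explicit InsertedSsaLiveInSet(unsigned firstBlockNum) : m_firstBlockNum(firstBlockNum)
    {
    }

    // Record that local 'lclNum' is live into block 'blockNum'. Returns true
    // only when this is new information, so a backwards propagation over
    // predecessors can stop once nothing changes.
    bool Add(unsigned blockNum, unsigned lclNum)
    {
        // An SSA-inserted local has an explicit reaching def before every
        // use, so it can never be live into the first block.
        assert(blockNum != m_firstBlockNum);

        return m_pairs.emplace(blockNum, lclNum).second;
    }

    // Query used to decide, for example, whether a widened IV needs to be
    // sunk into a loop exit: no entry means the local is not live into it.
    bool IsLiveIn(unsigned blockNum, unsigned lclNum) const
    {
        return m_pairs.count({blockNum, lclNum}) != 0;
    }
};
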