@@ -9298,6 +9298,7 @@ static void addExitUsersForFirstOrderRecurrences(
92989298VPlanPtr
92999299LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes (VFRange &Range) {
93009300
9301+ using namespace llvm ::VPlanPatternMatch;
93019302 SmallPtrSet<const InterleaveGroup<Instruction> *, 1 > InterleaveGroups;
93029303
93039304 // ---------------------------------------------------------------------------
@@ -9321,6 +9322,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93219322 PSE, RequiresScalarEpilogueCheck,
93229323 CM.foldTailByMasking (), OrigLoop);
93239324
9325+ // Build hierarchical CFG.
9326+ VPlanHCFGBuilder HCFGBuilder (OrigLoop, LI, *Plan);
9327+ HCFGBuilder.buildHierarchicalCFG ();
9328+
93249329 // Don't use getDecisionAndClampRange here, because we don't know the UF
93259330 // so this function is better to be conservative, rather than to split
93269331 // it up into different VPlans.
@@ -9371,12 +9376,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93719376 // Construct recipes for the instructions in the loop
93729377 // ---------------------------------------------------------------------------
93739378
9374- // Scan the body of the loop in a topological order to visit each basic block
9375- // after having visited its predecessor basic blocks.
9376- LoopBlocksDFS DFS (OrigLoop);
9377- DFS.perform (LI);
9378-
9379- VPBasicBlock *HeaderVPBB = Plan->getVectorLoopRegion ()->getEntryBasicBlock ();
9379+ VPRegionBlock *LoopRegion = Plan->getVectorLoopRegion ();
9380+ VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock ();
93809381 VPBasicBlock *VPBB = HeaderVPBB;
93819382 BasicBlock *HeaderBB = OrigLoop->getHeader ();
93829383 bool NeedsMasks =
@@ -9389,26 +9390,70 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
93899390 RecipeBuilder.collectScaledReductions (Range);
93909391
93919392 auto *MiddleVPBB = Plan->getMiddleBlock ();
9393+
9394+ // Scan the body of the loop in a topological order to visit each basic block
9395+ // after having visited its predecessor basic blocks.
9396+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT (
9397+ HeaderVPBB);
9398+
93929399 VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi ();
9393- for (BasicBlock *BB : make_range (DFS.beginRPO (), DFS.endRPO ())) {
9394- // Relevant instructions from basic block BB will be grouped into VPRecipe
9395- // ingredients and fill a new VPBasicBlock.
9396- if (VPBB != HeaderVPBB)
9397- VPBB->setName (BB->getName ());
9398- Builder.setInsertPoint (VPBB);
9400+ VPBlockBase *PrevVPBB = nullptr ;
9401+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
9402+ // Handle VPBBs down to the latch.
9403+ if (VPBB == LoopRegion->getExiting ()) {
9404+ assert (!HCFGBuilder.getIRBBForVPB (VPBB) &&
9405+ " the latch block shouldn't have a corresponding IRBB" );
9406+ VPBlockUtils::connectBlocks (PrevVPBB, VPBB);
9407+ break ;
9408+ }
93999409
9400- if (VPBB == HeaderVPBB)
9410+ // Create mask based on the IR BB corresponding to VPBB.
9411+ // TODO: Predicate directly based on VPlan.
9412+ Builder.setInsertPoint (VPBB, VPBB->begin ());
9413+ if (VPBB == HeaderVPBB) {
9414+ Builder.setInsertPoint (VPBB, VPBB->getFirstNonPhi ());
94019415 RecipeBuilder.createHeaderMask ();
9402- else if (NeedsMasks)
9403- RecipeBuilder.createBlockInMask (BB);
9416+ } else if (NeedsMasks) {
9417+ // FIXME: At the moment, masks need to be placed at the beginning of the
9418+ // block, as blends introduced for phi nodes need to use it. The created
9419+ // blends should be sunk after the mask recipes.
9420+ RecipeBuilder.createBlockInMask (HCFGBuilder.getIRBBForVPB (VPBB));
9421+ }
9422+
9423+ // Convert input VPInstructions to widened recipes.
9424+ for (VPRecipeBase &R : make_early_inc_range (*VPBB)) {
9425+ auto *SingleDef = cast<VPSingleDefRecipe>(&R);
9426+ auto *UnderlyingValue = SingleDef->getUnderlyingValue ();
9427+ // Skip recipes that do not need transforming, including canonical IV,
9428+ // wide canonical IV and VPInstructions without underlying values. The
9429+ // latter are added above for masking.
9430+ // FIXME: Migrate code relying on the underlying instruction from VPlan0
9431+ // to construct recipes below to not use the underlying instruction.
9432+ if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) ||
9433+ (isa<VPInstruction>(&R) && !UnderlyingValue))
9434+ continue ;
94049435
9405- // Introduce each ingredient into VPlan.
9406- // TODO: Model and preserve debug intrinsics in VPlan.
9407- for (Instruction &I : drop_end (BB->instructionsWithoutDebug (false ))) {
9408- Instruction *Instr = &I;
9436+ // FIXME: VPlan0, which models a copy of the original scalar loop, should
9437+ // not use VPWidenPHIRecipe to model the phis.
9438+ assert ((isa<VPWidenPHIRecipe>(&R) || isa<VPInstruction>(&R)) &&
9439+ UnderlyingValue && " unsupported recipe" );
9440+
9441+ if (isa<VPInstruction>(&R) &&
9442+ (cast<VPInstruction>(&R)->getOpcode () ==
9443+ VPInstruction::BranchOnCond ||
9444+ (cast<VPInstruction>(&R)->getOpcode () == Instruction::Switch))) {
9445+ R.eraseFromParent ();
9446+ break ;
9447+ }
9448+
9449+ // TODO: Gradually replace uses of underlying instruction by analyses on
9450+ // VPlan.
9451+ Instruction *Instr = cast<Instruction>(UnderlyingValue);
9452+ Builder.setInsertPoint (SingleDef);
94099453 SmallVector<VPValue *, 4 > Operands;
94109454 auto *Phi = dyn_cast<PHINode>(Instr);
94119455 if (Phi && Phi->getParent () == HeaderBB) {
9456+ // The backedge value will be added in fixHeaderPhis later.
94129457 Operands.push_back (Plan->getOrAddLiveIn (
94139458 Phi->getIncomingValueForBlock (OrigLoop->getLoopPreheader ())));
94149459 } else {
@@ -9420,15 +9465,16 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94209465 // in the exit block, a uniform store recipe will be created for the final
94219466 // invariant store of the reduction.
94229467 StoreInst *SI;
9423- if ((SI = dyn_cast<StoreInst>(&I )) &&
9468+ if ((SI = dyn_cast<StoreInst>(Instr )) &&
94249469 Legal->isInvariantAddressOfReduction (SI->getPointerOperand ())) {
94259470 // Only create recipe for the final invariant store of the reduction.
9426- if (!Legal->isInvariantStoreOfReduction (SI))
9427- continue ;
9428- auto *Recipe = new VPReplicateRecipe (
9429- SI, make_range (Operands.begin (), Operands.end ()),
9430- true /* IsUniform */ );
9431- Recipe->insertBefore (*MiddleVPBB, MBIP);
9471+ if (Legal->isInvariantStoreOfReduction (SI)) {
9472+ auto *Recipe = new VPReplicateRecipe (
9473+ SI, make_range (Operands.begin (), Operands.end ()),
9474+ true /* IsUniform */ );
9475+ Recipe->insertBefore (*MiddleVPBB, MBIP);
9476+ }
9477+ R.eraseFromParent ();
94329478 continue ;
94339479 }
94349480
@@ -9438,25 +9484,29 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
94389484 Recipe = RecipeBuilder.handleReplication (Instr, Operands, Range);
94399485
94409486 RecipeBuilder.setRecipe (Instr, Recipe);
9441- if (isa<VPHeaderPHIRecipe>(Recipe)) {
9442- // VPHeaderPHIRecipes must be kept in the phi section of HeaderVPBB. In
9443- // the following cases, VPHeaderPHIRecipes may be created after non-phi
9444- // recipes and need to be moved to the phi section of HeaderVPBB:
9445- // * tail-folding (non-phi recipes computing the header mask are
9446- // introduced earlier than regular header phi recipes, and should appear
9447- // after them)
9448- // * Optimizing truncates to VPWidenIntOrFpInductionRecipe.
9449-
9450- assert ((HeaderVPBB->getFirstNonPhi () == VPBB->end () ||
9451- CM.foldTailByMasking () || isa<TruncInst>(Instr)) &&
9452- " unexpected recipe needs moving" );
9487+ if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) && isa<TruncInst>(Instr)) {
9488+ // Optimized a truncate to VPWidenIntOrFpInductionRecipe. It needs to be
9489+ // moved to the phi section in the header.
94539490 Recipe->insertBefore (*HeaderVPBB, HeaderVPBB->getFirstNonPhi ());
9454- } else
9455- VPBB->appendRecipe (Recipe);
9456- }
9457-
9458- VPBlockUtils::insertBlockAfter (Plan->createVPBasicBlock (" " ), VPBB);
9459- VPBB = cast<VPBasicBlock>(VPBB->getSingleSuccessor ());
9491+ } else {
9492+ Builder.insert (Recipe);
9493+ }
9494+ if (Recipe->getNumDefinedValues () == 1 )
9495+ SingleDef->replaceAllUsesWith (Recipe->getVPSingleValue ());
9496+ else
9497+ assert (Recipe->getNumDefinedValues () == 0 &&
9498+ " Unexpected multidef recipe" );
9499+ R.eraseFromParent ();
9500+ }
9501+
9502+ // Flatten the CFG in the loop. Masks for blocks have already been generated
9503+ // and added to recipes as needed. To do so, first disconnect VPBB from its
9504+ // successors. Then connect VPBB to the previously visited VPBB.
9505+ for (auto *Succ : to_vector (VPBB->getSuccessors ()))
9506+ VPBlockUtils::disconnectBlocks (VPBB, Succ);
9507+ if (PrevVPBB)
9508+ VPBlockUtils::connectBlocks (PrevVPBB, VPBB);
9509+ PrevVPBB = VPBB;
94609510 }
94619511
94629512 // After here, VPBB should not be used.
0 commit comments