llvm
diff --git a/‎llvm/include/llvm/Analysis/LoopAccessAnalysis.h‎
Lines changed: 36 additions & 0 deletions b/‎llvm/include/llvm/Analysis/LoopAccessAnalysis.h‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Analysis/ScalarEvolution.h‎
Lines changed: 33 additions & 3 deletions b/‎llvm/include/llvm/Analysis/ScalarEvolution.h‎
Lines changed: 33 additions & 3 deletions
diff --git a/‎llvm/include/llvm/IR/IRBuilder.h‎
Lines changed: 7 additions & 0 deletions b/‎llvm/include/llvm/IR/IRBuilder.h‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Support/GenericLoopInfo.h‎
Lines changed: 4 additions & 0 deletions b/‎llvm/include/llvm/Support/GenericLoopInfo.h‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Support/GenericLoopInfoImpl.h‎
Lines changed: 10 additions & 0 deletions b/‎llvm/include/llvm/Support/GenericLoopInfoImpl.h‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h‎
Lines changed: 8 additions & 1 deletion b/‎llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h‎
Lines changed: 18 additions & 0 deletions b/‎llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h‎
Lines changed: 18 additions & 0 deletions
@@ -587,6 +587,9 @@ class LoopAccessInfo {
   /// not legal to insert them.
   bool hasConvergentOp() const { return HasConvergentOp; }
 
+  /// Return true if the loop may fault due to memory accesses.
+  bool mayFault() const { return LoopMayFault; }
+
   const RuntimePointerChecking *getRuntimePointerChecking() const {
     return PtrRtChecking.get();
   }
@@ -608,6 +611,24 @@ class LoopAccessInfo {
   unsigned getNumStores() const { return NumStores; }
   unsigned getNumLoads() const { return NumLoads;}
 
+  /// Returns the block that exits early from the loop, if there is one.
+  /// Otherwise returns nullptr.
+  BasicBlock *getSpeculativeEarlyExitingBlock() const {
+    return SpeculativeEarlyExitingBB;
+  }
+
+  /// Returns the successor of the block that exits early from the loop, if
+  /// there is one. Otherwise returns nullptr.
+  BasicBlock *getSpeculativeEarlyExitBlock() const {
+    return SpeculativeEarlyExitBB;
+  }
+
+  /// Returns all blocks with a countable exit, i.e. the exit-not-taken count
+  /// is known exactly at compile time.
+  const SmallVector<BasicBlock *, 4> &getCountableEarlyExitingBlocks() const {
+    return CountableEarlyExitBlocks;
+  }
+
   /// The diagnostics report generated for the analysis.  E.g. why we
   /// couldn't analyze the loop.
   const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
@@ -659,6 +680,10 @@ class LoopAccessInfo {
   /// pass.
   bool canAnalyzeLoop();
 
+  /// Returns true if this is a supported early exit loop that we can analyze
+  /// in this pass.
+  bool isAnalyzableEarlyExitLoop();
+
   /// Save the analysis remark.
   ///
   /// LAA does not directly emits the remarks.  Instead it stores it which the
@@ -696,6 +721,17 @@ class LoopAccessInfo {
   /// Cache the result of analyzeLoop.
   bool CanVecMem = false;
   bool HasConvergentOp = false;
+  bool LoopMayFault = false;
+
+  /// Keeps track of the early-exiting block, if present.
+  BasicBlock *SpeculativeEarlyExitingBB = nullptr;
+
+  /// Keeps track of the successor of the early-exiting block, if present.
+  BasicBlock *SpeculativeEarlyExitBB = nullptr;
+
+  /// Keeps track of all the early exits with known or countable exit-not-taken
+  /// counts.
+  SmallVector<BasicBlock *, 4> CountableEarlyExitBlocks;
 
   /// Indicator that there are non vectorizable stores to a uniform address.
   bool HasDependenceInvolvingLoopInvariantAddress = false;
 
@@ -892,9 +892,13 @@ class ScalarEvolution {
   /// Similar to getBackedgeTakenCount, except it will add a set of
   /// SCEV predicates to Predicates that are required to be true in order for
   /// the answer to be correct. Predicates can be checked with run-time
-  /// checks and can be used to perform loop versioning.
-  const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
-                                              SmallVector<const SCEVPredicate *, 4> &Predicates);
+  /// checks and can be used to perform loop versioning. If \p Speculative is
+  /// true, this will attempt to return the speculative backedge count for loops
+  /// with early exits. However, this is only possible if we can formulate an
+  /// exact expression for the backedge count from the latch block.
+  const SCEV *getPredicatedBackedgeTakenCount(
+      const Loop *L, SmallVector<const SCEVPredicate *, 4> &Predicates,
+      bool Speculative = false);
 
   /// When successful, this returns a SCEVConstant that is greater than or equal
   /// to (i.e. a "conservative over-approximation") of the value returend by
@@ -912,6 +916,12 @@ class ScalarEvolution {
     return getBackedgeTakenCount(L, SymbolicMaximum);
   }
 
+  /// Return all the exiting blocks in with exact exit counts.
+  void getExactExitingBlocks(const Loop *L,
+                             SmallVector<BasicBlock *, 4> *Blocks) {
+    getBackedgeTakenInfo(L).getExactExitingBlocks(L, this, Blocks);
+  }
+
   /// Return true if the backedge taken count is either the value returned by
   /// getConstantMaxBackedgeTakenCount or zero.
   bool isBackedgeTakenCountMaxOrZero(const Loop *L);
@@ -1534,13 +1544,27 @@ class ScalarEvolution {
     const SCEV *getExact(const Loop *L, ScalarEvolution *SE,
                          SmallVector<const SCEVPredicate *, 4> *Predicates = nullptr) const;
 
+    /// Similar to the above, except we permit unknown exit counts from
+    /// non-latch exit blocks. Any such early exit blocks must dominate the
+    /// latch and so the returned expression represents the speculative, or
+    /// maximum possible, *backedge-taken* count of the loop. If there is no
+    /// exact exit count for the latch this function returns
+    /// SCEVCouldNotCompute.
+    const SCEV *getSpeculative(
+        const Loop *L, ScalarEvolution *SE,
+        SmallVector<const SCEVPredicate *, 4> *Predicates = nullptr) const;
+
     /// Return the number of times this loop exit may fall through to the back
     /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
     /// this block before this number of iterations, but may exit via another
     /// block.
     const SCEV *getExact(const BasicBlock *ExitingBlock,
                          ScalarEvolution *SE) const;
 
+    /// Return all the exiting blocks in with exact exit counts.
+    void getExactExitingBlocks(const Loop *L, ScalarEvolution *SE,
+                               SmallVector<BasicBlock *, 4> *Blocks) const;
+
     /// Get the constant max backedge taken count for the loop.
     const SCEV *getConstantMax(ScalarEvolution *SE) const;
 
@@ -2316,6 +2340,9 @@ class PredicatedScalarEvolution {
   /// Get the (predicated) backedge count for the analyzed loop.
   const SCEV *getBackedgeTakenCount();
 
+  /// Get the (predicated) speculative backedge count for the analyzed loop.
+  const SCEV *getSpeculativeBackedgeTakenCount();
+
   /// Adds a new predicate.
   void addPredicate(const SCEVPredicate &Pred);
 
@@ -2384,6 +2411,9 @@ class PredicatedScalarEvolution {
 
   /// The backedge taken count.
   const SCEV *BackedgeCount = nullptr;
+
+  /// The speculative backedge taken count.
+  const SCEV *SpeculativeBackedgeCount = nullptr;
 };
 
 template <> struct DenseMapInfo<ScalarEvolution::FoldID> {
 
@@ -2503,6 +2503,13 @@ class IRBuilderBase {
     return CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask, Name);
   }
 
+  Value *CreateCountTrailingZeroElems(Type *ResTy, Value *Mask,
+                                      const Twine &Name = "") {
+    return CreateIntrinsic(
+        Intrinsic::experimental_cttz_elts, {ResTy, Mask->getType()},
+        {Mask, getInt1(/*ZeroIsPoison=*/true)}, nullptr, Name);
+  }
+
   Value *CreateExtractValue(Value *Agg, ArrayRef<unsigned> Idxs,
                             const Twine &Name = "") {
     if (auto *V = Folder.FoldExtractValue(Agg, Idxs))
 
@@ -294,6 +294,10 @@ template <class BlockT, class LoopT> class LoopBase {
   /// Otherwise return null.
   BlockT *getUniqueExitBlock() const;
 
+  /// Return the exit block for the latch if one exists. This function assumes
+  /// the loop has a latch.
+  BlockT *getLatchExitBlock() const;
+
   /// Return true if this loop does not have any exit blocks.
   bool hasNoExitBlocks() const;
 
 
@@ -159,6 +159,16 @@ BlockT *LoopBase<BlockT, LoopT>::getUniqueExitBlock() const {
   return getExitBlockHelper(this, true).first;
 }
 
+template <class BlockT, class LoopT>
+BlockT *LoopBase<BlockT, LoopT>::getLatchExitBlock() const {
+  BlockT *Latch = getLoopLatch();
+  assert(Latch && "Latch block must exists");
+  for (BlockT *Successor : children<BlockT *>(Latch))
+    if (!contains(Successor))
+      return Successor;
+  return nullptr;
+}
+
 /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
 template <class BlockT, class LoopT>
 void LoopBase<BlockT, LoopT>::getExitEdges(
 
@@ -124,6 +124,11 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
   /// "expanded" form.
   bool LSRMode;
 
+  /// If the loop has an early exit we may have to use the speculative backedge
+  /// count, since the normal backedge count function is unable to compute a
+  /// SCEV expression.
+  bool UseSpeculativeBackedgeCount;
+
   typedef IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> BuilderType;
   BuilderType Builder;
 
@@ -176,10 +181,12 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
 public:
   /// Construct a SCEVExpander in "canonical" mode.
   explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL,
-                        const char *name, bool PreserveLCSSA = true)
+                        const char *name, bool PreserveLCSSA = true,
+                        bool UseSpeculativeBackedgeCount = false)
       : SE(se), DL(DL), IVName(name), PreserveLCSSA(PreserveLCSSA),
         IVIncInsertLoop(nullptr), IVIncInsertPos(nullptr), CanonicalMode(true),
         LSRMode(false),
+        UseSpeculativeBackedgeCount(UseSpeculativeBackedgeCount),
         Builder(se.getContext(), InstSimplifyFolder(DL),
                 IRBuilderCallbackInserter(
                     [this](Instruction *I) { rememberInstruction(I); })) {
 
@@ -374,6 +374,24 @@ class LoopVectorizationLegality {
     return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
   }
 
+  /// Returns true if the loop has a early exit with a exact backedge
+  /// count that is speculative.
+  bool hasSpeculativeEarlyExit() const {
+    return LAI && LAI->getSpeculativeEarlyExitingBlock();
+  }
+
+  /// Returns the early exiting block in a loop with a speculative backedge
+  /// count.
+  BasicBlock *getSpeculativeEarlyExitingBlock() const {
+    return LAI->getSpeculativeEarlyExitingBlock();
+  }
+
+  /// Returns the destination of an early exiting block in a loop with a
+  /// speculative backedge count.
+  BasicBlock *getSpeculativeEarlyExitBlock() const {
+    return LAI->getSpeculativeEarlyExitBlock();
+  }
+
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
   bool isMaskRequired(const Instruction *I) const {