Skip to content

Commit 0d95b20

Browse files
committed
[LICM] Reassociate & hoist add expressions
This patch allows LICM to reassociate and hoist the following expressions:

```
loop:
  %sum = add nsw %iv, C1
  %cmp = icmp <signed pred> %sum, C2
```

where `C1` and `C2` are loop invariants. The reassociated version looks like

```
preheader:
  %inv_sum = C2 - C1
  ...
loop:
  %cmp = icmp <signed pred> %iv, %inv_sum
```

In order to prove legality, we need both the initial addition and the newly created subtraction to happen without overflow.

Differential Revision: https://reviews.llvm.org/D149132
Reviewed By: skatkov
1 parent a7380fb commit 0d95b20

File tree

2 files changed

+96
-8
lines changed

2 files changed

+96
-8
lines changed

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 90 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,8 @@ STATISTIC(NumMinMaxHoisted,
106106
"Number of min/max expressions hoisted out of the loop");
107107
STATISTIC(NumGEPsHoisted,
108108
"Number of geps reassociated and hoisted out of the loop");
109+
STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
110+
"and hoisted out of the loop");
109111

110112
/// Memory promotion is enabled by default.
111113
static cl::opt<bool>
@@ -2525,10 +2527,89 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
25252527
return true;
25262528
}
25272529

2530+
/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
2531+
/// C1 and C2 are loop invariants and LV is a loop-variant.
///
/// \p Pred is the signed predicate to install on \p ICmp; \p VariantLHS is the
/// loop-variant side of the comparison (the candidate "LV + C1") and
/// \p InvariantRHS is the loop-invariant side (C2). All three preconditions are
/// asserted below.
///
/// \returns true if \p ICmp was rewritten in place to compare the variant
/// addend against a new "C2 - C1" emitted in the preheader (the original add is
/// erased); false if the pattern did not match or the subtraction could not be
/// proved free of signed overflow, in which case nothing is changed.
2532+
static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
2533+
Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
2534+
ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
2535+
AssumptionCache *AC, DominatorTree *DT) {
2536+
assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
2537+
assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
2538+
assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
2539+
2540+
// Try to represent VariantLHS as a sum of invariant and variant operands.
// Only an add carrying the nsw flag is accepted.
2541+
using namespace PatternMatch;
2542+
Value *VariantOp, *InvariantOp;
2543+
if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
2544+
return false;
2545+
2546+
// The add's operands may come in either order, so normalize them to
2547+
// "VariantOp + InvariantOp". Bail out unless exactly one of them is invariant.
2548+
if (L.isLoopInvariant(VariantOp))
2549+
std::swap(VariantOp, InvariantOp);
2550+
if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
2551+
return false;
2552+
2553+
// In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
2554+
// freely move values from left side of inequality to right side (just as in
2555+
// normal linear arithmetic). Overflows make things much more complicated, so
2556+
// we only proceed when "C2 - C1" is proved not to overflow at ICmp.
2557+
auto &DL = L.getHeader()->getModule()->getDataLayout();
2558+
bool ProvedNoOverflowAfterReassociate =
2559+
computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp,
2560+
DT) == llvm::OverflowResult::NeverOverflows;
2561+
if (!ProvedNoOverflowAfterReassociate)
2562+
return false;
2563+
// Emit the invariant subtraction right before the preheader's terminator.
auto *Preheader = L.getLoopPreheader();
2564+
assert(Preheader && "Loop is not in simplify form?");
2565+
IRBuilder<> Builder(Preheader->getTerminator());
2566+
Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
2567+
/*HasNUW*/ false, /*HasNSW*/ true);
2568+
// Rewrite the comparison in place. Pred is installed explicitly because the
// caller passes it separately from ICmp (hoistAddSub may have swapped it).
ICmp.setPredicate(Pred);
2569+
ICmp.setOperand(0, VariantOp);
2570+
ICmp.setOperand(1, NewCmpOp);
2571+
// Both ICmp operands were overwritten above, so ICmp no longer uses the add.
// hoistAddSub only calls this when the add had a single use.
eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
2572+
return true;
2573+
}
2574+
2575+
/// Reassociate and hoist add/sub expressions.
///
/// Handles \p I only when it is a signed icmp with exactly one loop-invariant
/// operand and whose loop-variant operand has a single use. Such a compare is
/// handed to hoistAdd (subtraction is not handled yet, see the TODO below).
///
/// \returns true if hoistAdd rewrote the comparison, false otherwise.
2576+
static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
2577+
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2578+
DominatorTree *DT) {
2579+
using namespace PatternMatch;
2580+
ICmpInst::Predicate Pred;
2581+
Value *LHS, *RHS;
2582+
if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
2583+
return false;
2584+
2585+
// TODO: Support unsigned predicates?
2586+
if (!ICmpInst::isSigned(Pred))
2587+
return false;
2588+
2589+
// Put the variant operand in the LHS position. Swapping the operands requires
// swapping the predicate as well so the comparison keeps its meaning.
2590+
if (L.isLoopInvariant(LHS)) {
2591+
std::swap(LHS, RHS);
2592+
Pred = ICmpInst::getSwappedPredicate(Pred);
2593+
}
2594+
// We want to delete the initial operation after reassociation, so only do it
2595+
// if it has no other uses. Also bail out unless LHS is variant and RHS invariant.
2596+
if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
2597+
return false;
2598+
2599+
// TODO: We could go with smarter context, taking common dominator of all I's
2600+
// users instead of I itself.
2601+
if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
2602+
return true;
2603+
2604+
// TODO: Support Sub.
2605+
2606+
return false;
2607+
}
2608+
25282609
static bool hoistArithmetics(Instruction &I, Loop &L,
25292610
ICFLoopSafetyInfo &SafetyInfo,
2530-
MemorySSAUpdater &MSSAU,
2531-
AssumptionCache *AC, DominatorTree *DT) {
2611+
MemorySSAUpdater &MSSAU, AssumptionCache *AC,
2612+
DominatorTree *DT) {
25322613
// Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
25332614
// into (x < min(INV1, INV2)), and hoisting the invariant part of this
25342615
// expression out of the loop.
@@ -2545,6 +2626,13 @@ static bool hoistArithmetics(Instruction &I, Loop &L,
25452626
return true;
25462627
}
25472628

2629+
// Try to hoist add/sub's by reassociation.
2630+
if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
2631+
++NumHoisted;
2632+
++NumAddSubHoisted;
2633+
return true;
2634+
}
2635+
25482636
return false;
25492637
}
25502638

llvm/test/Transforms/LICM/hoist-add-sub.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -165,18 +165,18 @@ out_of_bounds:
165165
}
166166

167167

168-
; TODO: x + iv < 4 ==> iv < 4 - x
168+
; x + iv < 4 ==> iv < 4 - x
169169
define i32 @test_02(ptr %p, ptr %x_p, ptr %length_p) {
170170
; CHECK-LABEL: define i32 @test_02
171171
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
172172
; CHECK-NEXT: entry:
173173
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]]
174174
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
175+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]]
175176
; CHECK-NEXT: br label [[LOOP:%.*]]
176177
; CHECK: loop:
177178
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
178-
; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[X]], [[IV]]
179-
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
179+
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
180180
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
181181
; CHECK: backedge:
182182
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
@@ -391,18 +391,18 @@ failed:
391391
ret i32 -2
392392
}
393393

394-
; TODO: iv + x < 4 ==> iv < 4 - x
394+
; iv + x < 4 ==> iv < 4 - x
395395
define i32 @test_04(ptr %p, ptr %x_p, ptr %length_p) {
396396
; CHECK-LABEL: define i32 @test_04
397397
; CHECK-SAME: (ptr [[P:%.*]], ptr [[X_P:%.*]], ptr [[LENGTH_P:%.*]]) {
398398
; CHECK-NEXT: entry:
399399
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[X_P]], align 4, !range [[RNG0]]
400400
; CHECK-NEXT: [[LENGTH:%.*]] = load i32, ptr [[LENGTH_P]], align 4, !range [[RNG0]]
401+
; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 4, [[X]]
401402
; CHECK-NEXT: br label [[LOOP:%.*]]
402403
; CHECK: loop:
403404
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
404-
; CHECK-NEXT: [[ARITH:%.*]] = add nsw i32 [[IV]], [[X]]
405-
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[ARITH]], 4
405+
; CHECK-NEXT: [[X_CHECK:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_OP]]
406406
; CHECK-NEXT: br i1 [[X_CHECK]], label [[OUT_OF_BOUNDS:%.*]], label [[BACKEDGE]]
407407
; CHECK: backedge:
408408
; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]

0 commit comments

Comments
 (0)