Skip to content

Commit 450f5cf

Browse files
kripkenradekdoulik
authored andcommitted
[NFC] LocalGraph: Optimize params with no sets (WebAssembly#6046)
If a local index has no sets, then all gets of that index read from the entry block (a param, or a zero for a local). This is actually a common case, where a param has no other set, and so it is worth optimizing, which this PR does by avoiding any flowing operation at all for that index: we just skip and write the entry block as the source of information for such gets. WebAssembly#6042 on precompute-propagate goes from 3 minutes to 2 seconds with this (!). But that testcase is rather special in that it is a huge function with many, many gets in it, so the overhead we remove is very noticeable there.
1 parent 1c1aeb1 commit 450f5cf

File tree

2 files changed

+73
-7
lines changed

2 files changed

+73
-7
lines changed

src/ir/LocalGraph.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,10 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
106106
auto numLocals = func->getNumLocals();
107107
std::vector<FlowBlock*> work;
108108

109+
// Track if we have unreachable code anywhere, as if we do that may inhibit
110+
// certain optimizations below.
111+
bool hasUnreachable = false;
112+
109113
// Convert input blocks (basicBlocks) into more efficient flow blocks to
110114
// improve memory access.
111115
std::vector<FlowBlock> flowBlocks;
@@ -114,9 +118,19 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
114118
// Init mapping between basicblocks and flowBlocks
115119
std::unordered_map<BasicBlock*, FlowBlock*> basicToFlowMap;
116120
for (Index i = 0; i < basicBlocks.size(); ++i) {
117-
basicToFlowMap[basicBlocks[i].get()] = &flowBlocks[i];
121+
auto* block = basicBlocks[i].get();
122+
basicToFlowMap[block] = &flowBlocks[i];
123+
// Check for unreachable code. Note we ignore the entry block (index 0) as
124+
// that is always reached when we are called.
125+
if (i != 0 && block->in.empty()) {
126+
hasUnreachable = true;
127+
}
118128
}
119129

130+
// We note which local indexes have local.sets, as that can help us
131+
// optimize later (if there are none at all).
132+
std::vector<bool> hasSet(numLocals, false);
133+
120134
const size_t NULL_ITERATION = -1;
121135

122136
FlowBlock* entryFlowBlock = nullptr;
@@ -140,6 +154,7 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
140154
flowBlock.lastSets.reserve(block->contents.lastSets.size());
141155
for (auto set : block->contents.lastSets) {
142156
flowBlock.lastSets.emplace_back(set);
157+
hasSet[set.first] = true;
143158
}
144159
}
145160
assert(entryFlowBlock != nullptr);
@@ -185,6 +200,24 @@ struct Flower : public CFGWalker<Flower, Visitor<Flower>, Info> {
185200
if (gets.empty()) {
186201
continue;
187202
}
203+
if (!hasUnreachable && !hasSet[index]) {
204+
// This local index has no sets, so we know all gets will end up
205+
// reaching the entry block. Do that here as an optimization to avoid
206+
// flowing through the (potentially very many) blocks in the function.
207+
//
208+
// Note that we must check for unreachable code in this function, as
209+
// if there is any then we would not be precise: in that case, the
210+
// gets may either have the entry value, or no value at all. It would
211+
// be safe to mark the entry value in that case anyhow (as it only
212+
// matters in unreachable code), but to keep the IR consistent and to
213+
// avoid confusion when debugging, simply do not optimize if
214+
// there is anything unreachable (which will not happen normally, as
215+
// DCE should run before passes that use this utility).
216+
for (auto* get : gets) {
217+
getSetses[get].insert(nullptr);
218+
}
219+
continue;
220+
}
188221
work.push_back(&block);
189222
// Note that we may need to revisit the later parts of this initial
190223
// block, if we are in a loop, so don't mark it as seen.

test/lit/passes/precompute-gc.wast

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@
229229
(struct.get $struct 0 (local.get $x))
230230
)
231231
)
232-
;; CHECK: (func $ref-comparisons (type $8) (param $x (ref null $struct)) (param $y (ref null $struct))
232+
;; CHECK: (func $ref-comparisons (type $9) (param $x (ref null $struct)) (param $y (ref null $struct))
233233
;; CHECK-NEXT: (local $z (ref null $struct))
234234
;; CHECK-NEXT: (local $w (ref null $struct))
235235
;; CHECK-NEXT: (call $log
@@ -407,7 +407,7 @@
407407
(local.get $tempresult)
408408
)
409409

410-
;; CHECK: (func $propagate-different-params (type $9) (param $input1 (ref $empty)) (param $input2 (ref $empty)) (result i32)
410+
;; CHECK: (func $propagate-different-params (type $10) (param $input1 (ref $empty)) (param $input2 (ref $empty)) (result i32)
411411
;; CHECK-NEXT: (local $tempresult i32)
412412
;; CHECK-NEXT: (local.set $tempresult
413413
;; CHECK-NEXT: (ref.eq
@@ -723,7 +723,7 @@
723723
)
724724
)
725725

726-
;; CHECK: (func $helper (type $10) (param $0 i32) (result i32)
726+
;; CHECK: (func $helper (type $11) (param $0 i32) (result i32)
727727
;; CHECK-NEXT: (unreachable)
728728
;; CHECK-NEXT: )
729729
(func $helper (param i32) (result i32)
@@ -801,14 +801,14 @@
801801
)
802802
)
803803

804-
;; CHECK: (func $receive-f64 (type $11) (param $0 f64)
804+
;; CHECK: (func $receive-f64 (type $12) (param $0 f64)
805805
;; CHECK-NEXT: (unreachable)
806806
;; CHECK-NEXT: )
807807
(func $receive-f64 (param f64)
808808
(unreachable)
809809
)
810810

811-
;; CHECK: (func $odd-cast-and-get-non-null (type $12) (param $temp (ref $func-return-i32))
811+
;; CHECK: (func $odd-cast-and-get-non-null (type $13) (param $temp (ref $func-return-i32))
812812
;; CHECK-NEXT: (local.set $temp
813813
;; CHECK-NEXT: (ref.cast (ref nofunc)
814814
;; CHECK-NEXT: (ref.func $receive-f64)
@@ -836,7 +836,7 @@
836836
)
837837
)
838838

839-
;; CHECK: (func $new_block_unreachable (type $13) (result anyref)
839+
;; CHECK: (func $new_block_unreachable (type $8) (result anyref)
840840
;; CHECK-NEXT: (block ;; (replaces something unreachable we can't emit)
841841
;; CHECK-NEXT: (drop
842842
;; CHECK-NEXT: (block
@@ -1033,4 +1033,37 @@
10331033
)
10341034
)
10351035
)
1036+
1037+
;; CHECK: (func $get-nonnullable-in-unreachable (type $8) (result anyref)
1038+
;; CHECK-NEXT: (local $x (ref any))
1039+
;; CHECK-NEXT: (local.tee $x
1040+
;; CHECK-NEXT: (unreachable)
1041+
;; CHECK-NEXT: )
1042+
;; CHECK-NEXT: (if
1043+
;; CHECK-NEXT: (i32.const 1)
1044+
;; CHECK-NEXT: (unreachable)
1045+
;; CHECK-NEXT: )
1046+
;; CHECK-NEXT: (local.get $x)
1047+
;; CHECK-NEXT: )
1048+
(func $get-nonnullable-in-unreachable (result anyref)
1049+
(local $x (ref any))
1050+
;; We cannot read a non-nullable local without setting it first, but it is ok
1051+
;; to do so here because we are in unreachable code. We should also not error
1052+
;; about this get seeming to read the default value from the function entry
1053+
;; (because it does not, as the entry is not reachable from it). Nothing is
1054+
;; expected to be optimized here.
1055+
1056+
;; This unreachable set is needed for the later get to validate.
1057+
(local.set $x
1058+
(unreachable)
1059+
)
1060+
;; This if is needed so we have an interesting enough CFG that a possible
1061+
;; assertion can be hit about reading the default value from the entry in a
1062+
;; later block.
1063+
(if
1064+
(i32.const 1)
1065+
(unreachable)
1066+
)
1067+
(local.get $x)
1068+
)
10361069
)

0 commit comments

Comments
 (0)