|
| 1 | +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Antonio Maiorano < [email protected]> |
| 3 | +Date: Thu, 25 Apr 2024 16:49:11 -0400 |
| 4 | +Subject: Fixed crash in loop unroll caused by bug in structurize loop exits |
| 5 | + (#6548) |
| 6 | + |
| 7 | +Fixed a bug in `hlsl::RemoveUnstructuredLoopExits` where when a new |
| 8 | +exiting block is created from splitting, it was added to the current |
| 9 | +loop being processed, when it could also part of an inner loop. Not |
| 10 | +adding the new block to inner loops that it's part of makes the inner |
| 11 | +loops malformed, and causes crash. |
| 12 | + |
| 13 | +This fix adds the new block to the inner most loop that it should be |
| 14 | +part of. Also adds the `StructurizeLoopExits` option to `loop-unroll` |
| 15 | +pass, which was missing before. |
| 16 | + |
| 17 | +Bug: chromium:333508731 |
| 18 | +Change-Id: I7efc21bc61aeb81b4906a600c35272af232710ea |
| 19 | +Reviewed-on: https://chromium-review.googlesource.com/c/external/github.com/microsoft/DirectXShaderCompiler/+/5490380 |
| 20 | +Reviewed-by: James Price < [email protected]> |
| 21 | +Reviewed-by: Ben Clayton < [email protected]> |
| 22 | + |
| 23 | +diff --git a/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp b/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp |
| 24 | +index b6a07d6b27a23ee3831e84cee82299d6d405a288..ef6718f0f22ee33e3f16f9801a64c1a6fb6c653a 100644 |
| 25 | +--- a/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp |
| 26 | ++++ b/lib/Transforms/Scalar/DxilRemoveUnstructuredLoopExits.cpp |
| 27 | +@@ -447,7 +447,12 @@ static bool RemoveUnstructuredLoopExitsIteration(BasicBlock *exiting_block, |
| 28 | + new_exiting_block->splitBasicBlock(new_exiting_block->getFirstNonPHI()); |
| 29 | + new_exiting_block->setName("dx.struct_exit.new_exiting"); |
| 30 | + new_not_exiting_block->setName(old_name); |
| 31 | +- L->addBasicBlockToLoop(new_not_exiting_block, *LI); |
| 32 | ++ // Query for new_exiting_block's own loop to add new_not_exiting_block to. |
| 33 | ++ // It's possible that new_exiting_block is part of another inner loop |
| 34 | ++ // separate from L. If added directly to L, the inner loop(s) will not |
| 35 | ++ // contain new_not_exiting_block, making them malformed. |
| 36 | ++ Loop *inner_loop_of_exiting_block = LI->getLoopFor(new_exiting_block); |
| 37 | ++ inner_loop_of_exiting_block->addBasicBlockToLoop(new_not_exiting_block, *LI); |
| 38 | + |
| 39 | + // Branch to latch_exit |
| 40 | + new_exiting_block->getTerminator()->eraseFromParent(); |
| 41 | +diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp |
| 42 | +index dd520f7e57d25311be7f3773849a00efaabe6717..b17a5a4a0bc368f16020c4153370ea2c92e5c26c 100644 |
| 43 | +--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp |
| 44 | ++++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp |
| 45 | +@@ -155,6 +155,18 @@ namespace { |
| 46 | + bool UserAllowPartial; |
| 47 | + bool UserRuntime; |
| 48 | + |
| 49 | ++ // HLSL Change - begin |
| 50 | ++ // Function overrides that resolve options when used for DxOpt |
| 51 | ++ void applyOptions(PassOptions O) override { |
| 52 | ++ GetPassOptionBool(O, "StructurizeLoopExits", &StructurizeLoopExits, |
| 53 | ++ false); |
| 54 | ++ } |
| 55 | ++ void dumpConfig(raw_ostream &OS) override { |
| 56 | ++ LoopPass::dumpConfig(OS); |
| 57 | ++ OS << ",StructurizeLoopExits=" << StructurizeLoopExits; |
| 58 | ++ } |
| 59 | ++ // HLSL Change - end |
| 60 | ++ |
| 61 | + bool runOnLoop(Loop *L, LPPassManager &LPM) override; |
| 62 | + |
| 63 | + /// This transformation requires natural loop information & requires that |
| 64 | +diff --git a/tools/clang/test/DXC/loop_structurize_exit_inner_latch_regression.ll b/tools/clang/test/DXC/loop_structurize_exit_inner_latch_regression.ll |
| 65 | +new file mode 100644 |
| 66 | +index 0000000000000000000000000000000000000000..743135541cd8faec287164ba3b321a59432832b6 |
| 67 | +--- /dev/null |
| 68 | ++++ b/tools/clang/test/DXC/loop_structurize_exit_inner_latch_regression.ll |
| 69 | +@@ -0,0 +1,75 @@ |
| 70 | ++; RUN: %dxopt %s -S -loop-unroll,StructurizeLoopExits=1 | FileCheck %s |
| 71 | ++; RUN: %dxopt %s -S -dxil-loop-unroll,StructurizeLoopExits=1 | FileCheck %s |
| 72 | ++ |
| 73 | ++; CHECK: mul nsw i32 |
| 74 | ++; CHECK: mul nsw i32 |
| 75 | ++; CHECK-NOT: mul nsw i32 |
| 76 | ++ |
| 77 | ++; This is a regression test for a crash in loop unroll. When there are multiple |
| 78 | ++; exits, the compiler will run hlsl::RemoveUnstructuredLoopExits to try to |
| 79 | ++; avoid unstructured code. |
| 80 | ++; |
| 81 | ++; In this test, the compiler will try to unroll the middle loop. The exit edge |
| 82 | ++; from %land.lhs.true to %if.then will be removed, and %if.end will be split at |
| 83 | ++; the beginning, and branch to %if.end instead. |
| 84 | ++; |
| 85 | ++; Since the new split block at %if.end becomes the new latch of the inner-most |
| 86 | ++; loop, it needs to be added to the Loop analysis structure of the inner loop. |
| 87 | ++; However, it was only added to the current middle loop that is being unrolled. |
| 88 | ++ |
| 89 | ++target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" |
| 90 | ++target triple = "dxil-ms-dx" |
| 91 | ++ |
| 92 | ++; Function Attrs: nounwind |
| 93 | ++define void @main(i32 *%arg0, i32 *%arg1, i32 *%arg2) #0 { |
| 94 | ++entry: |
| 95 | ++ br label %while.body.3.preheader.lr.ph |
| 96 | ++ |
| 97 | ++while.body.3.preheader.lr.ph.loopexit: ; preds = %for.inc |
| 98 | ++ br label %while.body.3.preheader.lr.ph |
| 99 | ++ |
| 100 | ++while.body.3.preheader.lr.ph: ; preds = %while.body.3.preheader.lr.ph.loopexit, %entry |
| 101 | ++ br label %while.body.3.preheader |
| 102 | ++ |
| 103 | ++while.body.3.preheader: ; preds = %while.body.3.preheader.lr.ph, %for.inc |
| 104 | ++ %i.0 = phi i32 [ 0, %while.body.3.preheader.lr.ph ], [ %inc, %for.inc ] |
| 105 | ++ br label %while.body.3 |
| 106 | ++ |
| 107 | ++while.body.3: ; preds = %while.body.3.preheader, %if.end |
| 108 | ++ %load_arg0 = load i32, i32* %arg0 |
| 109 | ++ %cmp4 = icmp sgt i32 %load_arg0, 0 |
| 110 | ++ br i1 %cmp4, label %land.lhs.true, label %if.end |
| 111 | ++ |
| 112 | ++land.lhs.true: ; preds = %while.body.3 |
| 113 | ++ %load_arg1 = load i32, i32* %arg1 |
| 114 | ++ %load_arg2 = load i32, i32* %arg2 |
| 115 | ++ %mul = mul nsw i32 %load_arg2, %load_arg1 |
| 116 | ++ %cmp7 = icmp eq i32 %mul, 10 |
| 117 | ++ br i1 %cmp7, label %if.then, label %if.end |
| 118 | ++ |
| 119 | ++if.then: ; preds = %land.lhs.true |
| 120 | ++ ret void |
| 121 | ++ |
| 122 | ++if.end: ; preds = %land.lhs.true, %while.body.3 |
| 123 | ++ %cmp10 = icmp sle i32 %i.0, 4 |
| 124 | ++ br i1 %cmp10, label %for.inc, label %while.body.3 |
| 125 | ++ |
| 126 | ++for.inc: ; preds = %if.end |
| 127 | ++ %inc = add nsw i32 %i.0, 1 |
| 128 | ++ %cmp = icmp slt i32 %inc, 2 |
| 129 | ++ br i1 %cmp, label %while.body.3.preheader, label %while.body.3.preheader.lr.ph.loopexit, !llvm.loop !3 |
| 130 | ++} |
| 131 | ++ |
| 132 | ++attributes #0 = { nounwind } |
| 133 | ++attributes #1 = { nounwind readnone } |
| 134 | ++attributes #2 = { nounwind readonly } |
| 135 | ++ |
| 136 | ++!llvm.module.flags = !{!0} |
| 137 | ++!pauseresume = !{!1} |
| 138 | ++!llvm.ident = !{!2} |
| 139 | ++ |
| 140 | ++!0 = !{i32 2, !"Debug Info Version", i32 3} |
| 141 | ++!1 = !{!"hlsl-dxilemit", !"hlsl-dxilload"} |
| 142 | ++!2 = !{!"dxc(private) 1.8.0.14563 (main, 07ce88034-dirty)"} |
| 143 | ++!3 = distinct !{!3, !4} |
| 144 | ++!4 = !{!"llvm.loop.unroll.full"} |
| 145 | +diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py |
| 146 | +index 77f5671016eb66a4ddf8a943ec8cb05e8d87c9cd..ca8d16bd2562e26e8572413499d32dc2232de5c0 100644 |
| 147 | +--- a/utils/hct/hctdb.py |
| 148 | ++++ b/utils/hct/hctdb.py |
| 149 | +@@ -6680,6 +6680,12 @@ class db_dxil(object): |
| 150 | + "t": "unsigned", |
| 151 | + "d": "Unrolled size limit for loops with an unroll(full) or unroll_count pragma.", |
| 152 | + }, |
| 153 | ++ { |
| 154 | ++ "n": "StructurizeLoopExits", |
| 155 | ++ "t": "bool", |
| 156 | ++ "c": 1, |
| 157 | ++ "d": "Whether the unroller should try to structurize loop exits first.", |
| 158 | ++ }, |
| 159 | + ], |
| 160 | + ) |
| 161 | + add_pass("mldst-motion", "MergedLoadStoreMotion", "MergedLoadStoreMotion", []) |
0 commit comments