Skip to content

Commit fbf630a

Browse files
committed
[RISCVISel] Add precommit test for RISCVISD::VCPOP_VL leading zeros compute
1 parent 5ecce45 commit fbf630a

File tree

2 files changed

+219
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV32
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV64
4+
5+
; Pop count of an <8 x i1> mask: the mask is bitcast to i8 and counted with
; @llvm.ctpop.i8, which lowers to vcpop.m (RISCVISD::VCPOP_VL). The ctpop
; result is declared to be in [0, 8] via the range(i8 0, 9) attribute, so the
; `andi a0, a0, 255` zero-extension in the CHECK lines below is redundant.
; This is a precommit test (per the commit title) pinning the current codegen
; before leading zeros are computed for RISCVISD::VCPOP_VL.
define i32 @test(<8 x i1> %mask) {
6+
; CHECK-LABEL: test:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
9+
; CHECK-NEXT: vcpop.m a0, v0
10+
; CHECK-NEXT: andi a0, a0, 255
11+
; CHECK-NEXT: ret
12+
; Reinterpret the 8 mask bits as an i8, count the set bits, widen to i32.
%1 = bitcast <8 x i1> %mask to i8
13+
%2 = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 %1)
14+
%3 = zext nneg i8 %2 to i32
15+
ret i32 %3
16+
}
17+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
18+
; RV32: {{.*}}
19+
; RV64: {{.*}}
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV32
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV64
4+
5+
; Compress-store loop: for each i in [0, n), copy src[i] to *dst++ when
; src[i] < c. The vectorized path (8 x i32 per iteration) builds the mask with
; vmslt.vx, packs the selected lanes with vcompress.vm, stores them, and bumps
; dst by 4 * popcount(mask). The popcount comes from @llvm.ctpop.i8 carrying a
; range(i8 0, 9) annotation (lowered to vcpop.m); its shl-by-2 + zext currently
; emits a slli/srli pair (slli 26 / srli 24 on RV32, slli 58 / srli 56 on
; RV64) because the leading zeros of RISCVISD::VCPOP_VL are not yet computed —
; this precommit test pins that codegen. A scalar fallback loop handles n < 8
; and the runtime alias check (dst/src pointer difference < 32 bytes).
define dso_local void @test_store1(ptr nocapture noundef writeonly %dst, ptr nocapture noundef readonly %src, i32 noundef signext %c, i32 noundef signext %n) {
6+
; RV32-LABEL: test_store1:
7+
; RV32: # %bb.0: # %entry
8+
; RV32-NEXT: blez a3, .LBB0_6
9+
; RV32-NEXT: # %bb.1: # %for.body.preheader
10+
; RV32-NEXT: li a4, 8
11+
; RV32-NEXT: bltu a3, a4, .LBB0_7
12+
; RV32-NEXT: # %bb.2: # %for.body.preheader
13+
; RV32-NEXT: sub a4, a0, a1
14+
; RV32-NEXT: sltu a5, a0, a1
15+
; RV32-NEXT: neg a5, a5
16+
; RV32-NEXT: sltiu a4, a4, 32
17+
; RV32-NEXT: seqz a5, a5
18+
; RV32-NEXT: and a4, a5, a4
19+
; RV32-NEXT: bnez a4, .LBB0_7
20+
; RV32-NEXT: # %bb.3: # %vector.ph
21+
; RV32-NEXT: lui a5, 524288
22+
; RV32-NEXT: addi a5, a5, -8
23+
; RV32-NEXT: and a5, a3, a5
24+
; RV32-NEXT: li a7, 0
25+
; RV32-NEXT: li a6, 0
26+
; RV32-NEXT: .LBB0_4: # %vector.body
27+
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
28+
; RV32-NEXT: slli t0, a7, 2
29+
; RV32-NEXT: addi t1, a7, 8
30+
; RV32-NEXT: add t0, a1, t0
31+
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
32+
; RV32-NEXT: vle32.v v8, (t0)
33+
; RV32-NEXT: sltu a7, t1, a7
34+
; RV32-NEXT: xor t0, t1, a5
35+
; RV32-NEXT: add a6, a6, a7
36+
; RV32-NEXT: vmslt.vx v10, v8, a2
37+
; RV32-NEXT: vcompress.vm v12, v8, v10
38+
; RV32-NEXT: vcpop.m a7, v10
39+
; RV32-NEXT: vsetvli zero, a7, e32, m2, ta, ma
40+
; RV32-NEXT: vse32.v v12, (a0)
41+
; RV32-NEXT: slli a7, a7, 26
42+
; RV32-NEXT: srli a7, a7, 24
43+
; RV32-NEXT: or t0, t0, a6
44+
; RV32-NEXT: add a0, a0, a7
45+
; RV32-NEXT: mv a7, t1
46+
; RV32-NEXT: bnez t0, .LBB0_4
47+
; RV32-NEXT: # %bb.5: # %middle.block
48+
; RV32-NEXT: bne a5, a3, .LBB0_9
49+
; RV32-NEXT: .LBB0_6: # %for.cond.cleanup
50+
; RV32-NEXT: ret
51+
; RV32-NEXT: .LBB0_7:
52+
; RV32-NEXT: li a5, 0
53+
; RV32-NEXT: li a4, 0
54+
; RV32-NEXT: j .LBB0_9
55+
; RV32-NEXT: .LBB0_8: # %for.inc
56+
; RV32-NEXT: # in Loop: Header=BB0_9 Depth=1
57+
; RV32-NEXT: addi a5, a5, 1
58+
; RV32-NEXT: seqz a6, a5
59+
; RV32-NEXT: add a4, a4, a6
60+
; RV32-NEXT: xor a6, a5, a3
61+
; RV32-NEXT: or a6, a6, a4
62+
; RV32-NEXT: beqz a6, .LBB0_6
63+
; RV32-NEXT: .LBB0_9: # %for.body
64+
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
65+
; RV32-NEXT: slli a6, a5, 2
66+
; RV32-NEXT: add a6, a1, a6
67+
; RV32-NEXT: lw a6, 0(a6)
68+
; RV32-NEXT: bge a6, a2, .LBB0_8
69+
; RV32-NEXT: # %bb.10: # %if.then
70+
; RV32-NEXT: # in Loop: Header=BB0_9 Depth=1
71+
; RV32-NEXT: addi a7, a0, 4
72+
; RV32-NEXT: sw a6, 0(a0)
73+
; RV32-NEXT: mv a0, a7
74+
; RV32-NEXT: j .LBB0_8
75+
;
76+
; RV64-LABEL: test_store1:
77+
; RV64: # %bb.0: # %entry
78+
; RV64-NEXT: blez a3, .LBB0_6
79+
; RV64-NEXT: # %bb.1: # %for.body.preheader
80+
; RV64-NEXT: li a5, 8
81+
; RV64-NEXT: li a4, 0
82+
; RV64-NEXT: bltu a3, a5, .LBB0_7
83+
; RV64-NEXT: # %bb.2: # %for.body.preheader
84+
; RV64-NEXT: sub a5, a0, a1
85+
; RV64-NEXT: li a6, 31
86+
; RV64-NEXT: bgeu a6, a5, .LBB0_7
87+
; RV64-NEXT: # %bb.3: # %vector.ph
88+
; RV64-NEXT: lui a4, 524288
89+
; RV64-NEXT: addiw a4, a4, -8
90+
; RV64-NEXT: and a4, a3, a4
91+
; RV64-NEXT: slli a5, a4, 2
92+
; RV64-NEXT: add a5, a5, a1
93+
; RV64-NEXT: mv a6, a1
94+
; RV64-NEXT: .LBB0_4: # %vector.body
95+
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
96+
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
97+
; RV64-NEXT: vle32.v v8, (a6)
98+
; RV64-NEXT: addi a6, a6, 32
99+
; RV64-NEXT: vmslt.vx v10, v8, a2
100+
; RV64-NEXT: vcompress.vm v12, v8, v10
101+
; RV64-NEXT: vcpop.m a7, v10
102+
; RV64-NEXT: vsetvli zero, a7, e32, m2, ta, ma
103+
; RV64-NEXT: vse32.v v12, (a0)
104+
; RV64-NEXT: slli a7, a7, 58
105+
; RV64-NEXT: srli a7, a7, 56
106+
; RV64-NEXT: add a0, a0, a7
107+
; RV64-NEXT: bne a6, a5, .LBB0_4
108+
; RV64-NEXT: # %bb.5: # %middle.block
109+
; RV64-NEXT: bne a4, a3, .LBB0_7
110+
; RV64-NEXT: .LBB0_6: # %for.cond.cleanup
111+
; RV64-NEXT: ret
112+
; RV64-NEXT: .LBB0_7: # %for.body.preheader13
113+
; RV64-NEXT: slli a4, a4, 2
114+
; RV64-NEXT: slli a5, a3, 2
115+
; RV64-NEXT: add a3, a1, a4
116+
; RV64-NEXT: add a1, a1, a5
117+
; RV64-NEXT: j .LBB0_9
118+
; RV64-NEXT: .LBB0_8: # %for.inc
119+
; RV64-NEXT: # in Loop: Header=BB0_9 Depth=1
120+
; RV64-NEXT: addi a3, a3, 4
121+
; RV64-NEXT: beq a3, a1, .LBB0_6
122+
; RV64-NEXT: .LBB0_9: # %for.body
123+
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
124+
; RV64-NEXT: lw a4, 0(a3)
125+
; RV64-NEXT: bge a4, a2, .LBB0_8
126+
; RV64-NEXT: # %bb.10: # %if.then
127+
; RV64-NEXT: # in Loop: Header=BB0_9 Depth=1
128+
; RV64-NEXT: addi a5, a0, 4
129+
; RV64-NEXT: sw a4, 0(a0)
130+
; RV64-NEXT: mv a0, a5
131+
; RV64-NEXT: j .LBB0_8
132+
; Guard: nothing to do for n <= 0.
entry:
133+
%cmp8 = icmp sgt i32 %n, 0
134+
br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
135+
136+
; Runtime checks: take the scalar path when n < 8 (not enough elements for one
; full vector) or when dst and src are closer than 32 bytes (one vector's
; worth), i.e. the compressed stores could overlap the pending loads.
for.body.preheader: ; preds = %entry
137+
%dst11 = ptrtoint ptr %dst to i64
138+
%src12 = ptrtoint ptr %src to i64
139+
%wide.trip.count = zext nneg i32 %n to i64
140+
%min.iters.check = icmp ult i32 %n, 8
141+
%0 = sub i64 %dst11, %src12
142+
%diff.check = icmp ult i64 %0, 32
143+
%or.cond = or i1 %min.iters.check, %diff.check
144+
br i1 %or.cond, label %for.body.preheader13, label %vector.ph
145+
146+
; Scalar-loop entry: resumes either from scratch or from where the vector
; epilogue left off (index %n.vec, destination %monotonic.add).
for.body.preheader13: ; preds = %middle.block, %for.body.preheader
147+
%indvars.iv.ph = phi i64 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
148+
%dst.addr.09.ph = phi ptr [ %dst, %for.body.preheader ], [ %monotonic.add, %middle.block ]
149+
br label %for.body
150+
151+
; %n.vec = n rounded down to a multiple of 8; splat %c for the vector compare.
vector.ph: ; preds = %for.body.preheader
152+
%n.vec = and i64 %wide.trip.count, 2147483640
153+
%broadcast.splatinsert = insertelement <8 x i32> poison, i32 %c, i64 0
154+
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
155+
br label %vector.body
156+
157+
vector.body: ; preds = %vector.body, %vector.ph
158+
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
159+
%monotonic.iv = phi ptr [ %dst, %vector.ph ], [ %monotonic.add, %vector.body ]
160+
%1 = getelementptr inbounds i32, ptr %src, i64 %index
161+
%wide.load = load <8 x i32>, ptr %1, align 4
162+
%2 = icmp slt <8 x i32> %wide.load, %broadcast.splat
163+
%3 = tail call comment placeholder
164+
; Advance dst by 4 * popcount(mask): this ctpop-of-bitcast is what becomes
; vcpop.m, and the shl/zext below is the shift pair under test.
%3 = bitcast <8 x i1> %2 to i8
165+
%4 = tail call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 %3)
166+
%5 = shl nuw nsw i8 %4, 2
167+
%6 = zext nneg i8 %5 to i64
168+
%monotonic.add = getelementptr inbounds i8, ptr %monotonic.iv, i64 %6
169+
%index.next = add nuw i64 %index, 8
170+
%7 = icmp eq i64 %index.next, %n.vec
171+
br i1 %7, label %middle.block, label %vector.body
172+
173+
; If n was not a multiple of 8, finish the tail in the scalar loop.
middle.block: ; preds = %vector.body
174+
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
175+
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader13
176+
177+
for.cond.cleanup: ; preds = %for.inc, %middle.block, %entry
178+
ret void
179+
180+
; Scalar fallback: test one element per iteration, conditionally store.
for.body: ; preds = %for.body.preheader13, %for.inc
181+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ %indvars.iv.ph, %for.body.preheader13 ]
182+
%dst.addr.09 = phi ptr [ %dst.addr.1, %for.inc ], [ %dst.addr.09.ph, %for.body.preheader13 ]
183+
%arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
184+
%8 = load i32, ptr %arrayidx, align 4
185+
%cmp1 = icmp slt i32 %8, %c
186+
br i1 %cmp1, label %if.then, label %for.inc
187+
188+
if.then: ; preds = %for.body
189+
%incdec.ptr = getelementptr inbounds i8, ptr %dst.addr.09, i64 4
190+
store i32 %8, ptr %dst.addr.09, align 4
191+
br label %for.inc
192+
193+
for.inc: ; preds = %for.body, %if.then
194+
%dst.addr.1 = phi ptr [ %incdec.ptr, %if.then ], [ %dst.addr.09, %for.body ]
195+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
196+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
197+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
198+
}
199+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
200+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)