
Commit d0d56c4

ghehglanza authored and committed
[CIR][CIRGen] Generate CIR for neon_vget and neon_vdup lane intrinsics (llvm#884)
As titled. This PR organizes its test cases similarly to [PR882](llvm#882). Note that, compared to OG, this PR combines the cases for some pairs of intrinsics, such as BI__builtin_neon_vget_lane_f32 and BI__builtin_neon_vdups_lane_f32: they generate identical code in both OG and CIRGen. OG separates them into different case handlers because it passes mnemonics, which differ between the pair; CIRGen does not pass mnemonics, so the cases can be combined. Co-authored-by: Guojin He <[email protected]>
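For illustration, a minimal example of one such pair (hypothetical function names, not part of this commit): both intrinsics read lane 1 of a <2 x float> vector, so a single case label can lower both.

#include <arm_neon.h>

// Both calls extract the same lane from the same vector type; OG dispatches
// them through separate cases (different mnemonics), CIRGen through one.
float32_t via_vget(float32x2_t v) { return vget_lane_f32(v, 1); }
float32_t via_vdups(float32x2_t v) { return vdups_lane_f32(v, 1); }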
1 parent f9e9b96 commit d0d56c4

File tree

3 files changed: +487 -16 lines changed


clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp

Lines changed: 50 additions & 16 deletions
@@ -2189,42 +2189,76 @@ CIRGenFunction::buildAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,

   case NEON::BI__builtin_neon_vget_lane_i8:
   case NEON::BI__builtin_neon_vdupb_lane_i8:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt8Ty, 8));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i8:
   case NEON::BI__builtin_neon_vdupb_laneq_i8:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt8Ty, 16));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vget_lane_i16:
   case NEON::BI__builtin_neon_vduph_lane_i16:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt16Ty, 4));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i16:
   case NEON::BI__builtin_neon_vduph_laneq_i16:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt16Ty, 8));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vget_lane_i32:
   case NEON::BI__builtin_neon_vdups_lane_i32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt32Ty, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
+  case NEON::BI__builtin_neon_vget_lane_f32:
   case NEON::BI__builtin_neon_vdups_lane_f32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), FloatTy, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i32:
   case NEON::BI__builtin_neon_vdups_laneq_i32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt32Ty, 4));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vget_lane_i64:
   case NEON::BI__builtin_neon_vdupd_lane_i64:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt64Ty, 1));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vdupd_lane_f64:
-    llvm_unreachable("NYI");
+  case NEON::BI__builtin_neon_vget_lane_f64:
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), DoubleTy, 1));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_i64:
   case NEON::BI__builtin_neon_vdupd_laneq_i64:
-    llvm_unreachable("NYI");
-  case NEON::BI__builtin_neon_vget_lane_f32:
-    llvm_unreachable("NYI");
-  case NEON::BI__builtin_neon_vget_lane_f64:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), UInt64Ty, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_f32:
   case NEON::BI__builtin_neon_vdups_laneq_f32:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), FloatTy, 4));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vgetq_lane_f64:
   case NEON::BI__builtin_neon_vdupd_laneq_f64:
-    llvm_unreachable("NYI");
+    Ops[0] = builder.createBitcast(
+        Ops[0], mlir::cir::VectorType::get(builder.getContext(), DoubleTy, 2));
+    return builder.create<mlir::cir::VecExtractOp>(
+        getLoc(E->getExprLoc()), Ops[0], buildScalarExpr(E->getArg(1)));
   case NEON::BI__builtin_neon_vaddh_f16:
     llvm_unreachable("NYI");
   case NEON::BI__builtin_neon_vsubh_f16:
Lines changed: 217 additions & 0 deletions
@@ -0,0 +1,217 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:  -emit-cir -target-feature +neon %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android24 -fclangir \
// RUN:  -emit-llvm -target-feature +neon %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
// XFAIL: *

// Testing normal situations of vdup lane intrinsics.

// REQUIRES: aarch64-registered-target || arm-registered-target
#include <arm_neon.h>

int8_t test_vdupb_lane_s8(int8x8_t src) {
  return vdupb_lane_s8(src, 7);
}

// CIR-LABEL: test_vdupb_lane_s8
// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 8>

// LLVM: define dso_local i8 @test_vdupb_lane_s8(<8 x i8> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i8>, i64 1, align 8
// LLVM: store <8 x i8> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <8 x i8>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <8 x i8> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <8 x i8>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <8 x i8> [[INTRN_ARG]], i32 7
// LLVM: ret i8 {{%.*}}

int8_t test_vdupb_laneq_s8(int8x16_t a) {
  return vdupb_laneq_s8(a, 15);
}

// CIR-LABEL: test_vdupb_laneq_s8
// CIR: [[IDX:%.*]] = cir.const #cir.int<15> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u8i x 16>

// LLVM: define dso_local i8 @test_vdupb_laneq_s8(<16 x i8> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <16 x i8>, i64 1, align 16
// LLVM: store <16 x i8> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <16 x i8>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <16 x i8> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <16 x i8>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <16 x i8> [[INTRN_ARG]], i32 15
// LLVM: ret i8 {{%.*}}

int16_t test_vduph_lane_s16(int16x4_t src) {
  return vduph_lane_s16(src, 3);
}

// CIR-LABEL: test_vduph_lane_s16
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 4>

// LLVM: define dso_local i16 @test_vduph_lane_s16(<4 x i16> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i16>, i64 1, align 8
// LLVM: store <4 x i16> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <4 x i16>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <4 x i16> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <4 x i16>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <4 x i16> [[INTRN_ARG]], i32 3
// LLVM: ret i16 {{%.*}}

int16_t test_vduph_laneq_s16(int16x8_t a) {
  return vduph_laneq_s16(a, 7);
}

// CIR-LABEL: test_vduph_laneq_s16
// CIR: [[IDX:%.*]] = cir.const #cir.int<7> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u16i x 8>

// LLVM: define dso_local i16 @test_vduph_laneq_s16(<8 x i16> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <8 x i16>, i64 1, align 16
// LLVM: store <8 x i16> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <8 x i16>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <8 x i16> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <8 x i16>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <8 x i16> [[INTRN_ARG]], i32 7
// LLVM: ret i16 {{%.*}}

int32_t test_vdups_lane_s32(int32x2_t a) {
  return vdups_lane_s32(a, 1);
}

// CIR-LABEL: test_vdups_lane_s32
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 2>

// LLVM: define dso_local i32 @test_vdups_lane_s32(<2 x i32> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i32>, i64 1, align 8
// LLVM: store <2 x i32> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x i32>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <2 x i32> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <2 x i32>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <2 x i32> [[INTRN_ARG]], i32 1
// LLVM: ret i32 {{%.*}}

int32_t test_vdups_laneq_s32(int32x4_t a) {
  return vdups_laneq_s32(a, 3);
}

// CIR-LABEL: test_vdups_laneq_s32
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u32i x 4>

// LLVM: define dso_local i32 @test_vdups_laneq_s32(<4 x i32> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x i32>, i64 1, align 16
// LLVM: store <4 x i32> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x i32>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <4 x i32> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <4 x i32>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <4 x i32> [[INTRN_ARG]], i32 3
// LLVM: ret i32 {{%.*}}

int64_t test_vdupd_lane_s64(int64x1_t src) {
  return vdupd_lane_s64(src, 0);
}

// CIR-LABEL: test_vdupd_lane_s64
// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 1>

// LLVM: define dso_local i64 @test_vdupd_lane_s64(<1 x i64> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x i64>, i64 1, align 8
// LLVM: store <1 x i64> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x i64>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <1 x i64> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <1 x i64>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <1 x i64> [[INTRN_ARG]], i32 0
// LLVM: ret i64 {{%.*}}

int64_t test_vdupd_laneq_s64(int64x2_t a) {
  return vdupd_laneq_s64(a, 1);
}

// CIR-LABEL: test_vdupd_laneq_s64
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!u64i x 2>

// LLVM: define dso_local i64 @test_vdupd_laneq_s64(<2 x i64> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x i64>, i64 1, align 16
// LLVM: store <2 x i64> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x i64>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <2 x i64> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <2 x i64>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <2 x i64> [[INTRN_ARG]], i32 1
// LLVM: ret i64 {{%.*}}

float32_t test_vdups_lane_f32(float32x2_t src) {
  return vdups_lane_f32(src, 1);
}

// CIR-LABEL: test_vdups_lane_f32
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 2>

// LLVM: define dso_local float @test_vdups_lane_f32(<2 x float> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x float>, i64 1, align 8
// LLVM: store <2 x float> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <2 x float>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <2 x float> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <2 x float>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <2 x float> [[INTRN_ARG]], i32 1
// LLVM: ret float {{%.*}}

float64_t test_vdupd_lane_f64(float64x1_t src) {
  return vdupd_lane_f64(src, 0);
}

// CIR-LABEL: test_vdupd_lane_f64
// CIR: [[IDX:%.*]] = cir.const #cir.int<0> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 1>

// LLVM: define dso_local double @test_vdupd_lane_f64(<1 x double> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <1 x double>, i64 1, align 8
// LLVM: store <1 x double> [[ARG]], ptr [[ARG_SAVE]], align 8
// LLVM: [[TMP:%.*]] = load <1 x double>, ptr [[ARG_SAVE:%.*]], align 8
// LLVM: store <1 x double> [[TMP]], ptr [[S0:%.*]], align 8
// LLVM: [[INTRN_ARG:%.*]] = load <1 x double>, ptr [[S0]], align 8
// LLVM: {{%.*}} = extractelement <1 x double> [[INTRN_ARG]], i32 0
// LLVM: ret double {{%.*}}

float32_t test_vdups_laneq_f32(float32x4_t src) {
  return vdups_laneq_f32(src, 3);
}

// CIR-LABEL: test_vdups_laneq_f32
// CIR: [[IDX:%.*]] = cir.const #cir.int<3> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.float x 4>

// LLVM: define dso_local float @test_vdups_laneq_f32(<4 x float> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <4 x float>, i64 1, align 16
// LLVM: store <4 x float> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <4 x float>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <4 x float> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <4 x float>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <4 x float> [[INTRN_ARG]], i32 3
// LLVM: ret float {{%.*}}

float64_t test_vdupd_laneq_f64(float64x2_t src) {
  return vdupd_laneq_f64(src, 1);
}

// CIR-LABEL: test_vdupd_laneq_f64
// CIR: [[IDX:%.*]] = cir.const #cir.int<1> : !s32i
// CIR: {{%.*}} = cir.vec.extract {{%.*}}[[[IDX]] : !s32i] : !cir.vector<!cir.double x 2>

// LLVM: define dso_local double @test_vdupd_laneq_f64(<2 x double> [[ARG:%.*]])
// LLVM: [[ARG_SAVE:%.*]] = alloca <2 x double>, i64 1, align 16
// LLVM: store <2 x double> [[ARG]], ptr [[ARG_SAVE]], align 16
// LLVM: [[TMP:%.*]] = load <2 x double>, ptr [[ARG_SAVE:%.*]], align 16
// LLVM: store <2 x double> [[TMP]], ptr [[S0:%.*]], align 16
// LLVM: [[INTRN_ARG:%.*]] = load <2 x double>, ptr [[S0]], align 16
// LLVM: {{%.*}} = extractelement <2 x double> [[INTRN_ARG]], i32 1
// LLVM: ret double {{%.*}}
