@@ -1218,6 +1218,133 @@ exit:
   ret void
 }

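+; Test with a scalar trip count of 17: the vector trip count is 16 for every
+; VF/UF combination checked below, so one remainder iteration always runs in
+; the scalar loop and the middle block branches unconditionally to the scalar
+; preheader.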
+define void @test_vector_tc_eq_16(ptr %A) {
+; VF8UF1-LABEL: define void @test_vector_tc_eq_16(
+; VF8UF1-SAME: ptr [[A:%.*]]) {
+; VF8UF1-NEXT: [[ENTRY:.*]]:
+; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF1: [[VECTOR_PH]]:
+; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF1: [[VECTOR_BODY]]:
+; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
+; VF8UF1-NEXT: [[TMP1:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF8UF1-NEXT: store <8 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
+; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; VF8UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VF8UF1: [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT: br label %[[SCALAR_PH]]
+; VF8UF1: [[SCALAR_PH]]:
+; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF8UF1-NEXT: br label %[[LOOP:.*]]
+; VF8UF1: [[LOOP]]:
+; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
+; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF8UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
+; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
+; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; VF8UF1: [[EXIT]]:
+; VF8UF1-NEXT: ret void
+;
+; VF8UF2-LABEL: define void @test_vector_tc_eq_16(
+; VF8UF2-SAME: ptr [[A:%.*]]) {
+; VF8UF2-NEXT: [[ENTRY:.*]]:
+; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF2: [[VECTOR_PH]]:
+; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF2: [[VECTOR_BODY]]:
+; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
+; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
+; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
+; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1
+; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br label %[[SCALAR_PH]]
+; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF8UF2-NEXT: br label %[[LOOP:.*]]
+; VF8UF2: [[LOOP]]:
+; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
+; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
+; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
+; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF8UF2: [[EXIT]]:
+; VF8UF2-NEXT: ret void
+;
+; VF16UF1-LABEL: define void @test_vector_tc_eq_16(
+; VF16UF1-SAME: ptr [[A:%.*]]) {
+; VF16UF1-NEXT: [[ENTRY:.*]]:
+; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF16UF1: [[VECTOR_PH]]:
+; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF16UF1: [[VECTOR_BODY]]:
+; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
+; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br label %[[SCALAR_PH]]
+; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF16UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF16UF1-NEXT: br label %[[LOOP:.*]]
+; VF16UF1: [[LOOP]]:
+; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF16UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF16UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
+; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF16UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
+; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
+; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF16UF1: [[EXIT]]:
+; VF16UF1-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
+  %p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
+  %l = load i8, ptr %p.src, align 1
+  %add = add nsw i8 %l, 10
+  store i8 %add, ptr %p.src
+  %iv.next = add nsw i64 %iv, 1
+  %cmp = icmp eq i64 %iv.next, 17
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
12211348;.
12221349; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
12231350; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1227,18 +1354,24 @@ exit:
 ; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
 ; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
 ; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; VF8UF1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; VF8UF1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
 ;.
 ; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
 ; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
 ; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
 ; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; VF8UF2: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
+; VF8UF2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
 ;.
 ; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
 ; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
 ; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
 ; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; VF16UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
+; VF16UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
 ;.