@@ -310,6 +310,187 @@ define void @test_2x32bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #
310310 ret void
311311}
312312
; The get.active.lane.mask result has an extra use by an extractelement, which is replaced with ptest_first.
314+
define void @test_2x8bit_mask_with_extracts_and_ptest(i64 %i, i64 %n) {
; CHECK-SVE-LABEL: test_2x8bit_mask_with_extracts_and_ptest:
; CHECK-SVE:       // %bb.0: // %entry
; CHECK-SVE-NEXT:    whilelo p1.b, x0, x1
; CHECK-SVE-NEXT:    b.pl .LBB11_2
; CHECK-SVE-NEXT:  // %bb.1: // %if.then
; CHECK-SVE-NEXT:    punpklo p0.h, p1.b
; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b
; CHECK-SVE-NEXT:    b use
; CHECK-SVE-NEXT:  .LBB11_2: // %if.end
; CHECK-SVE-NEXT:    ret
;
; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_extracts_and_ptest:
; CHECK-SVE2p1-SME2:       // %bb.0: // %entry
; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.h, p1.h }, x0, x1
; CHECK-SVE2p1-SME2-NEXT:    ptrue p2.b
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p3.b, p0.b, p1.b
; CHECK-SVE2p1-SME2-NEXT:    ptest p2, p3.b
; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB11_2
; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then
; CHECK-SVE2p1-SME2-NEXT:    b use
; CHECK-SVE2p1-SME2-NEXT:  .LBB11_2: // %if.end
; CHECK-SVE2p1-SME2-NEXT:    ret
entry:
  ; nxv16i1 lane mask built from the i64 index %i and trip count %n.
  %r = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %i, i64 %n)
  ; The two nxv8i1 halves of the mask; they are only consumed in %if.then.
  %v0 = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %r, i64 0)
  %v1 = call <vscale x 8 x i1> @llvm.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1> %r, i64 8)
  ; Additional use of the unsplit mask: lane 0 selects the branch target.
  %elt0 = extractelement <vscale x 16 x i1> %r, i32 0
  br i1 %elt0, label %if.then, label %if.end

if.then:
  tail call void @use(<vscale x 8 x i1> %v0, <vscale x 8 x i1> %v1)
  br label %if.end

if.end:
  ret void
}
352+
; The get.active.lane.mask result has an extra use by an extractelement, which is
; replaced with ptest_first and reinterpret_casts because the extractelement's
; vector operand is nxv8i1 rather than nxv16i1.
355+
define void @test_2x8bit_mask_with_extracts_and_reinterpret_casts(i64 %i, i64 %n) {
; CHECK-SVE-LABEL: test_2x8bit_mask_with_extracts_and_reinterpret_casts:
; CHECK-SVE:       // %bb.0: // %entry
; CHECK-SVE-NEXT:    whilelo p1.h, x0, x1
; CHECK-SVE-NEXT:    b.pl .LBB12_2
; CHECK-SVE-NEXT:  // %bb.1: // %if.then
; CHECK-SVE-NEXT:    punpklo p0.h, p1.b
; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b
; CHECK-SVE-NEXT:    b use
; CHECK-SVE-NEXT:  .LBB12_2: // %if.end
; CHECK-SVE-NEXT:    ret
;
; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_extracts_and_reinterpret_casts:
; CHECK-SVE2p1-SME2:       // %bb.0: // %entry
; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.s, p1.s }, x0, x1
; CHECK-SVE2p1-SME2-NEXT:    ptrue p2.h
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p3.h, p0.h, p1.h
; CHECK-SVE2p1-SME2-NEXT:    ptest p2, p3.b
; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB12_2
; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then
; CHECK-SVE2p1-SME2-NEXT:    b use
; CHECK-SVE2p1-SME2-NEXT:  .LBB12_2: // %if.end
; CHECK-SVE2p1-SME2-NEXT:    ret
entry:
  ; nxv8i1 lane mask built from the i64 index %i and trip count %n.
  %mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 %i, i64 %n)
  ; Low and high nxv4i1 halves of the mask; both are passed to @use in %if.then.
  %lo = tail call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv8i1(<vscale x 8 x i1> %mask, i64 0)
  %hi = tail call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv8i1(<vscale x 8 x i1> %mask, i64 4)
  ; Lane 0 of the unsplit nxv8i1 mask decides whether @use is reached.
  %first = extractelement <vscale x 8 x i1> %mask, i64 0
  br i1 %first, label %if.then, label %if.end

if.then:
  tail call void @use(<vscale x 4 x i1> %lo, <vscale x 4 x i1> %hi)
  br label %if.end

if.end:
  ret void
}
393+
; An nxv16i1 get.active.lane.mask result is split into four nxv4i1 quarters
; while lane 0 of the unsplit mask is also read by an extractelement that
; controls the branch.

define void @test_4x4bit_mask_with_extracts_and_ptest(i64 %i, i64 %n) {
; CHECK-SVE-LABEL: test_4x4bit_mask_with_extracts_and_ptest:
; CHECK-SVE:       // %bb.0: // %entry
; CHECK-SVE-NEXT:    whilelo p0.b, x0, x1
; CHECK-SVE-NEXT:    b.pl .LBB13_2
; CHECK-SVE-NEXT:  // %bb.1: // %if.then
; CHECK-SVE-NEXT:    punpklo p1.h, p0.b
; CHECK-SVE-NEXT:    punpkhi p3.h, p0.b
; CHECK-SVE-NEXT:    punpklo p0.h, p1.b
; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b
; CHECK-SVE-NEXT:    punpklo p2.h, p3.b
; CHECK-SVE-NEXT:    punpkhi p3.h, p3.b
; CHECK-SVE-NEXT:    b use
; CHECK-SVE-NEXT:  .LBB13_2: // %if.end
; CHECK-SVE-NEXT:    ret
;
; CHECK-SVE2p1-SME2-LABEL: test_4x4bit_mask_with_extracts_and_ptest:
; CHECK-SVE2p1-SME2:       // %bb.0: // %entry
; CHECK-SVE2p1-SME2-NEXT:    cnth x8
; CHECK-SVE2p1-SME2-NEXT:    adds x8, x0, x8
; CHECK-SVE2p1-SME2-NEXT:    csinv x8, x8, xzr, lo
; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.s, p1.s }, x0, x1
; CHECK-SVE2p1-SME2-NEXT:    whilelo { p2.s, p3.s }, x8, x1
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.h, p0.h, p1.h
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p5.h, p2.h, p3.h
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.b, p4.b, p5.b
; CHECK-SVE2p1-SME2-NEXT:    ptrue p5.b
; CHECK-SVE2p1-SME2-NEXT:    ptest p5, p4.b
; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB13_2
; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then
; CHECK-SVE2p1-SME2-NEXT:    b use
; CHECK-SVE2p1-SME2-NEXT:  .LBB13_2: // %if.end
; CHECK-SVE2p1-SME2-NEXT:    ret
entry:
  ; nxv16i1 lane mask built from the i64 index %i and trip count %n.
  %r = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %i, i64 %n)
  ; Four consecutive nxv4i1 quarters of the mask; only consumed in %if.then.
  %v0 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %r, i64 0)
  %v1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %r, i64 4)
  %v2 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %r, i64 8)
  %v3 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> %r, i64 12)
  ; Additional use of the unsplit mask: lane 0 selects the branch target.
  %elt0 = extractelement <vscale x 16 x i1> %r, i32 0
  br i1 %elt0, label %if.then, label %if.end

if.then:
  tail call void @use(<vscale x 4 x i1> %v0, <vscale x 4 x i1> %v1, <vscale x 4 x i1> %v2, <vscale x 4 x i1> %v3)
  br label %if.end

if.end:
  ret void
}
443+
; An nxv8i1 get.active.lane.mask result is split into four nxv2i1 quarters
; while lane 0 of the unsplit nxv8i1 mask is also read by an extractelement
; that controls the branch.

define void @test_4x2bit_mask_with_extracts_and_reinterpret_casts(i64 %i, i64 %n) {
; CHECK-SVE-LABEL: test_4x2bit_mask_with_extracts_and_reinterpret_casts:
; CHECK-SVE:       // %bb.0: // %entry
; CHECK-SVE-NEXT:    whilelo p0.h, x0, x1
; CHECK-SVE-NEXT:    b.pl .LBB14_2
; CHECK-SVE-NEXT:  // %bb.1: // %if.then
; CHECK-SVE-NEXT:    punpklo p1.h, p0.b
; CHECK-SVE-NEXT:    punpkhi p3.h, p0.b
; CHECK-SVE-NEXT:    punpklo p0.h, p1.b
; CHECK-SVE-NEXT:    punpkhi p1.h, p1.b
; CHECK-SVE-NEXT:    punpklo p2.h, p3.b
; CHECK-SVE-NEXT:    punpkhi p3.h, p3.b
; CHECK-SVE-NEXT:    b use
; CHECK-SVE-NEXT:  .LBB14_2: // %if.end
; CHECK-SVE-NEXT:    ret
;
; CHECK-SVE2p1-SME2-LABEL: test_4x2bit_mask_with_extracts_and_reinterpret_casts:
; CHECK-SVE2p1-SME2:       // %bb.0: // %entry
; CHECK-SVE2p1-SME2-NEXT:    cntw x8
; CHECK-SVE2p1-SME2-NEXT:    adds x8, x0, x8
; CHECK-SVE2p1-SME2-NEXT:    csinv x8, x8, xzr, lo
; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.d, p1.d }, x0, x1
; CHECK-SVE2p1-SME2-NEXT:    whilelo { p2.d, p3.d }, x8, x1
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.s, p0.s, p1.s
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p5.s, p2.s, p3.s
; CHECK-SVE2p1-SME2-NEXT:    uzp1 p4.h, p4.h, p5.h
; CHECK-SVE2p1-SME2-NEXT:    ptrue p5.h
; CHECK-SVE2p1-SME2-NEXT:    ptest p5, p4.b
; CHECK-SVE2p1-SME2-NEXT:    b.pl .LBB14_2
; CHECK-SVE2p1-SME2-NEXT:  // %bb.1: // %if.then
; CHECK-SVE2p1-SME2-NEXT:    b use
; CHECK-SVE2p1-SME2-NEXT:  .LBB14_2: // %if.end
; CHECK-SVE2p1-SME2-NEXT:    ret
entry:
  ; nxv8i1 lane mask built from the i64 index %i and trip count %n.
  %r = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 %i, i64 %n)
  ; Four consecutive nxv2i1 quarters of the mask; only consumed in %if.then.
  %v0 = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %r, i64 0)
  %v1 = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %r, i64 2)
  %v2 = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %r, i64 4)
  %v3 = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv8i1(<vscale x 8 x i1> %r, i64 6)
  ; Additional use of the unsplit mask: lane 0 selects the branch target.
  %elt0 = extractelement <vscale x 8 x i1> %r, i32 0
  br i1 %elt0, label %if.then, label %if.end

if.then:
  tail call void @use(<vscale x 2 x i1> %v0, <vscale x 2 x i1> %v1, <vscale x 2 x i1> %v2, <vscale x 2 x i1> %v3)
  br label %if.end

if.end:
  ret void
}
493+
313494declare void @use (...)
314495
315496attributes #0 = { nounwind }
0 commit comments