@@ -346,6 +346,86 @@ define <3 x i32> @load_v3i32(ptr %src) {
346346 ret <3 x i32 > %l
347347}
348348
; Loads a <3 x i8> vector from %src (align 1), zero-extends every lane to i32
; and returns the resulting <3 x i32>.
; The lines with the CHECK prefix and the lines with the BE prefix are two
; independent sets of expected assembly for this one function. Both expect the
; 3 bytes to be fetched as a halfword (bounced through a stack slot) plus a
; single-byte lane load from %src + 2, two ushll widenings, and a final AND
; with a movi-built mask. The BE set additionally expects a rev32 before the
; widening and a rev64 + ext before returning.
; NOTE(review): BE presumably is a big-endian run and CHECK a little-endian
; one; the RUN lines are not visible here - confirm.
349+ define <3 x i32 > @load_v3i8_zext_to_3xi32 (ptr %src ) {
350+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32:
351+ ; CHECK: ; %bb.0:
352+ ; CHECK-NEXT: sub sp, sp, #16
353+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
354+ ; CHECK-NEXT: ldrh w8, [x0]
355+ ; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff
356+ ; CHECK-NEXT: strh w8, [sp, #12]
357+ ; CHECK-NEXT: add x8, x0, #2
358+ ; CHECK-NEXT: ldr s0, [sp, #12]
359+ ; CHECK-NEXT: ushll.8h v0, v0, #0
360+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
361+ ; CHECK-NEXT: ushll.4s v0, v0, #0
362+ ; CHECK-NEXT: and.16b v0, v0, v1
363+ ; CHECK-NEXT: add sp, sp, #16
364+ ; CHECK-NEXT: ret
365+ ;
366+ ; BE-LABEL: load_v3i8_zext_to_3xi32:
367+ ; BE: // %bb.0:
368+ ; BE-NEXT: sub sp, sp, #16
369+ ; BE-NEXT: .cfi_def_cfa_offset 16
370+ ; BE-NEXT: ldrh w8, [x0]
371+ ; BE-NEXT: movi v1.2d, #0x0000ff000000ff
372+ ; BE-NEXT: strh w8, [sp, #12]
373+ ; BE-NEXT: add x8, x0, #2
374+ ; BE-NEXT: ldr s0, [sp, #12]
375+ ; BE-NEXT: rev32 v0.8b, v0.8b
376+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
377+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
378+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
379+ ; BE-NEXT: and v0.16b, v0.16b, v1.16b
380+ ; BE-NEXT: rev64 v0.4s, v0.4s
381+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
382+ ; BE-NEXT: add sp, sp, #16
383+ ; BE-NEXT: ret
384+ %l = load <3 x i8 >, ptr %src , align 1
385+ %e = zext <3 x i8 > %l to <3 x i32 >
386+ ret <3 x i32 > %e
387+ }
388+
; Loads a <3 x i8> vector from %src (align 1), sign-extends every lane to i32
; and returns the resulting <3 x i32>.
; The lines with the CHECK prefix and the lines with the BE prefix are two
; independent sets of expected assembly for this one function. Both expect the
; same halfword + single-byte load sequence and the two ushll widenings used
; by the zext variant of this test, but finish with a shl #24 / sshr #24 pair
; on the 4s lanes instead of an AND mask. The BE set additionally expects a
; rev32 before the widening and a rev64 + ext before returning.
; NOTE(review): BE presumably is a big-endian run and CHECK a little-endian
; one; the RUN lines are not visible here - confirm.
389+ define <3 x i32 > @load_v3i8_sext_to_3xi32 (ptr %src ) {
390+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32:
391+ ; CHECK: ; %bb.0:
392+ ; CHECK-NEXT: sub sp, sp, #16
393+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
394+ ; CHECK-NEXT: ldrh w8, [x0]
395+ ; CHECK-NEXT: strh w8, [sp, #12]
396+ ; CHECK-NEXT: add x8, x0, #2
397+ ; CHECK-NEXT: ldr s0, [sp, #12]
398+ ; CHECK-NEXT: ushll.8h v0, v0, #0
399+ ; CHECK-NEXT: ld1.b { v0 }[4], [x8]
400+ ; CHECK-NEXT: ushll.4s v0, v0, #0
401+ ; CHECK-NEXT: shl.4s v0, v0, #24
402+ ; CHECK-NEXT: sshr.4s v0, v0, #24
403+ ; CHECK-NEXT: add sp, sp, #16
404+ ; CHECK-NEXT: ret
405+ ;
406+ ; BE-LABEL: load_v3i8_sext_to_3xi32:
407+ ; BE: // %bb.0:
408+ ; BE-NEXT: sub sp, sp, #16
409+ ; BE-NEXT: .cfi_def_cfa_offset 16
410+ ; BE-NEXT: ldrh w8, [x0]
411+ ; BE-NEXT: strh w8, [sp, #12]
412+ ; BE-NEXT: add x8, x0, #2
413+ ; BE-NEXT: ldr s0, [sp, #12]
414+ ; BE-NEXT: rev32 v0.8b, v0.8b
415+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
416+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
417+ ; BE-NEXT: ushll v0.4s, v0.4h, #0
418+ ; BE-NEXT: shl v0.4s, v0.4s, #24
419+ ; BE-NEXT: sshr v0.4s, v0.4s, #24
420+ ; BE-NEXT: rev64 v0.4s, v0.4s
421+ ; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
422+ ; BE-NEXT: add sp, sp, #16
423+ ; BE-NEXT: ret
424+ %l = load <3 x i8 >, ptr %src , align 1
425+ %e = sext <3 x i8 > %l to <3 x i32 >
426+ ret <3 x i32 > %e
427+ }
428+
349429define void @store_trunc_from_64bits (ptr %src , ptr %dst ) {
350430; CHECK-LABEL: store_trunc_from_64bits:
351431; CHECK: ; %bb.0: ; %entry
@@ -389,11 +469,11 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
389469; CHECK-NEXT: add x8, x0, #4
390470; CHECK-NEXT: ldr s0, [x0]
391471; CHECK-NEXT: Lloh0:
392- ; CHECK-NEXT: adrp x9, lCPI9_0 @PAGE
472+ ; CHECK-NEXT: adrp x9, lCPI11_0 @PAGE
393473; CHECK-NEXT: ld1.h { v0 }[2], [x8]
394474; CHECK-NEXT: add x8, x1, #1
395475; CHECK-NEXT: Lloh1:
396- ; CHECK-NEXT: ldr d1, [x9, lCPI9_0 @PAGEOFF]
476+ ; CHECK-NEXT: ldr d1, [x9, lCPI11_0 @PAGEOFF]
397477; CHECK-NEXT: add x9, x1, #2
398478; CHECK-NEXT: add.4h v0, v0, v1
399479; CHECK-NEXT: st1.b { v0 }[2], [x8]
@@ -408,8 +488,8 @@ define void @store_trunc_add_from_64bits(ptr %src, ptr %dst) {
408488; BE-NEXT: .cfi_def_cfa_offset 16
409489; BE-NEXT: ldr s0, [x0]
410490; BE-NEXT: add x8, x0, #4
411- ; BE-NEXT: adrp x9, .LCPI9_0
412- ; BE-NEXT: add x9, x9, :lo12:.LCPI9_0
491+ ; BE-NEXT: adrp x9, .LCPI11_0
492+ ; BE-NEXT: add x9, x9, :lo12:.LCPI11_0
413493; BE-NEXT: rev32 v0.4h, v0.4h
414494; BE-NEXT: ld1 { v1.4h }, [x9]
415495; BE-NEXT: ld1 { v0.h }[2], [x8]
@@ -541,12 +621,12 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
541621; CHECK-NEXT: .cfi_def_cfa_offset 16
542622; CHECK-NEXT: ldrh w8, [x0]
543623; CHECK-NEXT: Lloh2:
544- ; CHECK-NEXT: adrp x9, lCPI13_0 @PAGE
624+ ; CHECK-NEXT: adrp x9, lCPI15_0 @PAGE
545625; CHECK-NEXT: strh w8, [sp, #12]
546626; CHECK-NEXT: add x8, x0, #2
547627; CHECK-NEXT: ldr s0, [sp, #12]
548628; CHECK-NEXT: Lloh3:
549- ; CHECK-NEXT: ldr d1, [x9, lCPI13_0 @PAGEOFF]
629+ ; CHECK-NEXT: ldr d1, [x9, lCPI15_0 @PAGEOFF]
550630; CHECK-NEXT: ushll.8h v0, v0, #0
551631; CHECK-NEXT: ld1.b { v0 }[4], [x8]
552632; CHECK-NEXT: add x8, x1, #4
@@ -569,8 +649,8 @@ define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {
569649; BE-NEXT: rev32 v0.8b, v0.8b
570650; BE-NEXT: ushll v0.8h, v0.8b, #0
571651; BE-NEXT: ld1 { v0.b }[4], [x8]
572- ; BE-NEXT: adrp x8, .LCPI13_0
573- ; BE-NEXT: add x8, x8, :lo12:.LCPI13_0
652+ ; BE-NEXT: adrp x8, .LCPI15_0
653+ ; BE-NEXT: add x8, x8, :lo12:.LCPI15_0
574654; BE-NEXT: ld1 { v1.4h }, [x8]
575655; BE-NEXT: bic v0.4h, #255, lsl #8
576656; BE-NEXT: add x8, x1, #4
@@ -799,3 +879,115 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {
799879 store volatile <3 x i8 > %t , ptr %dst , align 1
800880 ret void
801881}
882+
; Round-trip test: loads a <3 x i8> from %src (align 1), zero-extends it to
; <3 x i32>, adds the constant vector <1, 2, 3>, truncates back to <3 x i8>
; and stores the result to the same pointer %src. Returns nothing.
; The lines with the CHECK prefix expect a uaddw.4s against a 128-bit constant
; loaded from the constant pool, followed by three single-byte lane stores
; (lanes 4, 8 and 0). The lines with the BE prefix expect the add to be done
; on 4h lanes, then an xtn narrowing and a halfword + byte store pair that
; goes through a stack slot.
; NOTE(review): BE presumably is a big-endian run and CHECK a little-endian
; one; the RUN lines are not visible here - confirm.
; NOTE(review): the constant-pool index (CPI22) and the Lloh label numbers
; appear to follow this function's position in the file, so adding or
; removing functions above it likely requires regenerating these lines.
883+ define void @load_v3i8_zext_to_3xi32_add_trunc_store (ptr %src ) {
884+ ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
885+ ; CHECK: ; %bb.0:
886+ ; CHECK-NEXT: sub sp, sp, #16
887+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
888+ ; CHECK-NEXT: ldrh w9, [x0]
889+ ; CHECK-NEXT: Lloh4:
890+ ; CHECK-NEXT: adrp x8, lCPI22_0@PAGE
891+ ; CHECK-NEXT: strh w9, [sp, #12]
892+ ; CHECK-NEXT: add x9, x0, #2
893+ ; CHECK-NEXT: ldr s0, [sp, #12]
894+ ; CHECK-NEXT: Lloh5:
895+ ; CHECK-NEXT: ldr q1, [x8, lCPI22_0@PAGEOFF]
896+ ; CHECK-NEXT: add x8, x0, #1
897+ ; CHECK-NEXT: ushll.8h v0, v0, #0
898+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
899+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
900+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
901+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
902+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
903+ ; CHECK-NEXT: add sp, sp, #16
904+ ; CHECK-NEXT: ret
905+ ; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
906+ ;
907+ ; BE-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:
908+ ; BE: // %bb.0:
909+ ; BE-NEXT: sub sp, sp, #16
910+ ; BE-NEXT: .cfi_def_cfa_offset 16
911+ ; BE-NEXT: ldrh w8, [x0]
912+ ; BE-NEXT: adrp x9, .LCPI22_0
913+ ; BE-NEXT: add x9, x9, :lo12:.LCPI22_0
914+ ; BE-NEXT: strh w8, [sp, #12]
915+ ; BE-NEXT: add x8, x0, #2
916+ ; BE-NEXT: ldr s0, [sp, #12]
917+ ; BE-NEXT: ld1 { v1.4h }, [x9]
918+ ; BE-NEXT: rev32 v0.8b, v0.8b
919+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
920+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
921+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
922+ ; BE-NEXT: xtn v1.8b, v0.8h
923+ ; BE-NEXT: umov w8, v0.h[2]
924+ ; BE-NEXT: rev32 v1.16b, v1.16b
925+ ; BE-NEXT: str s1, [sp, #8]
926+ ; BE-NEXT: ldrh w9, [sp, #8]
927+ ; BE-NEXT: strb w8, [x0, #2]
928+ ; BE-NEXT: strh w9, [x0]
929+ ; BE-NEXT: add sp, sp, #16
930+ ; BE-NEXT: ret
931+ %l = load <3 x i8 >, ptr %src , align 1
932+ %e = zext <3 x i8 > %l to <3 x i32 >
933+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
934+ %t = trunc <3 x i32 > %add to <3 x i8 >
; Unlike the load above, this store carries no explicit alignment.
935+ store <3 x i8 > %t , ptr %src
936+ ret void
937+ }
938+
; Round-trip test: loads a <3 x i8> from %src (align 1), sign-extends it to
; <3 x i32>, adds the constant vector <1, 2, 3>, truncates back to <3 x i8>
; and stores the result to the same pointer %src. Returns nothing.
; The lines with the CHECK prefix expect a uaddw.4s against a 128-bit constant
; loaded from the constant pool, followed by three single-byte lane stores
; (lanes 4, 8 and 0). The lines with the BE prefix expect the add to be done
; on 4h lanes, then an xtn narrowing and a halfword + byte store pair that
; goes through a stack slot.
; Although the IR uses sext, the expected code contains only unsigned
; widenings (ushll / uaddw) and no shl + sshr pair.
; NOTE(review): presumably this is valid because the trunc discards the
; extended bits again - confirm.
; NOTE(review): BE presumably is a big-endian run and CHECK a little-endian
; one; the RUN lines are not visible here - confirm.
; NOTE(review): the constant-pool index (CPI23) and the Lloh label numbers
; appear to follow this function's position in the file, so adding or
; removing functions above it likely requires regenerating these lines.
939+ define void @load_v3i8_sext_to_3xi32_add_trunc_store (ptr %src ) {
940+ ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
941+ ; CHECK: ; %bb.0:
942+ ; CHECK-NEXT: sub sp, sp, #16
943+ ; CHECK-NEXT: .cfi_def_cfa_offset 16
944+ ; CHECK-NEXT: ldrh w9, [x0]
945+ ; CHECK-NEXT: Lloh6:
946+ ; CHECK-NEXT: adrp x8, lCPI23_0@PAGE
947+ ; CHECK-NEXT: strh w9, [sp, #12]
948+ ; CHECK-NEXT: add x9, x0, #2
949+ ; CHECK-NEXT: ldr s0, [sp, #12]
950+ ; CHECK-NEXT: Lloh7:
951+ ; CHECK-NEXT: ldr q1, [x8, lCPI23_0@PAGEOFF]
952+ ; CHECK-NEXT: add x8, x0, #1
953+ ; CHECK-NEXT: ushll.8h v0, v0, #0
954+ ; CHECK-NEXT: ld1.b { v0 }[4], [x9]
955+ ; CHECK-NEXT: uaddw.4s v0, v1, v0
956+ ; CHECK-NEXT: st1.b { v0 }[4], [x8]
957+ ; CHECK-NEXT: st1.b { v0 }[8], [x9]
958+ ; CHECK-NEXT: st1.b { v0 }[0], [x0]
959+ ; CHECK-NEXT: add sp, sp, #16
960+ ; CHECK-NEXT: ret
961+ ; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
962+ ;
963+ ; BE-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:
964+ ; BE: // %bb.0:
965+ ; BE-NEXT: sub sp, sp, #16
966+ ; BE-NEXT: .cfi_def_cfa_offset 16
967+ ; BE-NEXT: ldrh w8, [x0]
968+ ; BE-NEXT: adrp x9, .LCPI23_0
969+ ; BE-NEXT: add x9, x9, :lo12:.LCPI23_0
970+ ; BE-NEXT: strh w8, [sp, #12]
971+ ; BE-NEXT: add x8, x0, #2
972+ ; BE-NEXT: ldr s0, [sp, #12]
973+ ; BE-NEXT: ld1 { v1.4h }, [x9]
974+ ; BE-NEXT: rev32 v0.8b, v0.8b
975+ ; BE-NEXT: ushll v0.8h, v0.8b, #0
976+ ; BE-NEXT: ld1 { v0.b }[4], [x8]
977+ ; BE-NEXT: add v0.4h, v0.4h, v1.4h
978+ ; BE-NEXT: xtn v1.8b, v0.8h
979+ ; BE-NEXT: umov w8, v0.h[2]
980+ ; BE-NEXT: rev32 v1.16b, v1.16b
981+ ; BE-NEXT: str s1, [sp, #8]
982+ ; BE-NEXT: ldrh w9, [sp, #8]
983+ ; BE-NEXT: strb w8, [x0, #2]
984+ ; BE-NEXT: strh w9, [x0]
985+ ; BE-NEXT: add sp, sp, #16
986+ ; BE-NEXT: ret
987+ %l = load <3 x i8 >, ptr %src , align 1
988+ %e = sext <3 x i8 > %l to <3 x i32 >
989+ %add = add <3 x i32 > %e , <i32 1 , i32 2 , i32 3 >
990+ %t = trunc <3 x i32 > %add to <3 x i8 >
; Unlike the load above, this store carries no explicit alignment.
991+ store <3 x i8 > %t , ptr %src
992+ ret void
993+ }
0 commit comments