| 
5 | 5 | define <16 x i8> @load_v3i8(ptr %src) {  | 
6 | 6 | ; CHECK-LABEL: load_v3i8:  | 
7 | 7 | ; CHECK:       ; %bb.0:  | 
8 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
9 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
10 |  | -; CHECK-NEXT:    ldrh w8, [x0]  | 
11 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
12 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
13 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
14 |  | -; CHECK-NEXT:    umov.h w8, v0[0]  | 
15 |  | -; CHECK-NEXT:    umov.h w9, v0[1]  | 
 | 8 | +; CHECK-NEXT:    ldrb w8, [x0, #2]  | 
 | 9 | +; CHECK-NEXT:    ldrh w9, [x0]  | 
 | 10 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
16 | 11 | ; CHECK-NEXT:    fmov s0, w8  | 
17 |  | -; CHECK-NEXT:    add x8, x0, #2  | 
18 |  | -; CHECK-NEXT:    mov.b v0[1], w9  | 
19 |  | -; CHECK-NEXT:    ld1.b { v0 }[2], [x8]  | 
20 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
21 | 12 | ; CHECK-NEXT:    ret  | 
22 | 13 | ;  | 
23 | 14 | ; BE-LABEL: load_v3i8:  | 
@@ -47,19 +38,14 @@ define <16 x i8> @load_v3i8(ptr %src) {  | 
47 | 38 | define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {  | 
48 | 39 | ; CHECK-LABEL: load_v3i8_to_4xi32:  | 
49 | 40 | ; CHECK:       ; %bb.0:  | 
50 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
51 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
52 |  | -; CHECK-NEXT:    ldrh w8, [x0]  | 
 | 41 | +; CHECK-NEXT:    ldrb w8, [x0, #2]  | 
 | 42 | +; CHECK-NEXT:    ldrh w9, [x0]  | 
53 | 43 | ; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff  | 
54 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
55 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
56 |  | -; CHECK-NEXT:    ldrsb w8, [x0, #2]  | 
57 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
58 |  | -; CHECK-NEXT:    mov.h v0[1], v0[1]  | 
59 |  | -; CHECK-NEXT:    mov.h v0[2], w8  | 
 | 44 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
 | 45 | +; CHECK-NEXT:    fmov s0, w8  | 
 | 46 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
60 | 47 | ; CHECK-NEXT:    ushll.4s v0, v0, #0  | 
61 | 48 | ; CHECK-NEXT:    and.16b v0, v0, v1  | 
62 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
63 | 49 | ; CHECK-NEXT:    ret  | 
64 | 50 | ;  | 
65 | 51 | ; BE-LABEL: load_v3i8_to_4xi32:  | 
@@ -90,19 +76,14 @@ define <4 x i32> @load_v3i8_to_4xi32(ptr %src) {  | 
90 | 76 | define <4 x i32> @load_v3i8_to_4xi32_align_2(ptr %src) {  | 
91 | 77 | ; CHECK-LABEL: load_v3i8_to_4xi32_align_2:  | 
92 | 78 | ; CHECK:       ; %bb.0:  | 
93 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
94 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
95 |  | -; CHECK-NEXT:    ldrh w8, [x0]  | 
 | 79 | +; CHECK-NEXT:    ldrb w8, [x0, #2]  | 
 | 80 | +; CHECK-NEXT:    ldrh w9, [x0]  | 
96 | 81 | ; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff  | 
97 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
98 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
99 |  | -; CHECK-NEXT:    ldrsb w8, [x0, #2]  | 
100 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
101 |  | -; CHECK-NEXT:    mov.h v0[1], v0[1]  | 
102 |  | -; CHECK-NEXT:    mov.h v0[2], w8  | 
 | 82 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
 | 83 | +; CHECK-NEXT:    fmov s0, w8  | 
 | 84 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
103 | 85 | ; CHECK-NEXT:    ushll.4s v0, v0, #0  | 
104 | 86 | ; CHECK-NEXT:    and.16b v0, v0, v1  | 
105 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
106 | 87 | ; CHECK-NEXT:    ret  | 
107 | 88 | ;  | 
108 | 89 | ; BE-LABEL: load_v3i8_to_4xi32_align_2:  | 
@@ -161,19 +142,14 @@ define <4 x i32> @load_v3i8_to_4xi32_align_4(ptr %src) {  | 
161 | 142 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {  | 
162 | 143 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_1:  | 
163 | 144 | ; CHECK:       ; %bb.0:  | 
164 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
165 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
166 |  | -; CHECK-NEXT:    ldurh w8, [x0, #1]  | 
 | 145 | +; CHECK-NEXT:    ldrb w8, [x0, #3]  | 
 | 146 | +; CHECK-NEXT:    ldurh w9, [x0, #1]  | 
167 | 147 | ; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff  | 
168 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
169 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
170 |  | -; CHECK-NEXT:    ldrsb w8, [x0, #3]  | 
171 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
172 |  | -; CHECK-NEXT:    mov.h v0[1], v0[1]  | 
173 |  | -; CHECK-NEXT:    mov.h v0[2], w8  | 
 | 148 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
 | 149 | +; CHECK-NEXT:    fmov s0, w8  | 
 | 150 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
174 | 151 | ; CHECK-NEXT:    ushll.4s v0, v0, #0  | 
175 | 152 | ; CHECK-NEXT:    and.16b v0, v0, v1  | 
176 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
177 | 153 | ; CHECK-NEXT:    ret  | 
178 | 154 | ;  | 
179 | 155 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_1:  | 
@@ -205,19 +181,14 @@ define <4 x i32> @load_v3i8_to_4xi32_const_offset_1(ptr %src) {  | 
205 | 181 | define <4 x i32> @load_v3i8_to_4xi32_const_offset_3(ptr %src) {  | 
206 | 182 | ; CHECK-LABEL: load_v3i8_to_4xi32_const_offset_3:  | 
207 | 183 | ; CHECK:       ; %bb.0:  | 
208 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
209 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
210 |  | -; CHECK-NEXT:    ldurh w8, [x0, #3]  | 
 | 184 | +; CHECK-NEXT:    ldrb w8, [x0, #5]  | 
 | 185 | +; CHECK-NEXT:    ldurh w9, [x0, #3]  | 
211 | 186 | ; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff  | 
212 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
213 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
214 |  | -; CHECK-NEXT:    ldrsb w8, [x0, #5]  | 
215 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
216 |  | -; CHECK-NEXT:    mov.h v0[1], v0[1]  | 
217 |  | -; CHECK-NEXT:    mov.h v0[2], w8  | 
 | 187 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
 | 188 | +; CHECK-NEXT:    fmov s0, w8  | 
 | 189 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
218 | 190 | ; CHECK-NEXT:    ushll.4s v0, v0, #0  | 
219 | 191 | ; CHECK-NEXT:    and.16b v0, v0, v1  | 
220 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
221 | 192 | ; CHECK-NEXT:    ret  | 
222 | 193 | ;  | 
223 | 194 | ; BE-LABEL: load_v3i8_to_4xi32_const_offset_3:  | 
@@ -349,18 +320,14 @@ define <3 x i32> @load_v3i32(ptr %src) {  | 
349 | 320 | define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {  | 
350 | 321 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32:  | 
351 | 322 | ; CHECK:       ; %bb.0:  | 
352 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
353 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
354 |  | -; CHECK-NEXT:    ldrh w8, [x0]  | 
 | 323 | +; CHECK-NEXT:    ldrb w8, [x0, #2]  | 
 | 324 | +; CHECK-NEXT:    ldrh w9, [x0]  | 
355 | 325 | ; CHECK-NEXT:    movi.2d v1, #0x0000ff000000ff  | 
356 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
357 |  | -; CHECK-NEXT:    add x8, x0, #2  | 
358 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
359 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
360 |  | -; CHECK-NEXT:    ld1.b { v0 }[4], [x8]  | 
 | 326 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
 | 327 | +; CHECK-NEXT:    fmov s0, w8  | 
 | 328 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
361 | 329 | ; CHECK-NEXT:    ushll.4s v0, v0, #0  | 
362 | 330 | ; CHECK-NEXT:    and.16b v0, v0, v1  | 
363 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
364 | 331 | ; CHECK-NEXT:    ret  | 
365 | 332 | ;  | 
366 | 333 | ; BE-LABEL: load_v3i8_zext_to_3xi32:  | 
@@ -389,18 +356,14 @@ define <3 x i32> @load_v3i8_zext_to_3xi32(ptr %src) {  | 
389 | 356 | define <3 x i32> @load_v3i8_sext_to_3xi32(ptr %src) {  | 
390 | 357 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32:  | 
391 | 358 | ; CHECK:       ; %bb.0:  | 
392 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
393 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
394 |  | -; CHECK-NEXT:    ldrh w8, [x0]  | 
395 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
396 |  | -; CHECK-NEXT:    add x8, x0, #2  | 
397 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
398 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
399 |  | -; CHECK-NEXT:    ld1.b { v0 }[4], [x8]  | 
 | 359 | +; CHECK-NEXT:    ldrb w8, [x0, #2]  | 
 | 360 | +; CHECK-NEXT:    ldrh w9, [x0]  | 
 | 361 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
 | 362 | +; CHECK-NEXT:    fmov s0, w8  | 
 | 363 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
400 | 364 | ; CHECK-NEXT:    ushll.4s v0, v0, #0  | 
401 | 365 | ; CHECK-NEXT:    shl.4s v0, v0, #24  | 
402 | 366 | ; CHECK-NEXT:    sshr.4s v0, v0, #24  | 
403 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
404 | 367 | ; CHECK-NEXT:    ret  | 
405 | 368 | ;  | 
406 | 369 | ; BE-LABEL: load_v3i8_sext_to_3xi32:  | 
@@ -514,19 +477,15 @@ entry:  | 
514 | 477 | define void @load_ext_to_64bits(ptr %src, ptr %dst) {  | 
515 | 478 | ; CHECK-LABEL: load_ext_to_64bits:  | 
516 | 479 | ; CHECK:       ; %bb.0: ; %entry  | 
517 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
518 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
519 |  | -; CHECK-NEXT:    ldrh w8, [x0]  | 
520 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
521 |  | -; CHECK-NEXT:    add x8, x0, #2  | 
522 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
523 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
524 |  | -; CHECK-NEXT:    ld1.b { v0 }[4], [x8]  | 
 | 480 | +; CHECK-NEXT:    ldrb w9, [x0, #2]  | 
525 | 481 | ; CHECK-NEXT:    add x8, x1, #4  | 
 | 482 | +; CHECK-NEXT:    ldrh w10, [x0]  | 
 | 483 | +; CHECK-NEXT:    orr w9, w10, w9, lsl #16  | 
 | 484 | +; CHECK-NEXT:    fmov s0, w9  | 
 | 485 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
526 | 486 | ; CHECK-NEXT:    bic.4h v0, #255, lsl #8  | 
527 | 487 | ; CHECK-NEXT:    st1.h { v0 }[2], [x8]  | 
528 | 488 | ; CHECK-NEXT:    str s0, [x1]  | 
529 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
530 | 489 | ; CHECK-NEXT:    ret  | 
531 | 490 | ;  | 
532 | 491 | ; BE-LABEL: load_ext_to_64bits:  | 
@@ -617,24 +576,20 @@ entry:  | 
617 | 576 | define void @load_ext_add_to_64bits(ptr %src, ptr %dst) {  | 
618 | 577 | ; CHECK-LABEL: load_ext_add_to_64bits:  | 
619 | 578 | ; CHECK:       ; %bb.0: ; %entry  | 
620 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
621 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
622 |  | -; CHECK-NEXT:    ldrh w8, [x0]  | 
 | 579 | +; CHECK-NEXT:    ldrb w8, [x0, #2]  | 
 | 580 | +; CHECK-NEXT:    ldrh w9, [x0]  | 
 | 581 | +; CHECK-NEXT:    orr w8, w9, w8, lsl #16  | 
 | 582 | +; CHECK-NEXT:    fmov s0, w8  | 
623 | 583 | ; CHECK-NEXT:  Lloh2:  | 
624 |  | -; CHECK-NEXT:    adrp x9, lCPI15_0@PAGE  | 
625 |  | -; CHECK-NEXT:    strh w8, [sp, #12]  | 
626 |  | -; CHECK-NEXT:    add x8, x0, #2  | 
627 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
 | 584 | +; CHECK-NEXT:    adrp x8, lCPI15_0@PAGE  | 
 | 585 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
628 | 586 | ; CHECK-NEXT:  Lloh3:  | 
629 |  | -; CHECK-NEXT:    ldr d1, [x9, lCPI15_0@PAGEOFF]  | 
630 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
631 |  | -; CHECK-NEXT:    ld1.b { v0 }[4], [x8]  | 
 | 587 | +; CHECK-NEXT:    ldr d1, [x8, lCPI15_0@PAGEOFF]  | 
632 | 588 | ; CHECK-NEXT:    add x8, x1, #4  | 
633 | 589 | ; CHECK-NEXT:    bic.4h v0, #255, lsl #8  | 
634 | 590 | ; CHECK-NEXT:    add.4h v0, v0, v1  | 
635 | 591 | ; CHECK-NEXT:    st1.h { v0 }[2], [x8]  | 
636 | 592 | ; CHECK-NEXT:    str s0, [x1]  | 
637 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
638 | 593 | ; CHECK-NEXT:    ret  | 
639 | 594 | ; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh3  | 
640 | 595 | ;  | 
@@ -883,24 +838,21 @@ define void @shift_trunc_volatile_store(ptr %src, ptr %dst) {  | 
883 | 838 | define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {  | 
884 | 839 | ; CHECK-LABEL: load_v3i8_zext_to_3xi32_add_trunc_store:  | 
885 | 840 | ; CHECK:       ; %bb.0:  | 
886 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
887 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
888 |  | -; CHECK-NEXT:    ldrh w9, [x0]  | 
 | 841 | +; CHECK-NEXT:    ldrb w10, [x0, #2]  | 
889 | 842 | ; CHECK-NEXT:  Lloh4:  | 
890 | 843 | ; CHECK-NEXT:    adrp x8, lCPI22_0@PAGE  | 
891 |  | -; CHECK-NEXT:    strh w9, [sp, #12]  | 
 | 844 | +; CHECK-NEXT:    ldrh w11, [x0]  | 
892 | 845 | ; CHECK-NEXT:    add x9, x0, #2  | 
893 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
894 | 846 | ; CHECK-NEXT:  Lloh5:  | 
895 | 847 | ; CHECK-NEXT:    ldr q1, [x8, lCPI22_0@PAGEOFF]  | 
896 | 848 | ; CHECK-NEXT:    add x8, x0, #1  | 
897 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
898 |  | -; CHECK-NEXT:    ld1.b { v0 }[4], [x9]  | 
 | 849 | +; CHECK-NEXT:    orr w10, w11, w10, lsl #16  | 
 | 850 | +; CHECK-NEXT:    fmov s0, w10  | 
 | 851 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
899 | 852 | ; CHECK-NEXT:    uaddw.4s v0, v1, v0  | 
900 |  | -; CHECK-NEXT:    st1.b { v0 }[4], [x8]  | 
901 | 853 | ; CHECK-NEXT:    st1.b { v0 }[8], [x9]  | 
902 | 854 | ; CHECK-NEXT:    st1.b { v0 }[0], [x0]  | 
903 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
 | 855 | +; CHECK-NEXT:    st1.b { v0 }[4], [x8]  | 
904 | 856 | ; CHECK-NEXT:    ret  | 
905 | 857 | ; CHECK-NEXT:    .loh AdrpLdr Lloh4, Lloh5  | 
906 | 858 | ;  | 
@@ -939,24 +891,21 @@ define void @load_v3i8_zext_to_3xi32_add_trunc_store(ptr %src) {  | 
939 | 891 | define void @load_v3i8_sext_to_3xi32_add_trunc_store(ptr %src) {  | 
940 | 892 | ; CHECK-LABEL: load_v3i8_sext_to_3xi32_add_trunc_store:  | 
941 | 893 | ; CHECK:       ; %bb.0:  | 
942 |  | -; CHECK-NEXT:    sub sp, sp, #16  | 
943 |  | -; CHECK-NEXT:    .cfi_def_cfa_offset 16  | 
944 |  | -; CHECK-NEXT:    ldrh w9, [x0]  | 
 | 894 | +; CHECK-NEXT:    ldrb w10, [x0, #2]  | 
945 | 895 | ; CHECK-NEXT:  Lloh6:  | 
946 | 896 | ; CHECK-NEXT:    adrp x8, lCPI23_0@PAGE  | 
947 |  | -; CHECK-NEXT:    strh w9, [sp, #12]  | 
 | 897 | +; CHECK-NEXT:    ldrh w11, [x0]  | 
948 | 898 | ; CHECK-NEXT:    add x9, x0, #2  | 
949 |  | -; CHECK-NEXT:    ldr s0, [sp, #12]  | 
950 | 899 | ; CHECK-NEXT:  Lloh7:  | 
951 | 900 | ; CHECK-NEXT:    ldr q1, [x8, lCPI23_0@PAGEOFF]  | 
952 | 901 | ; CHECK-NEXT:    add x8, x0, #1  | 
953 |  | -; CHECK-NEXT:    ushll.8h v0, v0, #0  | 
954 |  | -; CHECK-NEXT:    ld1.b { v0 }[4], [x9]  | 
 | 902 | +; CHECK-NEXT:    orr w10, w11, w10, lsl #16  | 
 | 903 | +; CHECK-NEXT:    fmov s0, w10  | 
 | 904 | +; CHECK-NEXT:    zip1.8b v0, v0, v0  | 
955 | 905 | ; CHECK-NEXT:    uaddw.4s v0, v1, v0  | 
956 |  | -; CHECK-NEXT:    st1.b { v0 }[4], [x8]  | 
957 | 906 | ; CHECK-NEXT:    st1.b { v0 }[8], [x9]  | 
958 | 907 | ; CHECK-NEXT:    st1.b { v0 }[0], [x0]  | 
959 |  | -; CHECK-NEXT:    add sp, sp, #16  | 
 | 908 | +; CHECK-NEXT:    st1.b { v0 }[4], [x8]  | 
960 | 909 | ; CHECK-NEXT:    ret  | 
961 | 910 | ; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh7  | 
962 | 911 | ;  | 
 | 
0 commit comments