@@ -355,15 +355,41 @@ define i128 @i128_mul(i128 %x, i128 %y) {
355355define { i128 , i8 } @i128_checked_mul (i128 %x , i128 %y ) {
356356; CHECK-LABEL: i128_checked_mul:
357357; CHECK: // %bb.0:
358- ; CHECK-NEXT: stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
359- ; CHECK-NEXT: .cfi_def_cfa_offset 16
360- ; CHECK-NEXT: .cfi_offset w30, -16
361- ; CHECK-NEXT: add x4, sp, #8
362- ; CHECK-NEXT: bl __muloti4
363- ; CHECK-NEXT: ldr x8, [sp, #8]
364- ; CHECK-NEXT: cmp x8, #0
358+ ; CHECK-NEXT: asr x8, x1, #63
359+ ; CHECK-NEXT: asr x11, x3, #63
360+ ; CHECK-NEXT: umulh x13, x0, x2
361+ ; CHECK-NEXT: mul x9, x2, x8
362+ ; CHECK-NEXT: umulh x10, x2, x8
363+ ; CHECK-NEXT: umulh x12, x11, x0
364+ ; CHECK-NEXT: mul x14, x1, x2
365+ ; CHECK-NEXT: add x10, x10, x9
366+ ; CHECK-NEXT: madd x8, x3, x8, x10
367+ ; CHECK-NEXT: madd x10, x11, x1, x12
368+ ; CHECK-NEXT: mul x11, x11, x0
369+ ; CHECK-NEXT: umulh x12, x1, x2
370+ ; CHECK-NEXT: mul x15, x0, x3
371+ ; CHECK-NEXT: add x10, x10, x11
372+ ; CHECK-NEXT: adds x9, x11, x9
373+ ; CHECK-NEXT: umulh x16, x0, x3
374+ ; CHECK-NEXT: adc x10, x10, x8
375+ ; CHECK-NEXT: adds x8, x14, x13
376+ ; CHECK-NEXT: cinc x12, x12, hs
377+ ; CHECK-NEXT: mul x11, x1, x3
378+ ; CHECK-NEXT: adds x8, x15, x8
379+ ; CHECK-NEXT: umulh x13, x1, x3
380+ ; CHECK-NEXT: mov x1, x8
381+ ; CHECK-NEXT: cinc x14, x16, hs
382+ ; CHECK-NEXT: adds x12, x12, x14
383+ ; CHECK-NEXT: mul x0, x0, x2
384+ ; CHECK-NEXT: cset w14, hs
385+ ; CHECK-NEXT: adds x11, x11, x12
386+ ; CHECK-NEXT: asr x12, x8, #63
387+ ; CHECK-NEXT: adc x13, x13, x14
388+ ; CHECK-NEXT: adds x9, x11, x9
389+ ; CHECK-NEXT: adc x10, x13, x10
390+ ; CHECK-NEXT: cmp x9, x12
391+ ; CHECK-NEXT: ccmp x10, x12, #0, eq
365392; CHECK-NEXT: cset w2, eq
366- ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
367393; CHECK-NEXT: ret
368394 %1 = tail call { i128 , i1 } @llvm.smul.with.overflow.i128 (i128 %x , i128 %y )
369395 %2 = extractvalue { i128 , i1 } %1 , 0
@@ -378,15 +404,41 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
378404define { i128 , i8 } @i128_overflowing_mul (i128 %x , i128 %y ) {
379405; CHECK-LABEL: i128_overflowing_mul:
380406; CHECK: // %bb.0:
381- ; CHECK-NEXT: stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
382- ; CHECK-NEXT: .cfi_def_cfa_offset 16
383- ; CHECK-NEXT: .cfi_offset w30, -16
384- ; CHECK-NEXT: add x4, sp, #8
385- ; CHECK-NEXT: bl __muloti4
386- ; CHECK-NEXT: ldr x8, [sp, #8]
387- ; CHECK-NEXT: cmp x8, #0
407+ ; CHECK-NEXT: asr x8, x1, #63
408+ ; CHECK-NEXT: asr x11, x3, #63
409+ ; CHECK-NEXT: umulh x13, x0, x2
410+ ; CHECK-NEXT: mul x9, x2, x8
411+ ; CHECK-NEXT: umulh x10, x2, x8
412+ ; CHECK-NEXT: umulh x12, x11, x0
413+ ; CHECK-NEXT: mul x14, x1, x2
414+ ; CHECK-NEXT: add x10, x10, x9
415+ ; CHECK-NEXT: madd x8, x3, x8, x10
416+ ; CHECK-NEXT: madd x10, x11, x1, x12
417+ ; CHECK-NEXT: mul x11, x11, x0
418+ ; CHECK-NEXT: umulh x12, x1, x2
419+ ; CHECK-NEXT: mul x15, x0, x3
420+ ; CHECK-NEXT: add x10, x10, x11
421+ ; CHECK-NEXT: adds x9, x11, x9
422+ ; CHECK-NEXT: umulh x16, x0, x3
423+ ; CHECK-NEXT: adc x10, x10, x8
424+ ; CHECK-NEXT: adds x8, x14, x13
425+ ; CHECK-NEXT: cinc x12, x12, hs
426+ ; CHECK-NEXT: mul x11, x1, x3
427+ ; CHECK-NEXT: adds x8, x15, x8
428+ ; CHECK-NEXT: umulh x13, x1, x3
429+ ; CHECK-NEXT: mov x1, x8
430+ ; CHECK-NEXT: cinc x14, x16, hs
431+ ; CHECK-NEXT: adds x12, x12, x14
432+ ; CHECK-NEXT: mul x0, x0, x2
433+ ; CHECK-NEXT: cset w14, hs
434+ ; CHECK-NEXT: adds x11, x11, x12
435+ ; CHECK-NEXT: asr x12, x8, #63
436+ ; CHECK-NEXT: adc x13, x13, x14
437+ ; CHECK-NEXT: adds x9, x11, x9
438+ ; CHECK-NEXT: adc x10, x13, x10
439+ ; CHECK-NEXT: cmp x9, x12
440+ ; CHECK-NEXT: ccmp x10, x12, #0, eq
388441; CHECK-NEXT: cset w2, ne
389- ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
390442; CHECK-NEXT: ret
391443 %1 = tail call { i128 , i1 } @llvm.smul.with.overflow.i128 (i128 %x , i128 %y )
392444 %2 = extractvalue { i128 , i1 } %1 , 0
@@ -400,26 +452,46 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
400452define i128 @i128_saturating_mul (i128 %x , i128 %y ) {
401453; CHECK-LABEL: i128_saturating_mul:
402454; CHECK: // %bb.0:
403- ; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
404- ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
405- ; CHECK-NEXT: .cfi_def_cfa_offset 32
406- ; CHECK-NEXT: .cfi_offset w19, -8
407- ; CHECK-NEXT: .cfi_offset w20, -16
408- ; CHECK-NEXT: .cfi_offset w30, -32
409- ; CHECK-NEXT: add x4, sp, #8
410- ; CHECK-NEXT: mov x19, x3
411- ; CHECK-NEXT: mov x20, x1
412- ; CHECK-NEXT: str xzr, [sp, #8]
413- ; CHECK-NEXT: bl __muloti4
414- ; CHECK-NEXT: eor x8, x19, x20
415- ; CHECK-NEXT: ldr x9, [sp, #8]
416- ; CHECK-NEXT: asr x8, x8, #63
417- ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
418- ; CHECK-NEXT: cmp x9, #0
419- ; CHECK-NEXT: eor x10, x8, #0x7fffffffffffffff
420- ; CHECK-NEXT: csinv x0, x0, x8, eq
421- ; CHECK-NEXT: csel x1, x10, x1, ne
422- ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
455+ ; CHECK-NEXT: asr x8, x1, #63
456+ ; CHECK-NEXT: asr x11, x3, #63
457+ ; CHECK-NEXT: umulh x13, x0, x2
458+ ; CHECK-NEXT: mul x9, x2, x8
459+ ; CHECK-NEXT: umulh x10, x2, x8
460+ ; CHECK-NEXT: umulh x12, x11, x0
461+ ; CHECK-NEXT: mul x14, x1, x2
462+ ; CHECK-NEXT: add x10, x10, x9
463+ ; CHECK-NEXT: madd x8, x3, x8, x10
464+ ; CHECK-NEXT: madd x10, x11, x1, x12
465+ ; CHECK-NEXT: mul x11, x11, x0
466+ ; CHECK-NEXT: umulh x12, x1, x2
467+ ; CHECK-NEXT: mul x16, x0, x3
468+ ; CHECK-NEXT: add x10, x10, x11
469+ ; CHECK-NEXT: adds x9, x11, x9
470+ ; CHECK-NEXT: umulh x15, x0, x3
471+ ; CHECK-NEXT: adc x8, x10, x8
472+ ; CHECK-NEXT: adds x10, x14, x13
473+ ; CHECK-NEXT: cinc x12, x12, hs
474+ ; CHECK-NEXT: mul x17, x1, x3
475+ ; CHECK-NEXT: adds x10, x16, x10
476+ ; CHECK-NEXT: umulh x11, x1, x3
477+ ; CHECK-NEXT: cinc x13, x15, hs
478+ ; CHECK-NEXT: adds x12, x12, x13
479+ ; CHECK-NEXT: cset w13, hs
480+ ; CHECK-NEXT: adds x12, x17, x12
481+ ; CHECK-NEXT: adc x11, x11, x13
482+ ; CHECK-NEXT: adds x9, x12, x9
483+ ; CHECK-NEXT: asr x12, x10, #63
484+ ; CHECK-NEXT: mul x13, x0, x2
485+ ; CHECK-NEXT: adc x8, x11, x8
486+ ; CHECK-NEXT: eor x11, x3, x1
487+ ; CHECK-NEXT: eor x8, x8, x12
488+ ; CHECK-NEXT: eor x9, x9, x12
489+ ; CHECK-NEXT: asr x11, x11, #63
490+ ; CHECK-NEXT: orr x8, x9, x8
491+ ; CHECK-NEXT: eor x9, x11, #0x7fffffffffffffff
492+ ; CHECK-NEXT: cmp x8, #0
493+ ; CHECK-NEXT: csel x1, x9, x10, ne
494+ ; CHECK-NEXT: csinv x0, x13, x11, eq
423495; CHECK-NEXT: ret
424496 %1 = tail call { i128 , i1 } @llvm.smul.with.overflow.i128 (i128 %x , i128 %y )
425497 %2 = extractvalue { i128 , i1 } %1 , 0
0 commit comments