@@ -1516,3 +1516,203 @@ uint64x2_t test_splatq_laneq_u64(uint64x2_t v) {
  // LLVM: [[RES:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> zeroinitializer
  // LLVM: ret <2 x i64> [[RES]]
}
1520+ int16x4_t  test_vpadal_s8 (int16x4_t  a , int8x8_t  b ) {
1521+   return  vpadal_s8 (a , b );
1522+ 
1523+   // CIR-LABEL: vpadal_s8 
1524+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : 
1525+   // CIR-SAME: (!cir.vector<!s8i x 8>) -> !cir.vector<!s16i x 4> 
1526+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s16i x 4> 
1527+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s16i x 4> 
1528+ 
1529+   // LLVM: {{.*}}test_vpadal_s8(<4 x i16>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]]) 
1530+   // LLVM:   [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8> 
1531+   // LLVM:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> [[b]]) 
1532+   // LLVM:   [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], [[a]] 
1533+   // LLVM:   ret <4 x i16> [[TMP1]] 
1534+ }
1536+ int32x2_t  test_vpadal_s16 (int32x2_t  a , int16x4_t  b ) {
1537+   return  vpadal_s16 (a , b );
1538+ 
1539+   // CIR-LABEL: vpadal_s16 
1540+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : 
1541+   // CIR-SAME: (!cir.vector<!s16i x 4>) -> !cir.vector<!s32i x 2> 
1542+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s32i x 2> 
1543+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s32i x 2> 
1544+ 
1545+   // LLVM: {{.*}}test_vpadal_s16(<2 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]]) 
1546+   // LLVM:   [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8> 
1547+   // LLVM:   [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8> 
1548+   // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> [[b]]) 
1549+   // LLVM:   [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], [[a]] 
1550+   // LLVM:   ret <2 x i32> [[TMP2]] 
1551+ }
1553+ int64x1_t  test_vpadal_s32 (int64x1_t  a , int32x2_t  b ) {
1554+   return  vpadal_s32 (a , b );
1555+ 
1556+   // CIR-LABEL: vpadal_s32 
1557+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : 
1558+   // CIR-SAME: (!cir.vector<!s32i x 2>) -> !cir.vector<!s64i x 1> 
1559+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!s64i x 1> 
1560+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s64i x 1> 
1561+ 
1562+   // LLVM: {{.*}}test_vpadal_s32(<1 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]]) 
1563+   // LLVM:   [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8> 
1564+   // LLVM:   [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8> 
1565+   // LLVM:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> [[b]]) 
1566+   // LLVM:   [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], [[a]] 
1567+   // LLVM:   ret <1 x i64> [[TMP2]] 
1568+ }
1570+ uint16x4_t  test_vpadal_u8 (uint16x4_t  a , uint8x8_t  b ) {
1571+   return  vpadal_u8 (a , b );
1572+ 
1573+   // CIR-LABEL: vpadal_u8 
1574+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : 
1575+   // CIR-SAME: (!cir.vector<!u8i x 8>) -> !cir.vector<!u16i x 4> 
1576+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u16i x 4> 
1577+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u16i x 4> 
1578+ 
1579+   // LLVM: {{.*}}test_vpadal_u8(<4 x i16>{{.*}}[[a:%.*]], <8 x i8>{{.*}}[[b:%.*]]) 
1580+   // LLVM:   [[TMP0:%.*]] = bitcast <4 x i16> [[a]] to <8 x i8> 
1581+   // LLVM:   [[VPADAL_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> [[b]]) 
1582+   // LLVM:   [[TMP1:%.*]] = add <4 x i16> [[VPADAL_I]], [[a]] 
1583+   // LLVM:   ret <4 x i16> [[TMP1]] 
1584+ }
1586+ uint32x2_t  test_vpadal_u16 (uint32x2_t  a , uint16x4_t  b ) {
1587+   return  vpadal_u16 (a , b );
1588+ 
1589+   // CIR-LABEL: vpadal_u16 
1590+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : 
1591+   // CIR-SAME: (!cir.vector<!u16i x 4>) -> !cir.vector<!u32i x 2> 
1592+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u32i x 2> 
1593+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u32i x 2> 
1594+ 
1595+   // LLVM: {{.*}}test_vpadal_u16(<2 x i32>{{.*}}[[a:%.*]], <4 x i16>{{.*}}[[b:%.*]]) 
1596+   // LLVM:   [[TMP0:%.*]] = bitcast <2 x i32> [[a]] to <8 x i8> 
1597+   // LLVM:   [[TMP1:%.*]] = bitcast <4 x i16> [[b]] to <8 x i8> 
1598+   // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> [[b]]) 
1599+   // LLVM:   [[TMP2:%.*]] = add <2 x i32> [[VPADAL1_I]], [[a]] 
1600+   // LLVM:   ret <2 x i32> [[TMP2]] 
1601+ }
1603+ uint64x1_t  test_vpadal_u32 (uint64x1_t  a , uint32x2_t  b ) {
1604+   return  vpadal_u32 (a , b );
1605+ 
1606+   // CIR-LABEL: vpadal_u32 
1607+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : 
1608+   // CIR-SAME: (!cir.vector<!u32i x 2>) -> !cir.vector<!u64i x 1> 
1609+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 8>), !cir.vector<!u64i x 1> 
1610+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u64i x 1> 
1611+ 
1612+   // LLVM: {{.*}}test_vpadal_u32(<1 x i64>{{.*}}[[a:%.*]], <2 x i32>{{.*}}[[b:%.*]]) 
1613+   // LLVM:   [[TMP0:%.*]] = bitcast <1 x i64> [[a]] to <8 x i8> 
1614+   // LLVM:   [[TMP1:%.*]] = bitcast <2 x i32> [[b]] to <8 x i8> 
1615+   // LLVM:   [[VPADAL1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> [[b]]) 
1616+   // LLVM:   [[TMP2:%.*]] = add <1 x i64> [[VPADAL1_I]], [[a]] 
1617+   // LLVM:   ret <1 x i64> [[TMP2]] 
1618+ }
1620+ int16x8_t  test_vpadalq_s8 (int16x8_t  a , int8x16_t  b ) {
1621+   return  vpadalq_s8 (a , b );
1622+ 
1623+   // CIR-LABEL: vpadalq_s8 
1624+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : 
1625+   // CIR-SAME: (!cir.vector<!s8i x 16>) -> !cir.vector<!s16i x 8> 
1626+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s16i x 8> 
1627+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s16i x 8> 
1628+ 
1629+   // LLVM: {{.*}}test_vpadalq_s8(<8 x i16>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]]) 
1630+   // LLVM:   [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8> 
1631+   // LLVM:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> [[b]]) 
1632+   // LLVM:   [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], [[a]] 
1633+   // LLVM:   ret <8 x i16> [[TMP1]] 
1634+ }
1636+ int32x4_t  test_vpadalq_s16 (int32x4_t  a , int16x8_t  b ) {
1637+   return  vpadalq_s16 (a , b );
1638+ 
1639+   // CIR-LABEL: vpadalq_s16 
1640+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : 
1641+   // CIR-SAME: (!cir.vector<!s16i x 8>) -> !cir.vector<!s32i x 4> 
1642+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s32i x 4> 
1643+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s32i x 4> 
1644+ 
1645+   // LLVM: {{.*}}test_vpadalq_s16(<4 x i32>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]]) 
1646+   // LLVM:   [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8> 
1647+   // LLVM:   [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8> 
1648+   // LLVM:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> [[b]]) 
1649+   // LLVM:   [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], [[a]] 
1650+   // LLVM:   ret <4 x i32> [[TMP2]] 
1651+ }
1653+ int64x2_t  test_vpadalq_s32 (int64x2_t  a , int32x4_t  b ) {
1654+   return  vpadalq_s32 (a , b );
1655+ 
1656+   // CIR-LABEL: vpadalq_s32 
1657+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.saddlp" {{%.*}} : 
1658+   // CIR-SAME: (!cir.vector<!s32i x 4>) -> !cir.vector<!s64i x 2> 
1659+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!s64i x 2> 
1660+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!s64i x 2> 
1661+ 
1662+   // LLVM: {{.*}}test_vpadalq_s32(<2 x i64>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]]) 
1663+   // LLVM:   [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8> 
1664+   // LLVM:   [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8> 
1665+   // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> [[b]]) 
1666+   // LLVM:   [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], [[a]] 
1667+   // LLVM:   ret <2 x i64> [[TMP2]] 
1668+ }
1670+ uint16x8_t  test_vpadalq_u8 (uint16x8_t  a , uint8x16_t  b ) {
1671+   return  vpadalq_u8 (a , b );
1672+ 
1673+   // CIR-LABEL: vpadalq_u8 
1674+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : 
1675+   // CIR-SAME: (!cir.vector<!u8i x 16>) -> !cir.vector<!u16i x 8> 
1676+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u16i x 8> 
1677+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u16i x 8> 
1678+ 
1679+   // LLVM: {{.*}}test_vpadalq_u8(<8 x i16>{{.*}}[[a:%.*]], <16 x i8>{{.*}}[[b:%.*]]) 
1680+   // LLVM:   [[TMP0:%.*]] = bitcast <8 x i16> [[a]] to <16 x i8> 
1681+   // LLVM:   [[VPADAL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> [[b]]) 
1682+   // LLVM:   [[TMP1:%.*]] = add <8 x i16> [[VPADAL_I]], [[a]] 
1683+   // LLVM:   ret <8 x i16> [[TMP1]] 
1684+ }
1686+ uint32x4_t  test_vpadalq_u16 (uint32x4_t  a , uint16x8_t  b ) {
1687+   return  vpadalq_u16 (a , b );
1688+ 
1689+   // CIR-LABEL: vpadalq_u16 
1690+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : 
1691+   // CIR-SAME: (!cir.vector<!u16i x 8>) -> !cir.vector<!u32i x 4> 
1692+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u32i x 4> 
1693+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u32i x 4> 
1694+ 
1695+   // LLVM: {{.*}}test_vpadalq_u16(<4 x i32>{{.*}}[[a:%.*]], <8 x i16>{{.*}}[[b:%.*]]) 
1696+   // LLVM:   [[TMP0:%.*]] = bitcast <4 x i32> [[a]] to <16 x i8> 
1697+   // LLVM:   [[TMP1:%.*]] = bitcast <8 x i16> [[b]] to <16 x i8> 
1698+   // LLVM:   [[VPADAL1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> [[b]]) 
1699+   // LLVM:   [[TMP2:%.*]] = add <4 x i32> [[VPADAL1_I]], [[a]] 
1700+   // LLVM:   ret <4 x i32> [[TMP2]] 
1701+ }
1703+ uint64x2_t  test_vpadalq_u32 (uint64x2_t  a , uint32x4_t  b ) {
1704+   return  vpadalq_u32 (a , b );
1705+ 
1706+   // CIR-LABEL: vpadalq_u32 
1707+   // CIR: [[VPADAL_I:%.*]] = cir.llvm.intrinsic "aarch64.neon.uaddlp" {{%.*}} : 
1708+   // CIR-SAME: (!cir.vector<!u32i x 4>) -> !cir.vector<!u64i x 2> 
1709+   // CIR: [[a:%.*]] = cir.cast(bitcast, {{%.*}} : !cir.vector<!s8i x 16>), !cir.vector<!u64i x 2> 
1710+   // CIR: {{%.*}} = cir.binop(add, [[VPADAL_I]], [[a]]) : !cir.vector<!u64i x 2> 
1711+ 
1712+   // LLVM: {{.*}}test_vpadalq_u32(<2 x i64>{{.*}}[[a:%.*]], <4 x i32>{{.*}}[[b:%.*]]) 
1713+   // LLVM:   [[TMP0:%.*]] = bitcast <2 x i64> [[a]] to <16 x i8> 
1714+   // LLVM:   [[TMP1:%.*]] = bitcast <4 x i32> [[b]] to <16 x i8> 
1715+   // LLVM:   [[VPADAL1_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> [[b]]) 
1716+   // LLVM:   [[TMP2:%.*]] = add <2 x i64> [[VPADAL1_I]], [[a]] 
1717+   // LLVM:   ret <2 x i64> [[TMP2]] 
1718+ }