@@ -1471,9 +1471,23 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
14711471 op2 = userIntrin->Op (1 );
14721472 }
14731473
1474- NamedIntrinsic intrinsic =
1475- GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp (comp, GT_AND_NOT, op1, op2, simdBaseType,
1476- simdSize, false );
1474+ NamedIntrinsic intrinsic = NI_Illegal;
1475+
1476+ if (comp->IsBaselineSimdIsaSupported ())
1477+ {
1478+ intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp (comp, GT_AND_NOT, op1, op2,
1479+ simdBaseType, simdSize, false );
1480+ }
1481+ else
1482+ {
1483+ // We need to ensure we optimize even if SSE2 is disabled
1484+
1485+ assert (simdBaseType == TYP_FLOAT);
1486+ assert (simdSize <= 16 );
1487+
1488+ intrinsic = NI_SSE_AndNot;
1489+ }
1490+
14771491 userIntrin->ResetHWIntrinsicId (intrinsic, comp, op1, op2);
14781492
14791493 return nextNode;
@@ -1487,24 +1501,55 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
14871501 BlockRange ().Remove (node);
14881502 op3 = userIntrin->Op (2 );
14891503
1504+ // Tracks which two operands get used first
1505+ TernaryLogicUseFlags firstOpUseFlags = TernaryLogicUseFlags::AB;
1506+
14901507 if (op3 == node)
14911508 {
1492- op3 = userIntrin->Op (1 );
1509+ if (userOper == GT_AND_NOT)
1510+ {
1511+ op3 = op2;
1512+ op2 = op1;
1513+ op1 = userIntrin->Op (1 );
1514+
1515+ // AND_NOT isn't commutative so we need to shift parameters down
1516+ firstOpUseFlags = TernaryLogicUseFlags::BC;
1517+ }
1518+ else
1519+ {
1520+ op3 = userIntrin->Op (1 );
1521+ }
14931522 }
14941523
14951524 uint8_t controlByte = 0x00 ;
14961525
14971526 if ((userOper == GT_XOR) && op3->IsVectorAllBitsSet ())
14981527 {
1499- // We're being used by what is actually GT_NOT, so we
1500- // need to shift parameters down so that A is unused
1528+ // We have XOR(OP(A, B), AllBitsSet)
1529+ // A: op1
1530+ // B: op2
1531+ // C: op3 (AllBitsSet)
1532+ //
1533+ // We want A to be the unused parameter so swap it around
1534+ // A: op3 (AllBitsSet)
1535+ // B: op1
1536+ // C: op2
1537+ //
1538+ // This gives us NOT(OP(B, C))
1539+
1540+ assert (firstOpUseFlags == TernaryLogicUseFlags::AB);
15011541
15021542 std::swap (op2, op3);
15031543 std::swap (op1, op2);
15041544
15051545 if (isOperNot)
15061546 {
1507- // We have what is actually a double not, so just return op2
1547+ // We have NOT(XOR(B, AllBitsSet))
1548+ // A: op3 (AllBitsSet)
1549+ // B: op1
1550+ // C: op2 (AllBitsSet)
1551+ //
1552+ // This represents a double not, so just return op2
15081553 // which is the only actual value now that the parameters
15091554 // were shifted around
15101555
@@ -1538,20 +1583,64 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
15381583 }
15391584 else if (isOperNot)
15401585 {
1541- // A is unused, so we just want OP(NOT(B), C)
1586+ if (firstOpUseFlags == TernaryLogicUseFlags::AB)
1587+ {
1588+ // We have OP(XOR(A, AllBitsSet), C)
1589+ // A: op1
1590+ // B: op2 (AllBitsSet)
1591+ // C: op3
1592+ //
1593+ // We want A to be the unused parameter so swap it around
1594+ // A: op2 (AllBitsSet)
1595+ // B: op1
1596+ // C: op3
1597+ //
1598+ // This gives us OP(NOT(B), C)
15421599
1543- assert (op2->IsVectorAllBitsSet ());
1544- std::swap (op1, op2);
1600+ assert (op2->IsVectorAllBitsSet ());
1601+ std::swap (op1, op2);
15451602
1546- controlByte = static_cast <uint8_t >(~B);
1547- controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, controlByte, C);
1603+ controlByte = static_cast <uint8_t >(~B);
1604+ controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, controlByte, C);
1605+ }
1606+ else
1607+ {
1608+ // We have OP(A, XOR(B, AllBitsSet))
1609+ // A: op1
1610+ // B: op2
1611+ // C: op3 (AllBitsSet)
1612+ //
1613+ // We want A to be the unused parameter so swap it around
1614+ // A: op3 (AllBitsSet)
1615+ // B: op1
1616+ // C: op2
1617+ //
1618+ // This gives us OP(B, NOT(C))
1619+
1620+ assert (firstOpUseFlags == TernaryLogicUseFlags::BC);
1621+
1622+ assert (op3->IsVectorAllBitsSet ());
1623+ std::swap (op2, op3);
1624+ std::swap (op1, op2);
1625+
1626+ controlByte = static_cast <uint8_t >(~C);
1627+ controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, B, controlByte);
1628+ }
15481629 }
1549- else
1630+ else if (firstOpUseFlags == TernaryLogicUseFlags::AB)
15501631 {
15511632 // We have OP2(OP1(A, B), C)
15521633 controlByte = TernaryLogicInfo::GetTernaryControlByte (oper, A, B);
15531634 controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, controlByte, C);
15541635 }
1636+ else
1637+ {
1638+ // We have OP2(A, OP1(B, C))
1639+ assert (firstOpUseFlags == TernaryLogicUseFlags::BC);
1640+
1641+ controlByte = TernaryLogicInfo::GetTernaryControlByte (oper, B, C);
1642+ controlByte = TernaryLogicInfo::GetTernaryControlByte (userOper, A, controlByte);
1643+ }
15551644
15561645 NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic;
15571646
0 commit comments