@@ -4050,47 +4050,49 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode)
40504050        GenTree*            nestedOp1    = nestedCndSel->Op (1 );
40514051        GenTree*            nestedOp2    = nestedCndSel->Op (2 );
40524052        assert (varTypeIsMask (nestedOp1));
4053-         assert (nestedOp2->OperIsHWIntrinsic ());
40544053
4055-         NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic ()->GetHWIntrinsicId ();
4056- 
4057-         //  If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if
4058-         //  op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this optimisation
4059-         //  when the nestedOp is a reduce operation.
4060- 
4061-         if  (nestedOp1->IsMaskAllBitsSet () && !HWIntrinsicInfo::IsReduceOperation (nestedOp2Id) &&
4062-             (!HWIntrinsicInfo::IsZeroingMaskedOperation (nestedOp2Id) || op3->IsVectorZero ()))
4054+         if  (nestedOp2->OperIsHWIntrinsic ())
40634055        {
4064-             GenTree* nestedOp2 = nestedCndSel->Op (2 );
4065-             GenTree* nestedOp3 = nestedCndSel->Op (3 );
4056+             NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic ()->GetHWIntrinsicId ();
40664057
4067-             JITDUMP ( " lowering  nested ConditionalSelect HWIntrinisic (before): \n " ); 
4068-             DISPTREERANGE ( BlockRange (), cndSelNode); 
4069-             JITDUMP ( " \n " ); 
4058+             //  If the  nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if 
4059+             //  op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this 
4060+             //  optimisation when the nestedOp is a reduce operation. 
40704061
4071-             //  Transform:
4072-             // 
4073-             //  CndSel(mask, CndSel(AllTrue, embeddedMask(trueValOp2), trueValOp3), op3) to
4074-             //  CndSel(mask, embedded(trueValOp2), op3)
4075-             // 
4076-             cndSelNode->Op (2 ) = nestedCndSel->Op (2 );
4077-             if  (nestedOp3->IsMaskZero ())
4078-             {
4079-                 BlockRange ().Remove (nestedOp3);
4080-             }
4081-             else 
4062+             if  (nestedOp1->IsMaskAllBitsSet () && !HWIntrinsicInfo::IsReduceOperation (nestedOp2Id) &&
4063+                 (!HWIntrinsicInfo::IsZeroingMaskedOperation (nestedOp2Id) || op3->IsVectorZero ()))
40824064            {
4083-                 nestedOp3->SetUnusedValue ();
4084-             }
4065+                 GenTree* nestedOp2 = nestedCndSel->Op (2 );
4066+                 GenTree* nestedOp3 = nestedCndSel->Op (3 );
4067+ 
4068+                 JITDUMP (" lowering nested ConditionalSelect HWIntrinisic (before):\n "  );
4069+                 DISPTREERANGE (BlockRange (), cndSelNode);
4070+                 JITDUMP (" \n "  );
4071+ 
4072+                 //  Transform:
4073+                 // 
4074+                 //  CndSel(mask, CndSel(AllTrue, embeddedMask(trueValOp2), trueValOp3), op3) to
4075+                 //  CndSel(mask, embedded(trueValOp2), op3)
4076+                 // 
4077+                 cndSelNode->Op (2 ) = nestedCndSel->Op (2 );
4078+                 if  (nestedOp3->IsMaskZero ())
4079+                 {
4080+                     BlockRange ().Remove (nestedOp3);
4081+                 }
4082+                 else 
4083+                 {
4084+                     nestedOp3->SetUnusedValue ();
4085+                 }
40854086
4086-             BlockRange ().Remove (nestedOp1);
4087-             BlockRange ().Remove (nestedCndSel);
4087+                  BlockRange ().Remove (nestedOp1);
4088+                  BlockRange ().Remove (nestedCndSel);
40884089
4089-             JITDUMP (" lowering nested ConditionalSelect HWIntrinisic (after):\n "  );
4090-             DISPTREERANGE (BlockRange (), cndSelNode);
4091-             JITDUMP (" \n "  );
4090+                  JITDUMP (" lowering nested ConditionalSelect HWIntrinisic (after):\n "  );
4091+                  DISPTREERANGE (BlockRange (), cndSelNode);
4092+                  JITDUMP (" \n "  );
40924093
4093-             return  cndSelNode;
4094+                 return  cndSelNode;
4095+             }
40944096        }
40954097    }
40964098    else  if  (op1->IsMaskAllBitsSet ())
0 commit comments