Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29442,6 +29442,57 @@ bool GenTree::IsInvariant() const
return OperIsConst() || OperIs(GT_LCL_ADDR) || OperIs(GT_FTN_ADDR);
}

//-------------------------------------------------------------------
// IsVectorPerElementMask: returns true if this node is known to produce a per-element mask
//                         (every element has either all bits set or none of them).
//
// Arguments:
//    simdBaseType - the base type used to split the vector into elements
//    simdSize     - the size, in bytes, of the vector being inspected
//
// Returns:
//    True if this node is a vector constant whose elements are each all-bits-set or zero,
//    or a hardware intrinsic for which HWIntrinsicInfo::ReturnsPerElementMask is true;
//    false otherwise (always false when FEATURE_SIMD is not defined).
//
bool GenTree::IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const
{
#ifdef FEATURE_SIMD
    if (IsCnsVec())
    {
        const GenTreeVecCon* vecCon = AsVecCon();

        // Fast path: a constant that is entirely ones or entirely zeros is a
        // per-element mask regardless of the element width.
        if (vecCon->IsAllBitsSet() || vecCon->IsZero())
        {
            return true;
        }

        int elementCount = vecCon->ElementCount(simdSize, simdBaseType);

        // Inspect the constant at the granularity of the base type; only the
        // element width matters, so same-sized types share a case.
        switch (simdBaseType)
        {
            case TYP_BYTE:
            case TYP_UBYTE:
                return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u8[0], elementCount);

            case TYP_SHORT:
            case TYP_USHORT:
                return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u16[0], elementCount);

            case TYP_INT:
            case TYP_UINT:
            case TYP_FLOAT:
                return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u32[0], elementCount);

            case TYP_LONG:
            case TYP_ULONG:
            case TYP_DOUBLE:
                return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u64[0], elementCount);

            default:
                unreached();
        }
    }
    else if (OperIsHWIntrinsic())
    {
        // Defer to the intrinsic table for non-constant nodes.
        return HWIntrinsicInfo::ReturnsPerElementMask(AsHWIntrinsic()->GetHWIntrinsicId());
    }
#endif // FEATURE_SIMD

    return false;
}

//------------------------------------------------------------------------
// IsNeverNegative: returns true if the given tree is known to be never
// negative, i. e. the upper bit will always be zero.
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2317,6 +2317,7 @@ struct GenTree
bool Precedes(GenTree* other);

bool IsInvariant() const;
bool IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const;

bool IsNeverNegative(Compiler* comp) const;
bool IsNeverNegativeOne(Compiler* comp) const;
Expand Down
9 changes: 4 additions & 5 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2994,13 +2994,12 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
GenTree* op3 = node->Op(3);

// If the condition vector comes from a hardware intrinsic that
// returns a per-element mask (marked with HW_Flag_ReturnsPerElementMask),
// we can optimize the entire conditional select to
// a single BlendVariable instruction (if supported by the architecture)
// returns a per-element mask, we can optimize the entire
// conditional select to a single BlendVariable instruction
// (if supported by the architecture)

// TODO-XARCH-AVX512 Use VPBLENDM* and take input directly from K registers if cond is from MoveMaskToVectorSpecial.
// First, determine if the condition is a per-element mask
if (op1->OperIsHWIntrinsic() && HWIntrinsicInfo::ReturnsPerElementMask(op1->AsHWIntrinsic()->GetHWIntrinsicId()))
if (op1->IsVectorPerElementMask(simdBaseType, simdSize))
{
// Next, determine if the target architecture supports BlendVariable
NamedIntrinsic blendVariableId = NI_Illegal;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ static bool ElementsAreSame(T* array, size_t size)
return true;
}

// ElementsAreAllBitsSetOrZero: checks whether each of the first `size` elements
// of `array` is either zero or has every bit set.
//
// Returns true when all inspected elements qualify (including when size is 0),
// false as soon as one element holds any other value.
template <typename T>
static bool ElementsAreAllBitsSetOrZero(T* array, size_t size)
{
    const T noBits  = static_cast<T>(0);
    const T allBits = static_cast<T>(~0);

    T* const end = array + size;
    for (T* cur = array; cur != end; ++cur)
    {
        const bool isMaskElement = (*cur == noBits) || (*cur == allBits);
        if (!isMaskElement)
        {
            return false;
        }
    }

    return true;
}

struct simd8_t
{
union
Expand Down