Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25286,6 +25286,17 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const
}
}

//------------------------------------------------------------------------
// OperIsBitwiseHWIntrinsic: Is this HWIntrinsic a bitwise logic intrinsic node.
//
// Return Value:
// Whether "this" is a bitwise logic intrinsic node.
//
bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic()
{
return HWOperGet() == GT_AND || HWOperGet() == GT_OR || HWOperGet() == GT_XOR || HWOperGet() == GT_AND_NOT;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: It'd be nice to explicitly cache HWOperGet(). The compiler "should" be doing it for us, but its not a trivial call and is better to be safe.

}

//------------------------------------------------------------------------------
// OperRequiresAsgFlag : Check whether the operation requires GTF_ASG flag regardless
// of the children's flags.
Expand Down Expand Up @@ -25483,6 +25494,8 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet()
case NI_SSE2_And:
case NI_AVX_And:
case NI_AVX2_And:
case NI_AVX512F_And:
case NI_AVX512DQ_And:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_And:
#endif
Expand All @@ -25502,13 +25515,38 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet()
case NI_SSE2_Xor:
case NI_AVX_Xor:
case NI_AVX2_Xor:
case NI_AVX512F_Xor:
case NI_AVX512DQ_Xor:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Xor:
#endif
{
return GT_XOR;
}

#if defined(TARGET_XARCH)
case NI_SSE_Or:
case NI_SSE2_Or:
case NI_AVX_Or:
case NI_AVX2_Or:
case NI_AVX512F_Or:
case NI_AVX512DQ_Or:
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to either include the { return GT_OR; } as part of the ifdef -or- more ideally we add the relevant:

#elif defined(TARGET_ARM64)
    case NI_AdvSimd_Or:
#endif

{
return GT_OR;
}

#if defined(TARGET_XARCH)
case NI_SSE_AndNot:
case NI_SSE2_AndNot:
case NI_AVX_AndNot:
case NI_AVX2_AndNot:
case NI_AVX512F_AndNot:
case NI_AVX512DQ_AndNot:
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here. We need to either include the braces as part of the ifdef or cover NI_AdvSimd_AndNot:

{
return GT_AND_NOT;
}
// TODO: Handle other cases

default:
Expand Down Expand Up @@ -26295,6 +26333,80 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree

return 0;
}

//------------------------------------------------------------------------
// GetTernaryControlByte: calculate the value of the control byte for ternary node
// with given logic nodes on the input.
//
// Return value: the value of the ternary control byte.
uint8_t GenTreeHWIntrinsic::GetTernaryControlByte(GenTreeHWIntrinsic* second)
{
// we assume we have a structure like:
/*
/- A
+- B
t1 = binary logical op1

/- C
+- t1
t2 = binary logical op2
*/

// To calculate the control byte value:
// The way the constants work is we have three keys:
// * A: 0xF0
// * B: 0xCC
// * C: 0xAA
//
// To compute the correct control byte, you simply perform the corresponding operation on these keys. So, if you
// wanted to do (A & B) ^ C, you would compute (0xF0 & 0xCC) ^ 0xAA or 0x6A.
assert(second->Op(1) == this || second->Op(2) == this);
const uint8_t A = 240; // 0xF0
const uint8_t B = 204; // 0xCC
const uint8_t C = 170; // 0xAA
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not just make these directly hex:

    const uint8_t A = 0xF0;
    const uint8_t B = 0xCC;
    const uint8_t C = 0xAA;


NamedIntrinsic firstLogic = GetHWIntrinsicId();
NamedIntrinsic secondLogic = second->GetHWIntrinsicId();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These look like dead locals now.


uint8_t AB = 0;
uint8_t ABC = 0;

if (HWOperGet() == GT_AND)
{
AB = A & B;
}
else if (HWOperGet() == GT_OR)
{
AB = A | B;
}
else if (HWOperGet() == GT_XOR)
{
AB = A ^ B;
}
else
{
unreached();
}

if (second->HWOperGet() == GT_AND)
{
ABC = AB & C;
}
else if (second->HWOperGet() == GT_OR)
{
ABC = AB | C;
}
else if (second->HWOperGet() == GT_XOR)
{
ABC = AB ^ C;
}
else
{
unreached();
}

return ABC;
}
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS

unsigned GenTreeLclFld::GetSize() const
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6249,11 +6249,13 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
bool OperIsEmbBroadcastCompatible() const;
bool OperIsBroadcastScalar() const;
bool OperIsCreateScalarUnsafe() const;
bool OperIsBitwiseHWIntrinsic();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be bool OperIsBitwiseHWIntrinsic() const; to indicate it doesn't mutate the instance?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah Yes, you are right, and do we consider also making HWOperGet() constant? it would make sense to me based on its semantic, and it would save some const_cast statement.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should be annotated as such since it doesn't mutate (and never should)


bool OperRequiresAsgFlag() const;
bool OperRequiresCallFlag() const;

unsigned GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3);
uint8_t GetTernaryControlByte(GenTreeHWIntrinsic* second);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, shouldn't this be uint8_t GetTernaryControlByte(GenTreeHWIntrinsic* second) const;?


ClassLayout* GetLayout(Compiler* compiler) const;

Expand Down
76 changes: 76 additions & 0 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1662,6 +1662,82 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
LowerFusedMultiplyAdd(node);
break;

case NI_SSE_And:
case NI_SSE2_And:
case NI_AVX_And:
case NI_AVX2_And:
case NI_AVX512F_And:
case NI_AVX512DQ_And:
case NI_SSE_Or:
case NI_SSE2_Or:
case NI_AVX_Or:
case NI_AVX2_Or:
case NI_AVX512F_Or:
case NI_AVX512DQ_Or:
case NI_SSE_Xor:
case NI_SSE2_Xor:
case NI_AVX_Xor:
case NI_AVX2_Xor:
case NI_AVX512F_Xor:
case NI_AVX512DQ_Xor:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any concerns around this becoming out of sync from OperIsBitwiseHWIntrinsic

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this list, we have the intrinsics that can be folded into ternary logic, while ANDNOT related intrinsics cannot be folded currently. On the other hand, OperIsBitwiseHWIntrinsic should be consistent with its name from my view, so I included ANDNOT there.

I could leave some comments there to specify this issue, if this is the better way to make thing more clear.

{
if (!comp->IsBaselineVector512IsaSupportedOpportunistically())
{
break;
}
GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);

LIR::Use use;
if (BlockRange().TryGetUse(node, &use))
{
// search for structure like:
/*
/- A
+- B
t1 = binary logical op1

/- C
+- t1
t2 = binary logical op2
*/
GenTree* second = use.User();
if (!second->OperIs(GT_HWINTRINSIC) || !second->AsHWIntrinsic()->OperIsBitwiseHWIntrinsic())
{
break;
}

if (second->AsHWIntrinsic()->HWOperGet() == GT_AND_NOT)
{
// currently ANDNOT logic cannot be optimized by the ternary node.
break;
}
GenTree* op3 = second->AsHWIntrinsic()->Op(1) == node ? second->AsHWIntrinsic()->Op(2)
: second->AsHWIntrinsic()->Op(1);
GenTree* control = comp->gtNewIconNode(node->GetTernaryControlByte(second->AsHWIntrinsic()));
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
unsigned simdSize = node->GetSimdSize();
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
GenTree* ternaryNode =
comp->gtNewSimdTernaryLogicNode(simdType, op1, op2, op3, control, simdBaseJitType, simdSize);
BlockRange().InsertBefore(second, control, ternaryNode);
LIR::Use finalRes;
if (BlockRange().TryGetUse(second, &finalRes))
{
finalRes.ReplaceWith(ternaryNode);
}
else
{
ternaryNode->SetUnusedValue();
}
GenTree* next = node->gtNext;
BlockRange().Remove(node);
BlockRange().Remove(second);
return next;
}
break;
}

default:
break;
}
Expand Down