-
Notifications
You must be signed in to change notification settings - Fork 5.2k
[JIT] Fold some bitwise operations to vpternlog #91227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
23a2341
bda0f04
0525e21
e5e38ff
83dd601
26bc5a3
8cde9e2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -25286,6 +25286,17 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const | |
| } | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
| // OperIsBitwiseHWIntrinsic: Is this HWIntrinsic a bitwise logic intrinsic node. | ||
| // | ||
| // Return Value: | ||
| // Whether "this" is a bitwise logic intrinsic node. | ||
| // | ||
| bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() | ||
| { | ||
| return HWOperGet() == GT_AND || HWOperGet() == GT_OR || HWOperGet() == GT_XOR || HWOperGet() == GT_AND_NOT; | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------------ | ||
| // OperRequiresAsgFlag : Check whether the operation requires GTF_ASG flag regardless | ||
| // of the children's flags. | ||
|
|
@@ -25483,6 +25494,8 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet() | |
| case NI_SSE2_And: | ||
| case NI_AVX_And: | ||
| case NI_AVX2_And: | ||
| case NI_AVX512F_And: | ||
| case NI_AVX512DQ_And: | ||
| #elif defined(TARGET_ARM64) | ||
| case NI_AdvSimd_And: | ||
| #endif | ||
|
|
@@ -25502,13 +25515,38 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet() | |
| case NI_SSE2_Xor: | ||
| case NI_AVX_Xor: | ||
| case NI_AVX2_Xor: | ||
| case NI_AVX512F_Xor: | ||
| case NI_AVX512DQ_Xor: | ||
| #elif defined(TARGET_ARM64) | ||
| case NI_AdvSimd_Xor: | ||
| #endif | ||
| { | ||
| return GT_XOR; | ||
| } | ||
|
|
||
| #if defined(TARGET_XARCH) | ||
| case NI_SSE_Or: | ||
| case NI_SSE2_Or: | ||
| case NI_AVX_Or: | ||
| case NI_AVX2_Or: | ||
| case NI_AVX512F_Or: | ||
| case NI_AVX512DQ_Or: | ||
| #endif | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to either include the |
||
| { | ||
| return GT_OR; | ||
| } | ||
|
|
||
| #if defined(TARGET_XARCH) | ||
| case NI_SSE_AndNot: | ||
| case NI_SSE2_AndNot: | ||
| case NI_AVX_AndNot: | ||
| case NI_AVX2_AndNot: | ||
| case NI_AVX512F_AndNot: | ||
| case NI_AVX512DQ_AndNot: | ||
| #endif | ||
|
||
| { | ||
| return GT_AND_NOT; | ||
| } | ||
| // TODO: Handle other cases | ||
|
|
||
| default: | ||
|
|
@@ -26295,6 +26333,80 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree | |
|
|
||
| return 0; | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
| // GetTernaryControlByte: calculate the value of the control byte for ternary node | ||
| // with given logic nodes on the input. | ||
| // | ||
| // Return value: the value of the ternary control byte. | ||
| uint8_t GenTreeHWIntrinsic::GetTernaryControlByte(GenTreeHWIntrinsic* second) | ||
| { | ||
| // we assume we have a structure like: | ||
| /* | ||
| /- A | ||
| +- B | ||
| t1 = binary logical op1 | ||
|
|
||
| /- C | ||
| +- t1 | ||
| t2 = binary logical op2 | ||
| */ | ||
|
|
||
| // To calculate the control byte value: | ||
| // The way the constants work is we have three keys: | ||
| // * A: 0xF0 | ||
| // * B: 0xCC | ||
| // * C: 0xAA | ||
| // | ||
| // To compute the correct control byte, you simply perform the corresponding operation on these keys. So, if you | ||
| // wanted to do (A & B) ^ C, you would compute (0xF0 & 0xCC) ^ 0xAA or 0x6A. | ||
| assert(second->Op(1) == this || second->Op(2) == this); | ||
| const uint8_t A = 240; // 0xF0 | ||
| const uint8_t B = 204; // 0xCC | ||
| const uint8_t C = 170; // 0xAA | ||
|
||
|
|
||
| NamedIntrinsic firstLogic = GetHWIntrinsicId(); | ||
| NamedIntrinsic secondLogic = second->GetHWIntrinsicId(); | ||
|
||
|
|
||
| uint8_t AB = 0; | ||
| uint8_t ABC = 0; | ||
|
|
||
| if (HWOperGet() == GT_AND) | ||
| { | ||
| AB = A & B; | ||
| } | ||
| else if (HWOperGet() == GT_OR) | ||
| { | ||
| AB = A | B; | ||
| } | ||
| else if (HWOperGet() == GT_XOR) | ||
| { | ||
| AB = A ^ B; | ||
| } | ||
| else | ||
| { | ||
| unreached(); | ||
| } | ||
|
|
||
| if (second->HWOperGet() == GT_AND) | ||
| { | ||
| ABC = AB & C; | ||
| } | ||
| else if (second->HWOperGet() == GT_OR) | ||
| { | ||
| ABC = AB | C; | ||
| } | ||
| else if (second->HWOperGet() == GT_XOR) | ||
| { | ||
| ABC = AB ^ C; | ||
| } | ||
| else | ||
| { | ||
| unreached(); | ||
| } | ||
|
|
||
| return ABC; | ||
| } | ||
| #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS | ||
|
|
||
| unsigned GenTreeLclFld::GetSize() const | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6249,11 +6249,13 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic | |
| bool OperIsEmbBroadcastCompatible() const; | ||
| bool OperIsBroadcastScalar() const; | ||
| bool OperIsCreateScalarUnsafe() const; | ||
| bool OperIsBitwiseHWIntrinsic(); | ||
|
||
|
|
||
| bool OperRequiresAsgFlag() const; | ||
| bool OperRequiresCallFlag() const; | ||
|
|
||
| unsigned GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3); | ||
| uint8_t GetTernaryControlByte(GenTreeHWIntrinsic* second); | ||
|
||
|
|
||
| ClassLayout* GetLayout(Compiler* compiler) const; | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1662,6 +1662,82 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) | |
| LowerFusedMultiplyAdd(node); | ||
| break; | ||
|
|
||
| case NI_SSE_And: | ||
| case NI_SSE2_And: | ||
| case NI_AVX_And: | ||
| case NI_AVX2_And: | ||
| case NI_AVX512F_And: | ||
| case NI_AVX512DQ_And: | ||
| case NI_SSE_Or: | ||
| case NI_SSE2_Or: | ||
| case NI_AVX_Or: | ||
| case NI_AVX2_Or: | ||
| case NI_AVX512F_Or: | ||
| case NI_AVX512DQ_Or: | ||
| case NI_SSE_Xor: | ||
| case NI_SSE2_Xor: | ||
| case NI_AVX_Xor: | ||
| case NI_AVX2_Xor: | ||
| case NI_AVX512F_Xor: | ||
| case NI_AVX512DQ_Xor: | ||
EgorBo marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are there any concerns around this becoming out of sync from
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this list, we have the intrinsics that can be folded into ternary logic, while ANDNOT related intrinsics cannot be folded currently. On the other hand, I could leave some comments there to specify this issue, if this is the better way to make thing more clear. |
||
| { | ||
tannergooding marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (!comp->IsBaselineVector512IsaSupportedOpportunistically()) | ||
| { | ||
| break; | ||
| } | ||
| GenTree* op1 = node->Op(1); | ||
| GenTree* op2 = node->Op(2); | ||
|
|
||
| LIR::Use use; | ||
| if (BlockRange().TryGetUse(node, &use)) | ||
| { | ||
| // search for structure like: | ||
| /* | ||
| /- A | ||
| +- B | ||
| t1 = binary logical op1 | ||
|
|
||
| /- C | ||
| +- t1 | ||
| t2 = binary logical op2 | ||
| */ | ||
| GenTree* second = use.User(); | ||
| if (!second->OperIs(GT_HWINTRINSIC) || !second->AsHWIntrinsic()->OperIsBitwiseHWIntrinsic()) | ||
| { | ||
| break; | ||
| } | ||
|
|
||
| if (second->AsHWIntrinsic()->HWOperGet() == GT_AND_NOT) | ||
| { | ||
| // currently ANDNOT logic cannot be optimized by the ternary node. | ||
| break; | ||
| } | ||
| GenTree* op3 = second->AsHWIntrinsic()->Op(1) == node ? second->AsHWIntrinsic()->Op(2) | ||
| : second->AsHWIntrinsic()->Op(1); | ||
| GenTree* control = comp->gtNewIconNode(node->GetTernaryControlByte(second->AsHWIntrinsic())); | ||
| CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); | ||
| unsigned simdSize = node->GetSimdSize(); | ||
| var_types simdType = Compiler::getSIMDTypeForSize(simdSize); | ||
| GenTree* ternaryNode = | ||
| comp->gtNewSimdTernaryLogicNode(simdType, op1, op2, op3, control, simdBaseJitType, simdSize); | ||
| BlockRange().InsertBefore(second, control, ternaryNode); | ||
| LIR::Use finalRes; | ||
| if (BlockRange().TryGetUse(second, &finalRes)) | ||
| { | ||
| finalRes.ReplaceWith(ternaryNode); | ||
| } | ||
| else | ||
| { | ||
| ternaryNode->SetUnusedValue(); | ||
| } | ||
| GenTree* next = node->gtNext; | ||
| BlockRange().Remove(node); | ||
| BlockRange().Remove(second); | ||
| return next; | ||
| } | ||
| break; | ||
| } | ||
|
|
||
| default: | ||
| break; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: It'd be nice to explicitly cache
HWOperGet(). The compiler "should" be doing it for us, but its not a trivial call and is better to be safe.