Skip to content

Commit 75bf5ca

Browse files
committed
Enable conditional compare chaining for AMD64.
1 parent c8459bb commit 75bf5ca

File tree

14 files changed

+472
-9
lines changed

14 files changed

+472
-9
lines changed

src/coreclr/jit/codegen.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -967,7 +967,7 @@ class CodeGen final : public CodeGenInterface
967967
void genIntToFloatCast(GenTree* treeNode);
968968
void genCkfinite(GenTree* treeNode);
969969
void genCodeForCompare(GenTreeOp* tree);
970-
#ifdef TARGET_ARM64
970+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
971971
void genCodeForCCMP(GenTreeCCMP* ccmp);
972972
#endif
973973
void genCodeForSelect(GenTreeOp* select);
@@ -1712,6 +1712,8 @@ class CodeGen final : public CodeGenInterface
17121712
static insOpts ShiftOpToInsOpts(genTreeOps op);
17131713
#elif defined(TARGET_XARCH)
17141714
static instruction JumpKindToCmov(emitJumpKind condition);
1715+
static instruction JumpKindToCcmp(emitJumpKind condition);
1716+
static insOpts OptsFromCFlags(insCflags flags);
17151717
#endif
17161718

17171719
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)

src/coreclr/jit/codegenxarch.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
15721572
return s_table[condition];
15731573
}
15741574

1575+
//------------------------------------------------------------------------
1576+
// JumpKindToCcmp:
1577+
// Convert an emitJumpKind to the corresponding ccmp instruction.
1578+
//
1579+
// Arguments:
1580+
// condition - the condition
1581+
//
1582+
// Returns:
1583+
// A ccmp instruction.
1584+
//
1585+
instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
1586+
{
1587+
// Lookup table indexed directly by the emitJumpKind value. Entries that are
// INS_none (EJ_NONE, EJ_jmp, EJ_jp, EJ_jnp) have no ccmp instruction in this
// table, so callers get INS_none back for those kinds.
static constexpr instruction s_table[EJ_COUNT] = {
1588+
INS_none, INS_none, INS_ccmpo, INS_ccmpno, INS_ccmpb, INS_ccmpae, INS_ccmpe, INS_ccmpne, INS_ccmpbe,
1589+
INS_ccmpa, INS_ccmps, INS_ccmpns, INS_none, INS_none, INS_ccmpl, INS_ccmpge, INS_ccmple, INS_ccmpg,
1590+
};
1591+
1592+
// The table above is positional, so verify at compile time that every slot
// lines up with the emitJumpKind enumerator it is meant to represent.
static_assert_no_msg(s_table[EJ_NONE] == INS_none);
1593+
static_assert_no_msg(s_table[EJ_jmp] == INS_none);
1594+
static_assert_no_msg(s_table[EJ_jo] == INS_ccmpo);
1595+
static_assert_no_msg(s_table[EJ_jno] == INS_ccmpno);
1596+
static_assert_no_msg(s_table[EJ_jb] == INS_ccmpb);
1597+
static_assert_no_msg(s_table[EJ_jae] == INS_ccmpae);
1598+
static_assert_no_msg(s_table[EJ_je] == INS_ccmpe);
1599+
static_assert_no_msg(s_table[EJ_jne] == INS_ccmpne);
1600+
static_assert_no_msg(s_table[EJ_jbe] == INS_ccmpbe);
1601+
static_assert_no_msg(s_table[EJ_ja] == INS_ccmpa);
1602+
static_assert_no_msg(s_table[EJ_js] == INS_ccmps);
1603+
static_assert_no_msg(s_table[EJ_jns] == INS_ccmpns);
1604+
static_assert_no_msg(s_table[EJ_jp] == INS_none);
1605+
static_assert_no_msg(s_table[EJ_jnp] == INS_none);
1606+
static_assert_no_msg(s_table[EJ_jl] == INS_ccmpl);
1607+
static_assert_no_msg(s_table[EJ_jge] == INS_ccmpge);
1608+
static_assert_no_msg(s_table[EJ_jle] == INS_ccmple);
1609+
static_assert_no_msg(s_table[EJ_jg] == INS_ccmpg);
1610+
1611+
// Guard the table index; this only checks the range, it does not reject the
// kinds that map to INS_none.
assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
1612+
return s_table[condition];
1613+
}
1614+
15751615
//------------------------------------------------------------------------
15761616
// genCodeForSelect: Produce code for a GT_SELECT/GT_SELECTCC node.
15771617
//
@@ -2259,6 +2299,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
22592299
// Do nothing; these nodes are simply markers for debug info.
22602300
break;
22612301

2302+
#if defined(TARGET_AMD64)
2303+
case GT_CCMP:
2304+
genCodeForCCMP(treeNode->AsCCMP());
2305+
break;
2306+
#endif
2307+
22622308
default:
22632309
{
22642310
#ifdef DEBUG
@@ -8938,6 +8984,58 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
89388984
regSet.verifyRegistersUsed(killMask);
89398985
}
89408986

8987+
//------------------------------------------------------------------------
// OptsFromCFlags:
// Translate an insCflags bit mask into the matching INS_OPTS_EVEX_dfv_*
// instruction option bits.
//
// Arguments:
// flags - bit mask of INS_FLAGS_CF / INS_FLAGS_ZF / INS_FLAGS_SF / INS_FLAGS_OF
//
// Returns:
// The insOpts value with one INS_OPTS_EVEX_dfv_* bit set for each flag
// present in 'flags'; zero if none of those flags are set.
//
insOpts CodeGen::OptsFromCFlags(insCflags flags)
8988+
{
8989+
// Accumulate in a plain unsigned so the option bits can be OR-ed together
// before converting back to the enum type.
unsigned opts = 0x0;
8990+
if (flags & INS_FLAGS_CF)
8991+
opts |= INS_OPTS_EVEX_dfv_cf;
8992+
if (flags & INS_FLAGS_ZF)
8993+
opts |= INS_OPTS_EVEX_dfv_zf;
8994+
if (flags & INS_FLAGS_SF)
8995+
opts |= INS_OPTS_EVEX_dfv_sf;
8996+
if (flags & INS_FLAGS_OF)
8997+
opts |= INS_OPTS_EVEX_dfv_of;
8998+
return (insOpts)opts;
8999+
}
9000+
9001+
#ifdef TARGET_AMD64
9002+
9003+
//------------------------------------------------------------------------
// genCodeForCCMP:
// Produce code for a GT_CCMP node.
//
// Arguments:
// ccmp - the node
//
// Notes:
// Emits a single ccmp instruction comparing op1 (always in a register)
// against op2, which is either a contained integer immediate or a register.
// The instruction is selected from the node's gtCondition and the flag
// options from its gtFlagsVal.
//
void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
9004+
{
9005+
emitter* emit = GetEmitter();
9006+
// This path requires the emitter to be using promoted EVEX encoding.
assert(emit->UsePromotedEVEXEncoding());
9007+
9008+
genConsumeOperands(ccmp);
9009+
GenTree* op1 = ccmp->gtGetOp1();
9010+
GenTree* op2 = ccmp->gtGetOp2();
9011+
var_types op1Type = genActualType(op1->TypeGet());
9012+
var_types op2Type = genActualType(op2->TypeGet());
9013+
// The operand size of the compare is taken from op1's actual type.
emitAttr cmpSize = emitActualTypeSize(op1Type);
9014+
regNumber srcReg1 = op1->GetRegNum();
9015+
9016+
// No float support or swapping op1 and op2 to generate cmp reg, imm.
9017+
assert(!varTypeIsFloating(op2Type));
9018+
assert(!op1->isContainedIntOrIImmed());
9019+
9020+
// For the ccmp flags, invert the condition of the compare.
9021+
// For the condition, use the previous compare.
9022+
const GenConditionDesc& condDesc = GenConditionDesc::Get(ccmp->gtCondition);
9023+
// NOTE(review): only jumpKind1 of the descriptor is consulted here; presumably
// conditions that need a second jump kind never reach this node - confirm.
instruction ccmpIns = JumpKindToCcmp(condDesc.jumpKind1);
9024+
insOpts opts = OptsFromCFlags(ccmp->gtFlagsVal);
9025+
9026+
if (op2->isContainedIntOrIImmed())
9027+
{
9028+
// ccmp reg, imm form.
// NOTE(review): IconValue() is narrowed to int; presumably containment
// guarantees the constant fits in 32 bits - confirm.
GenTreeIntConCommon* intConst = op2->AsIntConCommon();
9029+
emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts);
9030+
}
9031+
else
9032+
{
9033+
// ccmp reg, reg form.
regNumber srcReg2 = op2->GetRegNum();
9034+
emit->emitIns_R_R(ccmpIns, cmpSize, srcReg1, srcReg2, opts);
9035+
}
9036+
}
9037+
#endif // TARGET_AMD64
9038+
89419039
#if defined(DEBUG) && defined(TARGET_AMD64)
89429040

89439041
/*****************************************************************************

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7192,6 +7192,9 @@ class Compiler
71927192
PhaseStatus optSwitchRecognition();
71937193
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
71947194
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
7195+
#if defined(TARGET_AMD64)
7196+
bool optSwitchDetectLikely(BasicBlock* firstBlock);
7197+
#endif
71957198

71967199
PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
71977200
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication

src/coreclr/jit/gentree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9590,7 +9590,7 @@ enum insCC : unsigned
95909590
};
95919591
#endif
95929592

9593-
#if defined(TARGET_ARM64)
9593+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
95949594
struct GenTreeCCMP final : public GenTreeOpCC
95959595
{
95969596
insCflags gtFlagsVal;

src/coreclr/jit/gtlist.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,16 @@ GTNODE(JCC , GenTreeCC ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
244244
GTNODE(SETCC , GenTreeCC ,0,0,GTK_LEAF|DBK_NOTHIR)
245245
// Variant of SELECT that reuses flags computed by a previous node with the specified condition.
246246
GTNODE(SELECTCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR)
247-
#ifdef TARGET_ARM64
248-
// The arm64 ccmp instruction. If the specified condition is true, compares two
247+
248+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
249+
// The arm64 and x64 ccmp instruction. If the specified condition is true, compares two
249250
// operands and sets the condition flags according to the result. Otherwise
250251
// sets the condition flags to the specified immediate value.
251252
GTNODE(CCMP , GenTreeCCMP ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
253+
#endif
254+
255+
256+
#ifdef TARGET_ARM64
252257
// Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1.
253258
// If op2 is null, computes result = condition ? op1 + 1 : op1.
254259
GTNODE(SELECT_INC , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR)

src/coreclr/jit/gtstructs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
116116
GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
117117
GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
118118
GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
119-
#ifdef TARGET_ARM64
119+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
120120
GTSTRUCT_1(CCMP , GT_CCMP)
121+
#endif
122+
#ifdef TARGET_ARM64
121123
GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC)
122124
#else
123125
GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST)

src/coreclr/jit/jitconfigvalues.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,7 @@ RELEASE_CONFIG_INTEGER(EnableMultiRegLocals, "EnableMultiRegLocals", 1)
369369
RELEASE_CONFIG_INTEGER(JitNoInline, "JitNoInline", 0)
370370

371371
#if defined(DEBUG)
372+
CONFIG_INTEGER(JitBypassApxCheck, "JitBypassApxCheck", 0) // Bypass the APX support check (NOTE(review): presumably the CPUID check - confirm).
372373
CONFIG_INTEGER(JitStressRex2Encoding, "JitStressRex2Encoding", 0) // Enable rex2 encoding for compatible instructions.
373374
CONFIG_INTEGER(JitStressPromotedEvexEncoding, "JitStressPromotedEvexEncoding", 0) // Enable promoted EVEX encoding for
374375
// compatible instructions.
@@ -442,6 +443,7 @@ RELEASE_CONFIG_INTEGER(EnableArm64Sve, "EnableArm64Sve",
442443
RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", 1) // Allows embedded broadcasts to be disabled
443444
RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled
444445
RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled
446+
RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining
445447

446448
// clang-format on
447449

src/coreclr/jit/lower.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4640,7 +4640,10 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select)
46404640
// Return Value:
46414641
// True if relop was transformed and is now right before 'parent'; otherwise false.
46424642
//
4643-
bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond)
4643+
bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent,
4644+
GenTree* condition,
4645+
GenCondition* cond,
4646+
bool allowMultipleFlagsChecks)
46444647
{
46454648
JITDUMP("Lowering condition:\n");
46464649
DISPTREERANGE(BlockRange(), condition);
@@ -4662,6 +4665,11 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition,
46624665
GenTree* relopOp2 = relop->gtGetOp2();
46634666

46644667
#ifdef TARGET_XARCH
4668+
if (!allowMultipleFlagsChecks && cond->IsFloat())
4669+
{
4670+
return false;
4671+
}
4672+
46654673
// Optimize FP x != x to only check parity flag. This is a common way of
46664674
// checking NaN and avoids two branches that we would otherwise emit.
46674675
if (optimizing && (cond->GetCode() == GenCondition::FNEU) && relopOp1->OperIsLocal() &&

src/coreclr/jit/lower.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,10 @@ class Lowering final : public Phase
8989
void ContainCheckReturnTrap(GenTreeOp* node);
9090
void ContainCheckLclHeap(GenTreeOp* node);
9191
void ContainCheckRet(GenTreeUnOp* ret);
92-
#ifdef TARGET_ARM64
92+
#if defined(TARGET_AMD64)
93+
bool ProducesPotentialConsumableFlagsForCCMP(GenTree* op);
94+
#endif
95+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
9396
bool TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next);
9497
insCflags TruthifyingFlags(GenCondition cond);
9598
void ContainCheckConditionalCompare(GenTreeCCMP* ccmp);
@@ -157,7 +160,10 @@ class Lowering final : public Phase
157160
GenTree* LowerCompare(GenTree* cmp);
158161
GenTree* LowerJTrue(GenTreeOp* jtrue);
159162
GenTree* LowerSelect(GenTreeConditional* cond);
160-
bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* code);
163+
bool TryLowerConditionToFlagsNode(GenTree* parent,
164+
GenTree* condition,
165+
GenCondition* code,
166+
bool allowMultipleFlagChecks = true);
161167
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
162168
void LowerJmpMethod(GenTree* jmp);
163169
void LowerRet(GenTreeOp* ret);

0 commit comments

Comments
 (0)