Skip to content

Commit f0fae30

Browse files
committed
Enable conditional compare chaining for AMD64.
1 parent d846a88 commit f0fae30

File tree

14 files changed

+441
-9
lines changed

14 files changed

+441
-9
lines changed

src/coreclr/jit/codegen.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -969,7 +969,7 @@ class CodeGen final : public CodeGenInterface
969969
void genIntToFloatCast(GenTree* treeNode);
970970
void genCkfinite(GenTree* treeNode);
971971
void genCodeForCompare(GenTreeOp* tree);
972-
#ifdef TARGET_ARM64
972+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
973973
void genCodeForCCMP(GenTreeCCMP* ccmp);
974974
#endif
975975
void genCodeForSelect(GenTreeOp* select);
@@ -1708,6 +1708,8 @@ class CodeGen final : public CodeGenInterface
17081708
static insOpts ShiftOpToInsOpts(genTreeOps op);
17091709
#elif defined(TARGET_XARCH)
17101710
static instruction JumpKindToCmov(emitJumpKind condition);
1711+
static instruction JumpKindToCcmp(emitJumpKind condition);
1712+
static insOpts OptsFromCFlags(insCflags flags);
17111713
#endif
17121714

17131715
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)

src/coreclr/jit/codegenxarch.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,6 +1573,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
15731573
return s_table[condition];
15741574
}
15751575

1576+
//------------------------------------------------------------------------
1577+
// JumpKindToCcmp:
1578+
// Convert an emitJumpKind to the corresponding ccmp instruction.
1579+
//
1580+
// Arguments:
1581+
// condition - the condition
1582+
//
1583+
// Returns:
1584+
// A ccmp instruction.
1585+
//
1586+
instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
1587+
{
1588+
static constexpr instruction s_table[EJ_COUNT] = {
1589+
INS_none, INS_none, INS_ccmpo, INS_ccmpno, INS_ccmpb, INS_ccmpae, INS_ccmpe, INS_ccmpne, INS_ccmpbe,
1590+
INS_ccmpa, INS_ccmps, INS_ccmpns, INS_none, INS_none, INS_ccmpl, INS_ccmpge, INS_ccmple, INS_ccmpg,
1591+
};
1592+
1593+
static_assert_no_msg(s_table[EJ_NONE] == INS_none);
1594+
static_assert_no_msg(s_table[EJ_jmp] == INS_none);
1595+
static_assert_no_msg(s_table[EJ_jo] == INS_ccmpo);
1596+
static_assert_no_msg(s_table[EJ_jno] == INS_ccmpno);
1597+
static_assert_no_msg(s_table[EJ_jb] == INS_ccmpb);
1598+
static_assert_no_msg(s_table[EJ_jae] == INS_ccmpae);
1599+
static_assert_no_msg(s_table[EJ_je] == INS_ccmpe);
1600+
static_assert_no_msg(s_table[EJ_jne] == INS_ccmpne);
1601+
static_assert_no_msg(s_table[EJ_jbe] == INS_ccmpbe);
1602+
static_assert_no_msg(s_table[EJ_ja] == INS_ccmpa);
1603+
static_assert_no_msg(s_table[EJ_js] == INS_ccmps);
1604+
static_assert_no_msg(s_table[EJ_jns] == INS_ccmpns);
1605+
static_assert_no_msg(s_table[EJ_jp] == INS_none);
1606+
static_assert_no_msg(s_table[EJ_jnp] == INS_none);
1607+
static_assert_no_msg(s_table[EJ_jl] == INS_ccmpl);
1608+
static_assert_no_msg(s_table[EJ_jge] == INS_ccmpge);
1609+
static_assert_no_msg(s_table[EJ_jle] == INS_ccmple);
1610+
static_assert_no_msg(s_table[EJ_jg] == INS_ccmpg);
1611+
1612+
assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
1613+
return s_table[condition];
1614+
}
1615+
15761616
//------------------------------------------------------------------------
15771617
// genCodeForSelect: Produce code for a GT_SELECT/GT_SELECTCC node.
15781618
//
@@ -2260,6 +2300,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
22602300
// Do nothing; these nodes are simply markers for debug info.
22612301
break;
22622302

2303+
#if defined(TARGET_AMD64)
2304+
case GT_CCMP:
2305+
genCodeForCCMP(treeNode->AsCCMP());
2306+
break;
2307+
#endif
2308+
22632309
default:
22642310
{
22652311
#ifdef DEBUG
@@ -8908,6 +8954,58 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
89088954
regSet.verifyRegistersUsed(killMask);
89098955
}
89108956

8957+
//------------------------------------------------------------------------
// OptsFromCFlags:
//   Translate a set of insCflags bits into the matching
//   INS_OPTS_EVEX_dfv_* instruction option bits.
//
// Arguments:
//   flags - the INS_FLAGS_* bits to translate
//
// Returns:
//   An insOpts value with one INS_OPTS_EVEX_dfv_* bit set for each of
//   CF, ZF, SF and OF that is present in 'flags'; zero if none is present.
//
insOpts CodeGen::OptsFromCFlags(insCflags flags)
{
    unsigned dfvBits = 0;

    if ((flags & INS_FLAGS_CF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_cf;
    }

    if ((flags & INS_FLAGS_ZF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_zf;
    }

    if ((flags & INS_FLAGS_SF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_sf;
    }

    if ((flags & INS_FLAGS_OF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_of;
    }

    return static_cast<insOpts>(dfvBits);
}
8970+
8971+
#ifdef TARGET_AMD64
8972+
8973+
//------------------------------------------------------------------------
// genCodeForCCMP: Produce code for a GT_CCMP node.
//
// Arguments:
//   ccmp - the node
//
// Notes:
//   Emits a single ccmp instruction that compares op1 (always in a
//   register) with op2 (either a register or a contained integer
//   immediate). The instruction is selected from ccmp->gtCondition and
//   the flag options from ccmp->gtFlagsVal. Only jumpKind1 of the
//   condition descriptor is consulted.
//
void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
{
    emitter* emit = GetEmitter();
    assert(emit->UsePromotedEVEXEncoding());

    genConsumeOperands(ccmp);
    GenTree*  op1     = ccmp->gtGetOp1();
    GenTree*  op2     = ccmp->gtGetOp2();
    var_types op1Type = genActualType(op1->TypeGet());
    var_types op2Type = genActualType(op2->TypeGet());
    emitAttr  cmpSize = emitActualTypeSize(op1Type);
    regNumber srcReg1 = op1->GetRegNum();

    // No float support or swapping op1 and op2 to generate cmp reg, imm.
    assert(!varTypeIsFloating(op2Type));
    assert(!op1->isContainedIntOrIImmed());

    // Both inputs come straight from the node: gtCondition picks the ccmp
    // instruction and gtFlagsVal is translated into the flag options.
    const GenConditionDesc& condDesc = GenConditionDesc::Get(ccmp->gtCondition);
    instruction             ccmpIns  = JumpKindToCcmp(condDesc.jumpKind1);
    insOpts                 opts     = OptsFromCFlags(ccmp->gtFlagsVal);

    // JumpKindToCcmp returns INS_none for jump kinds that have no ccmp
    // form; emitting that would silently drop the compare.
    assert(ccmpIns != INS_none);

    if (op2->isContainedIntOrIImmed())
    {
        const auto immValue = op2->AsIntConCommon()->IconValue();

        // The emitter takes the immediate as an int; make sure the
        // narrowing below does not lose bits.
        assert(static_cast<decltype(immValue)>(static_cast<int>(immValue)) == immValue);

        emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, static_cast<int>(immValue), opts);
    }
    else
    {
        emit->emitIns_R_R(ccmpIns, cmpSize, srcReg1, op2->GetRegNum(), opts);
    }
}
9007+
#endif // TARGET_AMD64
9008+
89119009
#if defined(DEBUG) && defined(TARGET_AMD64)
89129010

89139011
/*****************************************************************************

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7004,6 +7004,9 @@ class Compiler
70047004
PhaseStatus optSwitchRecognition();
70057005
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
70067006
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
7007+
#if defined(TARGET_AMD64)
7008+
bool optSwitchDetectLikely(BasicBlock* firstBlock);
7009+
#endif
70077010

70087011
PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
70097012
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication

src/coreclr/jit/gentree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9410,7 +9410,7 @@ enum insCC : unsigned
94109410
};
94119411
#endif
94129412

9413-
#if defined(TARGET_ARM64)
9413+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
94149414
struct GenTreeCCMP final : public GenTreeOpCC
94159415
{
94169416
insCflags gtFlagsVal;

src/coreclr/jit/gtlist.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,16 @@ GTNODE(JCC , GenTreeCC ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
244244
GTNODE(SETCC , GenTreeCC ,0,0,GTK_LEAF|DBK_NOTHIR)
245245
// Variant of SELECT that reuses flags computed by a previous node with the specified condition.
246246
GTNODE(SELECTCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR)
247-
#ifdef TARGET_ARM64
248-
// The arm64 ccmp instruction. If the specified condition is true, compares two
247+
248+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
249+
// The arm64 and AMD64 (APX) ccmp instruction. If the specified condition is true, compares two
249250
// operands and sets the condition flags according to the result. Otherwise
250251
// sets the condition flags to the specified immediate value.
251252
GTNODE(CCMP , GenTreeCCMP ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
253+
#endif
254+
255+
256+
#ifdef TARGET_ARM64
252257
// Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1.
253258
// If op2 is null, computes result = condition ? op1 + 1 : op1.
254259
GTNODE(SELECT_INC , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR)

src/coreclr/jit/gtstructs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
116116
GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
117117
GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
118118
GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
119-
#ifdef TARGET_ARM64
119+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
120120
GTSTRUCT_1(CCMP , GT_CCMP)
121+
#endif
122+
#ifdef TARGET_ARM64
121123
GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC)
122124
#else
123125
GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST)

src/coreclr/jit/jitconfigvalues.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ RELEASE_CONFIG_INTEGER(EnableArm64Sve, "EnableArm64Sve",
443443
RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", 1) // Allows embedded broadcasts to be disabled
444444
RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled
445445
RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled
446+
RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining
446447

447448
// clang-format on
448449

src/coreclr/jit/lower.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4400,7 +4400,10 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select)
44004400
// Return Value:
44014401
// True if relop was transformed and is now right before 'parent'; otherwise false.
44024402
//
4403-
bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond)
4403+
bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent,
4404+
GenTree* condition,
4405+
GenCondition* cond,
4406+
bool allowMultipleFlagsChecks)
44044407
{
44054408
JITDUMP("Lowering condition:\n");
44064409
DISPTREERANGE(BlockRange(), condition);
@@ -4422,6 +4425,11 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition,
44224425
GenTree* relopOp2 = relop->gtGetOp2();
44234426

44244427
#ifdef TARGET_XARCH
4428+
if (!allowMultipleFlagsChecks && cond->IsFloat())
4429+
{
4430+
return false;
4431+
}
4432+
44254433
// Optimize FP x != x to only check parity flag. This is a common way of
44264434
// checking NaN and avoids two branches that we would otherwise emit.
44274435
if (optimizing && (cond->GetCode() == GenCondition::FNEU) && relopOp1->OperIsLocal() &&

src/coreclr/jit/lower.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class Lowering final : public Phase
9090
void ContainCheckReturnTrap(GenTreeOp* node);
9191
void ContainCheckLclHeap(GenTreeOp* node);
9292
void ContainCheckRet(GenTreeUnOp* ret);
93-
#ifdef TARGET_ARM64
93+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
9494
bool TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next);
9595
insCflags TruthifyingFlags(GenCondition cond);
9696
void ContainCheckConditionalCompare(GenTreeCCMP* ccmp);
@@ -162,7 +162,10 @@ class Lowering final : public Phase
162162
GenTree* LowerCompare(GenTree* cmp);
163163
GenTree* LowerJTrue(GenTreeOp* jtrue);
164164
GenTree* LowerSelect(GenTreeConditional* cond);
165-
bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* code);
165+
bool TryLowerConditionToFlagsNode(GenTree* parent,
166+
GenTree* condition,
167+
GenCondition* code,
168+
bool allowMultipleFlagChecks = true);
166169
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
167170
void LowerJmpMethod(GenTree* jmp);
168171
void LowerRet(GenTreeOp* ret);

0 commit comments

Comments
 (0)