Skip to content

Commit c341c3b

Browse files
committed
Enable conditional compare chaining for AMD64.
1 parent a8ce396 commit c341c3b

File tree

14 files changed

+441
-9
lines changed

14 files changed

+441
-9
lines changed

src/coreclr/jit/codegen.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -969,7 +969,7 @@ class CodeGen final : public CodeGenInterface
969969
void genIntToFloatCast(GenTree* treeNode);
970970
void genCkfinite(GenTree* treeNode);
971971
void genCodeForCompare(GenTreeOp* tree);
972-
#ifdef TARGET_ARM64
972+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
973973
void genCodeForCCMP(GenTreeCCMP* ccmp);
974974
#endif
975975
void genCodeForSelect(GenTreeOp* select);
@@ -1708,6 +1708,8 @@ class CodeGen final : public CodeGenInterface
17081708
static insOpts ShiftOpToInsOpts(genTreeOps op);
17091709
#elif defined(TARGET_XARCH)
17101710
static instruction JumpKindToCmov(emitJumpKind condition);
1711+
static instruction JumpKindToCcmp(emitJumpKind condition);
1712+
static insOpts OptsFromCFlags(insCflags flags);
17111713
#endif
17121714

17131715
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)

src/coreclr/jit/codegenxarch.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
15721572
return s_table[condition];
15731573
}
15741574

1575+
//------------------------------------------------------------------------
1576+
// JumpKindToCcmp:
1577+
// Convert an emitJumpKind to the corresponding ccmp instruction.
1578+
//
1579+
// Arguments:
1580+
// condition - the condition
1581+
//
1582+
// Returns:
1583+
// A ccmp instruction.
1584+
//
1585+
instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
1586+
{
1587+
static constexpr instruction s_table[EJ_COUNT] = {
1588+
INS_none, INS_none, INS_ccmpo, INS_ccmpno, INS_ccmpb, INS_ccmpae, INS_ccmpe, INS_ccmpne, INS_ccmpbe,
1589+
INS_ccmpa, INS_ccmps, INS_ccmpns, INS_none, INS_none, INS_ccmpl, INS_ccmpge, INS_ccmple, INS_ccmpg,
1590+
};
1591+
1592+
static_assert_no_msg(s_table[EJ_NONE] == INS_none);
1593+
static_assert_no_msg(s_table[EJ_jmp] == INS_none);
1594+
static_assert_no_msg(s_table[EJ_jo] == INS_ccmpo);
1595+
static_assert_no_msg(s_table[EJ_jno] == INS_ccmpno);
1596+
static_assert_no_msg(s_table[EJ_jb] == INS_ccmpb);
1597+
static_assert_no_msg(s_table[EJ_jae] == INS_ccmpae);
1598+
static_assert_no_msg(s_table[EJ_je] == INS_ccmpe);
1599+
static_assert_no_msg(s_table[EJ_jne] == INS_ccmpne);
1600+
static_assert_no_msg(s_table[EJ_jbe] == INS_ccmpbe);
1601+
static_assert_no_msg(s_table[EJ_ja] == INS_ccmpa);
1602+
static_assert_no_msg(s_table[EJ_js] == INS_ccmps);
1603+
static_assert_no_msg(s_table[EJ_jns] == INS_ccmpns);
1604+
static_assert_no_msg(s_table[EJ_jp] == INS_none);
1605+
static_assert_no_msg(s_table[EJ_jnp] == INS_none);
1606+
static_assert_no_msg(s_table[EJ_jl] == INS_ccmpl);
1607+
static_assert_no_msg(s_table[EJ_jge] == INS_ccmpge);
1608+
static_assert_no_msg(s_table[EJ_jle] == INS_ccmple);
1609+
static_assert_no_msg(s_table[EJ_jg] == INS_ccmpg);
1610+
1611+
assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
1612+
return s_table[condition];
1613+
}
1614+
15751615
//------------------------------------------------------------------------
15761616
// genCodeForSelect: Produce code for a GT_SELECT/GT_SELECTCC node.
15771617
//
@@ -2260,6 +2300,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
22602300
// Do nothing; these nodes are simply markers for debug info.
22612301
break;
22622302

2303+
#if defined(TARGET_AMD64)
2304+
case GT_CCMP:
2305+
genCodeForCCMP(treeNode->AsCCMP());
2306+
break;
2307+
#endif
2308+
22632309
default:
22642310
{
22652311
#ifdef DEBUG
@@ -8933,6 +8979,58 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
89338979
regSet.verifyRegistersUsed(killMask);
89348980
}
89358981

8982+
//------------------------------------------------------------------------
// OptsFromCFlags:
//   Translate a set of condition flags into the matching
//   INS_OPTS_EVEX_dfv_* instruction option bits.
//
// Arguments:
//   flags - any combination of INS_FLAGS_CF, INS_FLAGS_ZF, INS_FLAGS_SF
//           and INS_FLAGS_OF
//
// Returns:
//   The union of the INS_OPTS_EVEX_dfv_* options whose flag is set in
//   'flags'; zero (cast to insOpts) when none of the four flags is set.
//
insOpts CodeGen::OptsFromCFlags(insCflags flags)
{
    // Accumulate in a plain unsigned so the option bits can be OR-ed together
    // before converting back to the enum type.
    unsigned dfvBits = 0;

    if ((flags & INS_FLAGS_OF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_of;
    }

    if ((flags & INS_FLAGS_SF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_sf;
    }

    if ((flags & INS_FLAGS_ZF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_zf;
    }

    if ((flags & INS_FLAGS_CF) != 0)
    {
        dfvBits |= INS_OPTS_EVEX_dfv_cf;
    }

    return static_cast<insOpts>(dfvBits);
}
8995+
8996+
#ifdef TARGET_AMD64
8997+
8998+
//------------------------------------------------------------------------
// genCodeForCCMP: Produce code for a GT_CCMP node.
//
// Arguments:
//   ccmp - the node
//
// Notes:
//   Requires the emitter to be using the promoted EVEX encoding. The first
//   operand must be in a register; the second operand is either a register
//   or a contained integer immediate.
//
void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
{
    emitter* const emit = GetEmitter();
    assert(emit->UsePromotedEVEXEncoding());

    genConsumeOperands(ccmp);

    GenTree* const lhs = ccmp->gtGetOp1();
    GenTree* const rhs = ccmp->gtGetOp2();

    // Floating-point operands are not handled here, and an immediate is only
    // expected as the second operand (no operand swapping is done).
    assert(!varTypeIsFloating(genActualType(rhs->TypeGet())));
    assert(!lhs->isContainedIntOrIImmed());

    const var_types lhsType = genActualType(lhs->TypeGet());
    const emitAttr  size    = emitActualTypeSize(lhsType);
    const regNumber lhsReg  = lhs->GetRegNum();

    // The instruction is selected from the first jump kind of the node's
    // condition; the node's flags value supplies the dfv instruction options.
    const GenConditionDesc& desc = GenConditionDesc::Get(ccmp->gtCondition);
    const instruction       ins  = JumpKindToCcmp(desc.jumpKind1);
    const insOpts           dfv  = OptsFromCFlags(ccmp->gtFlagsVal);

    if (!rhs->isContainedIntOrIImmed())
    {
        // Register/register form.
        emit->emitIns_R_R(ins, size, lhsReg, rhs->GetRegNum(), dfv);
        return;
    }

    // Register/immediate form.
    GenTreeIntConCommon* const imm = rhs->AsIntConCommon();
    emit->emitIns_R_I(ins, size, lhsReg, static_cast<int>(imm->IconValue()), dfv);
}
9032+
#endif // TARGET_AMD64
9033+
89369034
#if defined(DEBUG) && defined(TARGET_AMD64)
89379035

89389036
/*****************************************************************************

src/coreclr/jit/compiler.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7199,6 +7199,9 @@ class Compiler
71997199
PhaseStatus optSwitchRecognition();
72007200
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
72017201
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
7202+
#if defined(TARGET_AMD64)
7203+
bool optSwitchDetectLikely(BasicBlock* firstBlock);
7204+
#endif
72027205

72037206
PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
72047207
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication

src/coreclr/jit/gentree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9562,7 +9562,7 @@ enum insCC : unsigned
95629562
};
95639563
#endif
95649564

9565-
#if defined(TARGET_ARM64)
9565+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
95669566
struct GenTreeCCMP final : public GenTreeOpCC
95679567
{
95689568
insCflags gtFlagsVal;

src/coreclr/jit/gtlist.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,16 @@ GTNODE(JCC , GenTreeCC ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
244244
GTNODE(SETCC , GenTreeCC ,0,0,GTK_LEAF|DBK_NOTHIR)
245245
// Variant of SELECT that reuses flags computed by a previous node with the specified condition.
246246
GTNODE(SELECTCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR)
247-
#ifdef TARGET_ARM64
248-
// The arm64 ccmp instruction. If the specified condition is true, compares two
247+
248+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
249+
// The arm64 and AMD64 (APX) ccmp instructions. If the specified condition is true, compares two
249250
// operands and sets the condition flags according to the result. Otherwise
250251
// sets the condition flags to the specified immediate value.
251252
GTNODE(CCMP , GenTreeCCMP ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
253+
#endif
254+
255+
256+
#ifdef TARGET_ARM64
252257
// Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1.
253258
// If op2 is null, computes result = condition ? op1 + 1 : op1.
254259
GTNODE(SELECT_INC , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR)

src/coreclr/jit/gtstructs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
116116
GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
117117
GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
118118
GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
119-
#ifdef TARGET_ARM64
119+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
120120
GTSTRUCT_1(CCMP , GT_CCMP)
121+
#endif
122+
#ifdef TARGET_ARM64
121123
GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC)
122124
#else
123125
GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST)

src/coreclr/jit/jitconfigvalues.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ RELEASE_CONFIG_INTEGER(EnableArm64Sve, "EnableArm64Sve",
443443
RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", 1) // Allows embedded broadcasts to be disabled
444444
RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled
445445
RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled
446+
RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining
446447

447448
// clang-format on
448449

src/coreclr/jit/lower.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4590,7 +4590,10 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select)
45904590
// Return Value:
45914591
// True if relop was transformed and is now right before 'parent'; otherwise false.
45924592
//
4593-
bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond)
4593+
bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent,
4594+
GenTree* condition,
4595+
GenCondition* cond,
4596+
bool allowMultipleFlagsChecks)
45944597
{
45954598
JITDUMP("Lowering condition:\n");
45964599
DISPTREERANGE(BlockRange(), condition);
@@ -4612,6 +4615,11 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition,
46124615
GenTree* relopOp2 = relop->gtGetOp2();
46134616

46144617
#ifdef TARGET_XARCH
4618+
if (!allowMultipleFlagsChecks && cond->IsFloat())
4619+
{
4620+
return false;
4621+
}
4622+
46154623
// Optimize FP x != x to only check parity flag. This is a common way of
46164624
// checking NaN and avoids two branches that we would otherwise emit.
46174625
if (optimizing && (cond->GetCode() == GenCondition::FNEU) && relopOp1->OperIsLocal() &&

src/coreclr/jit/lower.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class Lowering final : public Phase
8989
void ContainCheckReturnTrap(GenTreeOp* node);
9090
void ContainCheckLclHeap(GenTreeOp* node);
9191
void ContainCheckRet(GenTreeUnOp* ret);
92-
#ifdef TARGET_ARM64
92+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
9393
bool TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next);
9494
insCflags TruthifyingFlags(GenCondition cond);
9595
void ContainCheckConditionalCompare(GenTreeCCMP* ccmp);
@@ -161,7 +161,10 @@ class Lowering final : public Phase
161161
GenTree* LowerCompare(GenTree* cmp);
162162
GenTree* LowerJTrue(GenTreeOp* jtrue);
163163
GenTree* LowerSelect(GenTreeConditional* cond);
164-
bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* code);
164+
bool TryLowerConditionToFlagsNode(GenTree* parent,
165+
GenTree* condition,
166+
GenCondition* code,
167+
bool allowMultipleFlagChecks = true);
165168
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
166169
void LowerJmpMethod(GenTree* jmp);
167170
void LowerRet(GenTreeOp* ret);

0 commit comments

Comments
 (0)