Skip to content

Commit 9e8a9e9

Browse files
[JIT] Enable conditional chaining for Intel APX (#111072)
* Enable conditional compare chaining for AMD64. * Reduce duplication from `optSwitchDetectLikely`. * Update src/coreclr/jit/lsrabuild.cpp Co-authored-by: Bruce Forstall <[email protected]> * Update src/coreclr/jit/lowerxarch.cpp Co-authored-by: Bruce Forstall <[email protected]> * Update src/coreclr/jit/lowerxarch.cpp Co-authored-by: Bruce Forstall <[email protected]> * Widen the potential candidates for ccmp folding. Also lifts GenConditionDesc into CodeGenInterface to better check which flag lowerings will produce multiple instructions. * Refactor some common code into lower.cpp. Some code will conflict with latest changes. I've squashed so we can discuss how to merge in properly. * Refactored common code out. * Review edits. * Fix build errors. * Formatting. --------- Co-authored-by: Bruce Forstall <[email protected]>
1 parent 513c52b commit 9e8a9e9

19 files changed

+463
-193
lines changed

src/coreclr/jit/codegen.h

Lines changed: 4 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -965,7 +965,7 @@ class CodeGen final : public CodeGenInterface
965965
void genIntToFloatCast(GenTree* treeNode);
966966
void genCkfinite(GenTree* treeNode);
967967
void genCodeForCompare(GenTreeOp* tree);
968-
#ifdef TARGET_ARM64
968+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
969969
void genCodeForCCMP(GenTreeCCMP* ccmp);
970970
#endif
971971
void genCodeForSelect(GenTreeOp* select);
@@ -1706,53 +1706,13 @@ class CodeGen final : public CodeGenInterface
17061706
static insOpts ShiftOpToInsOpts(genTreeOps op);
17071707
#elif defined(TARGET_XARCH)
17081708
static instruction JumpKindToCmov(emitJumpKind condition);
1709+
static instruction JumpKindToCcmp(emitJumpKind condition);
1710+
static insOpts OptsFromCFlags(insCflags flags);
17091711
#endif
1710-
1711-
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
1712-
// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
1713-
// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
1714-
// certain floating point conditions.
1715-
// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
1716-
// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
1717-
// and then jump if ZF is 1:
1718-
// JP fallThroughBlock
1719-
// JE jumpDestBlock
1720-
// fallThroughBlock:
1721-
// ...
1722-
// jumpDestBlock:
1723-
//
1724-
// This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so
1725-
// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
1726-
// pattern is used to encode the above:
1727-
// { EJ_jnp, GT_AND, EJ_je }
1728-
// { EJ_jp, GT_OR, EJ_jne }
1729-
//
1730-
// For more details check inst_JCC and inst_SETCC functions.
1731-
//
1732-
struct GenConditionDesc
1733-
{
1734-
emitJumpKind jumpKind1;
1735-
genTreeOps oper;
1736-
emitJumpKind jumpKind2;
1737-
char padTo4Bytes;
1738-
1739-
static const GenConditionDesc& Get(GenCondition condition)
1740-
{
1741-
assert(condition.GetCode() < ArrLen(map));
1742-
const GenConditionDesc& desc = map[condition.GetCode()];
1743-
assert(desc.jumpKind1 != EJ_NONE);
1744-
assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR));
1745-
assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE));
1746-
return desc;
1747-
}
1748-
1749-
private:
1750-
static const GenConditionDesc map[32];
1751-
};
1752-
17531712
void inst_JCC(GenCondition condition, BasicBlock* target);
17541713
void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg);
17551714

1715+
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
17561716
void genCodeForJcc(GenTreeCC* tree);
17571717
void genCodeForSetcc(GenTreeCC* setcc);
17581718
void genCodeForJTrue(GenTreeOp* jtrue);

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4162,7 +4162,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
41624162
}
41634163

41644164
// clang-format off
4165-
const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
4165+
const GenConditionDesc GenConditionDesc::map[32]
41664166
{
41674167
{ }, // NONE
41684168
{ }, // 1

src/coreclr/jit/codegeninterface.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -828,4 +828,47 @@ class CodeGenInterface
828828
#endif
829829
};
830830

831+
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
832+
// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
833+
// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
834+
// certain floating point conditions.
835+
// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
836+
// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
837+
// and then jump if ZF is 1:
838+
// JP fallThroughBlock
839+
// JE jumpDestBlock
840+
// fallThroughBlock:
841+
// ...
842+
// jumpDestBlock:
843+
//
844+
// This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so
845+
// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
846+
// pattern is used to encode the above:
847+
// { EJ_jnp, GT_AND, EJ_je }
848+
// { EJ_jp, GT_OR, EJ_jne }
849+
//
850+
// For more details check inst_JCC and inst_SETCC functions.
851+
//
852+
struct GenConditionDesc
853+
{
854+
emitJumpKind jumpKind1;
855+
genTreeOps oper;
856+
emitJumpKind jumpKind2;
857+
char padTo4Bytes;
858+
859+
static const GenConditionDesc& Get(GenCondition condition)
860+
{
861+
assert(condition.GetCode() < ArrLen(map));
862+
const GenConditionDesc& desc = map[condition.GetCode()];
863+
assert(desc.jumpKind1 != EJ_NONE);
864+
assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR));
865+
assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE));
866+
return desc;
867+
}
868+
869+
private:
870+
static const GenConditionDesc map[32];
871+
};
872+
#endif // !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
873+
831874
#endif // _CODEGEN_INTERFACE_H_

src/coreclr/jit/codegenxarch.cpp

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1575,6 +1575,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
15751575
return s_table[condition];
15761576
}
15771577

1578+
//------------------------------------------------------------------------
1579+
// JumpKindToCcmp:
1580+
// Convert an emitJumpKind to the corresponding ccmp instruction.
1581+
//
1582+
// Arguments:
1583+
// condition - the condition
1584+
//
1585+
// Returns:
1586+
// A ccmp instruction.
1587+
//
1588+
instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
1589+
{
1590+
static constexpr instruction s_table[EJ_COUNT] = {
1591+
INS_none, INS_none, INS_ccmpo, INS_ccmpno, INS_ccmpb, INS_ccmpae, INS_ccmpe, INS_ccmpne, INS_ccmpbe,
1592+
INS_ccmpa, INS_ccmps, INS_ccmpns, INS_none, INS_none, INS_ccmpl, INS_ccmpge, INS_ccmple, INS_ccmpg,
1593+
};
1594+
1595+
static_assert_no_msg(s_table[EJ_NONE] == INS_none);
1596+
static_assert_no_msg(s_table[EJ_jmp] == INS_none);
1597+
static_assert_no_msg(s_table[EJ_jo] == INS_ccmpo);
1598+
static_assert_no_msg(s_table[EJ_jno] == INS_ccmpno);
1599+
static_assert_no_msg(s_table[EJ_jb] == INS_ccmpb);
1600+
static_assert_no_msg(s_table[EJ_jae] == INS_ccmpae);
1601+
static_assert_no_msg(s_table[EJ_je] == INS_ccmpe);
1602+
static_assert_no_msg(s_table[EJ_jne] == INS_ccmpne);
1603+
static_assert_no_msg(s_table[EJ_jbe] == INS_ccmpbe);
1604+
static_assert_no_msg(s_table[EJ_ja] == INS_ccmpa);
1605+
static_assert_no_msg(s_table[EJ_js] == INS_ccmps);
1606+
static_assert_no_msg(s_table[EJ_jns] == INS_ccmpns);
1607+
static_assert_no_msg(s_table[EJ_jp] == INS_none);
1608+
static_assert_no_msg(s_table[EJ_jnp] == INS_none);
1609+
static_assert_no_msg(s_table[EJ_jl] == INS_ccmpl);
1610+
static_assert_no_msg(s_table[EJ_jge] == INS_ccmpge);
1611+
static_assert_no_msg(s_table[EJ_jle] == INS_ccmple);
1612+
static_assert_no_msg(s_table[EJ_jg] == INS_ccmpg);
1613+
1614+
assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
1615+
return s_table[condition];
1616+
}
1617+
15781618
//------------------------------------------------------------------------
15791619
// genCodeForSelect: Produce code for a GT_SELECT/GT_SELECTCC node.
15801620
//
@@ -1671,7 +1711,7 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
16711711
}
16721712

16731713
// clang-format off
1674-
const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
1714+
const GenConditionDesc GenConditionDesc::map[32]
16751715
{
16761716
{ }, // NONE
16771717
{ }, // 1
@@ -2272,6 +2312,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
22722312
// Do nothing; these nodes are simply markers for debug info.
22732313
break;
22742314

2315+
#if defined(TARGET_AMD64)
2316+
case GT_CCMP:
2317+
genCodeForCCMP(treeNode->AsCCMP());
2318+
break;
2319+
#endif
2320+
22752321
default:
22762322
{
22772323
#ifdef DEBUG
@@ -8928,6 +8974,84 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
89288974
regSet.verifyRegistersUsed(killMask);
89298975
}
89308976

8977+
//-----------------------------------------------------------------------------------------
8978+
// OptsFromCFlags - Convert condition flags into appropriate insOpts.
8979+
//
8980+
// Arguments:
8981+
// flags - The condition flags to be converted.
8982+
//
8983+
// Return Value:
8984+
// An insOpts value encoding the condition flags.
8985+
//
8986+
// Notes:
8987+
// This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate
8988+
// instruction options used for setting the default flag values in extended EVEX
8989+
// encoding conditional instructions.
8990+
//
8991+
insOpts CodeGen::OptsFromCFlags(insCflags flags)
8992+
{
8993+
unsigned opts = 0x0;
8994+
if (flags & INS_FLAGS_CF)
8995+
opts |= INS_OPTS_EVEX_dfv_cf;
8996+
if (flags & INS_FLAGS_ZF)
8997+
opts |= INS_OPTS_EVEX_dfv_zf;
8998+
if (flags & INS_FLAGS_SF)
8999+
opts |= INS_OPTS_EVEX_dfv_sf;
9000+
if (flags & INS_FLAGS_OF)
9001+
opts |= INS_OPTS_EVEX_dfv_of;
9002+
return (insOpts)opts;
9003+
}
9004+
9005+
#ifdef TARGET_AMD64
9006+
9007+
//-----------------------------------------------------------------------------------------
9008+
// genCodeForCCMP - Generate code for a conditional compare (CCMP) node.
9009+
//
9010+
// Arguments:
9011+
// ccmp - The GenTreeCCMP node representing the conditional compare.
9012+
//
9013+
// Return Value:
9014+
// None.
9015+
//
9016+
// Notes:
9017+
// This function generates code for a conditional compare operation. On X86,
9018+
// comparisons use the extended EVEX encoding and the ccmp instruction.
9019+
void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
9020+
{
9021+
emitter* emit = GetEmitter();
9022+
assert(emit->UsePromotedEVEXEncoding());
9023+
9024+
genConsumeOperands(ccmp);
9025+
GenTree* op1 = ccmp->gtGetOp1();
9026+
GenTree* op2 = ccmp->gtGetOp2();
9027+
var_types op1Type = genActualType(op1->TypeGet());
9028+
var_types op2Type = genActualType(op2->TypeGet());
9029+
emitAttr cmpSize = emitActualTypeSize(op1Type);
9030+
regNumber srcReg1 = op1->GetRegNum();
9031+
9032+
// No float support or swapping op1 and op2 to generate cmp reg, imm.
9033+
assert(!varTypeIsFloating(op2Type));
9034+
assert(!op1->isContainedIntOrIImmed());
9035+
9036+
// For the ccmp flags, invert the condition of the compare.
9037+
// For the condition, use the previous compare.
9038+
const GenConditionDesc& condDesc = GenConditionDesc::Get(ccmp->gtCondition);
9039+
instruction ccmpIns = JumpKindToCcmp(condDesc.jumpKind1);
9040+
insOpts opts = OptsFromCFlags(ccmp->gtFlagsVal);
9041+
9042+
if (op2->isContainedIntOrIImmed())
9043+
{
9044+
GenTreeIntConCommon* intConst = op2->AsIntConCommon();
9045+
emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts);
9046+
}
9047+
else
9048+
{
9049+
regNumber srcReg2 = op2->GetRegNum();
9050+
emit->emitIns_R_R(ccmpIns, cmpSize, srcReg1, srcReg2, opts);
9051+
}
9052+
}
9053+
#endif // TARGET_AMD64
9054+
89319055
#if defined(DEBUG) && defined(TARGET_AMD64)
89329056

89339057
/*****************************************************************************

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7016,7 +7016,7 @@ class Compiler
70167016
PhaseStatus optOptimizeBools();
70177017
PhaseStatus optRecognizeAndOptimizeSwitchJumps();
70187018
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
7019-
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
7019+
bool optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingForConversion = false);
70207020

70217021
PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
70227022
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication

src/coreclr/jit/emitxarch.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20116,6 +20116,14 @@ emitter::insFormat emitter::ExtractMemoryFormat(insFormat insFmt) const
2011620116
return IF_NONE;
2011720117
}
2011820118

20119+
#ifdef TARGET_AMD64
20120+
// true if this 'imm' can be encoded as an input operand to a ccmp instruction
20121+
/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm)
20122+
{
20123+
return (((INT32)imm) == imm);
20124+
}
20125+
#endif
20126+
2011920127
#if defined(DEBUG) || defined(LATE_DISASM)
2012020128

2012120129
//----------------------------------------------------------------------------------------

src/coreclr/jit/emitxarch.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1327,4 +1327,9 @@ inline bool HasExtendedGPReg(const instrDesc* id) const;
13271327

13281328
inline bool HasMaskReg(const instrDesc* id) const;
13291329

1330+
#ifdef TARGET_AMD64
1331+
// true if this 'imm' can be encoded as an input operand to a ccmp instruction
1332+
static bool emitIns_valid_imm_for_ccmp(INT64 imm);
1333+
#endif // TARGET_AMD64
1334+
13301335
#endif // TARGET_XARCH

src/coreclr/jit/gentree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9417,7 +9417,7 @@ enum insCC : unsigned
94179417
};
94189418
#endif
94199419

9420-
#if defined(TARGET_ARM64)
9420+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
94219421
struct GenTreeCCMP final : public GenTreeOpCC
94229422
{
94239423
insCflags gtFlagsVal;

src/coreclr/jit/gtlist.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,11 +245,16 @@ GTNODE(JCC , GenTreeCC ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
245245
GTNODE(SETCC , GenTreeCC ,0,0,GTK_LEAF|DBK_NOTHIR)
246246
// Variant of SELECT that reuses flags computed by a previous node with the specified condition.
247247
GTNODE(SELECTCC , GenTreeOpCC ,0,0,GTK_BINOP|DBK_NOTHIR)
248-
#ifdef TARGET_ARM64
249-
// The arm64 ccmp instruction. If the specified condition is true, compares two
248+
249+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
250+
// The arm64 and x86 ccmp instruction. If the specified condition is true, compares two
250251
// operands and sets the condition flags according to the result. Otherwise
251252
// sets the condition flags to the specified immediate value.
252253
GTNODE(CCMP , GenTreeCCMP ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
254+
#endif
255+
256+
257+
#ifdef TARGET_ARM64
253258
// Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1.
254259
// If op2 is null, computes result = condition ? op1 + 1 : op1.
255260
GTNODE(SELECT_INC , GenTreeOp ,0,0,GTK_BINOP|DBK_NOTHIR)

src/coreclr/jit/gtstructs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
116116
GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
117117
GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
118118
GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
119-
#ifdef TARGET_ARM64
119+
#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
120120
GTSTRUCT_1(CCMP , GT_CCMP)
121+
#endif
122+
#ifdef TARGET_ARM64
121123
GTSTRUCT_N(OpCC , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC)
122124
#else
123125
GTSTRUCT_3(OpCC , GT_SELECTCC, GT_JCMP, GT_JTEST)

0 commit comments

Comments
 (0)