diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index fabf3ec922bf42..99cb565b68ea62 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -965,7 +965,7 @@ class CodeGen final : public CodeGenInterface
     void genIntToFloatCast(GenTree* treeNode);
     void genCkfinite(GenTree* treeNode);
     void genCodeForCompare(GenTreeOp* tree);
-#ifdef TARGET_ARM64
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
     void genCodeForCCMP(GenTreeCCMP* ccmp);
 #endif
     void genCodeForSelect(GenTreeOp* select);
@@ -1706,53 +1706,13 @@ class CodeGen final : public CodeGenInterface
     static insOpts ShiftOpToInsOpts(genTreeOps op);
 #elif defined(TARGET_XARCH)
     static instruction JumpKindToCmov(emitJumpKind condition);
+    static instruction JumpKindToCcmp(emitJumpKind condition);
+    static insOpts OptsFromCFlags(insCflags flags);
 #endif
-
-#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
-    // Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
-    // such as X86's SETcc. A sequence of instructions rather than just a single one is required for
-    // certain floating point conditions.
-    // For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
-    // to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
-    // and then jump if ZF is 1:
-    //       JP fallThroughBlock
-    //       JE jumpDestBlock
-    //     fallThroughBlock:
-    //       ...
-    //     jumpDestBlock:
-    //
-    // This is very similar to the way shortcircuit evaluation of bool AND and OR operators works so
-    // in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
-    // pattern is used to encode the above:
-    //     { EJ_jnp, GT_AND, EJ_je  }
-    //     { EJ_jp,  GT_OR,  EJ_jne }
-    //
-    // For more details check inst_JCC and inst_SETCC functions.
-    //
-    struct GenConditionDesc
-    {
-        emitJumpKind jumpKind1;
-        genTreeOps   oper;
-        emitJumpKind jumpKind2;
-        char         padTo4Bytes;
-
-        static const GenConditionDesc& Get(GenCondition condition)
-        {
-            assert(condition.GetCode() < ArrLen(map));
-            const GenConditionDesc& desc = map[condition.GetCode()];
-            assert(desc.jumpKind1 != EJ_NONE);
-            assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR));
-            assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE));
-            return desc;
-        }
-
-    private:
-        static const GenConditionDesc map[32];
-    };
-
     void inst_JCC(GenCondition condition, BasicBlock* target);
     void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg);
+#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
     void genCodeForJcc(GenTreeCC* tree);
     void genCodeForSetcc(GenTreeCC* setcc);
     void genCodeForJTrue(GenTreeOp* jtrue);
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index 1a09056ba5a91e..c4cbf359d87229 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -4162,7 +4162,7 @@ void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
 }
 
 // clang-format off
-const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
+const GenConditionDesc GenConditionDesc::map[32]
 {
     { },        // NONE
     { },        // 1
diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h
index bdd95f2b761d94..61f056d93f9ac0 100644
--- a/src/coreclr/jit/codegeninterface.h
+++ b/src/coreclr/jit/codegeninterface.h
@@ -828,4 +828,47 @@ class CodeGenInterface
 #endif
 };
 
+#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
+// Maps a GenCondition code to a sequence of conditional jumps or other conditional instructions
+// such as X86's SETcc. A sequence of instructions rather than just a single one is required for
+// certain floating point conditions.
+// For example, X86's UCOMISS sets ZF to indicate equality but it also sets it, together with PF,
+// to indicate an unordered result. So for GenCondition::FEQ we first need to check if PF is 0
+// and then jump if ZF is 1:
+//       JP fallThroughBlock
+//       JE jumpDestBlock
+//     fallThroughBlock:
+//       ...
+//     jumpDestBlock:
+//
+// This is very similar to the way short-circuit evaluation of bool AND and OR operators works, so
+// in order to make the GenConditionDesc mapping tables easier to read, a bool expression-like
+// pattern is used to encode the above:
+//     { EJ_jnp, GT_AND, EJ_je  }
+//     { EJ_jp,  GT_OR,  EJ_jne }
+//
+// For more details check inst_JCC and inst_SETCC functions.
+//
+struct GenConditionDesc
+{
+    emitJumpKind jumpKind1;
+    genTreeOps   oper;
+    emitJumpKind jumpKind2;
+    char         padTo4Bytes;
+
+    static const GenConditionDesc& Get(GenCondition condition)
+    {
+        assert(condition.GetCode() < ArrLen(map));
+        const GenConditionDesc& desc = map[condition.GetCode()];
+        assert(desc.jumpKind1 != EJ_NONE);
+        assert((desc.oper == GT_NONE) || (desc.oper == GT_AND) || (desc.oper == GT_OR));
+        assert((desc.oper == GT_NONE) == (desc.jumpKind2 == EJ_NONE));
+        return desc;
+    }
+
+private:
+    static const GenConditionDesc map[32];
+};
+#endif // !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
+
 #endif // _CODEGEN_INTERFACE_H_
diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp
index b02777d67ae517..30de4979515d03 100644
--- a/src/coreclr/jit/codegenxarch.cpp
+++ b/src/coreclr/jit/codegenxarch.cpp
@@ -1573,6 +1573,46 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
     return s_table[condition];
 }
 
+//------------------------------------------------------------------------
+// JumpKindToCcmp:
+//   Convert an emitJumpKind to the corresponding ccmp instruction.
+//
+// Arguments:
+//    condition - the condition
+//
+// Returns:
+//    A ccmp instruction.
+//
+instruction CodeGen::JumpKindToCcmp(emitJumpKind condition)
+{
+    static constexpr instruction s_table[EJ_COUNT] = {
+        INS_none, INS_none, INS_ccmpo, INS_ccmpno, INS_ccmpb, INS_ccmpae, INS_ccmpe, INS_ccmpne, INS_ccmpbe,
+        INS_ccmpa, INS_ccmps, INS_ccmpns, INS_none, INS_none, INS_ccmpl, INS_ccmpge, INS_ccmple, INS_ccmpg,
+    };
+
+    static_assert_no_msg(s_table[EJ_NONE] == INS_none);
+    static_assert_no_msg(s_table[EJ_jmp] == INS_none);
+    static_assert_no_msg(s_table[EJ_jo] == INS_ccmpo);
+    static_assert_no_msg(s_table[EJ_jno] == INS_ccmpno);
+    static_assert_no_msg(s_table[EJ_jb] == INS_ccmpb);
+    static_assert_no_msg(s_table[EJ_jae] == INS_ccmpae);
+    static_assert_no_msg(s_table[EJ_je] == INS_ccmpe);
+    static_assert_no_msg(s_table[EJ_jne] == INS_ccmpne);
+    static_assert_no_msg(s_table[EJ_jbe] == INS_ccmpbe);
+    static_assert_no_msg(s_table[EJ_ja] == INS_ccmpa);
+    static_assert_no_msg(s_table[EJ_js] == INS_ccmps);
+    static_assert_no_msg(s_table[EJ_jns] == INS_ccmpns);
+    static_assert_no_msg(s_table[EJ_jp] == INS_none);
+    static_assert_no_msg(s_table[EJ_jnp] == INS_none);
+    static_assert_no_msg(s_table[EJ_jl] == INS_ccmpl);
+    static_assert_no_msg(s_table[EJ_jge] == INS_ccmpge);
+    static_assert_no_msg(s_table[EJ_jle] == INS_ccmple);
+    static_assert_no_msg(s_table[EJ_jg] == INS_ccmpg);
+
+    assert((condition >= EJ_NONE) && (condition < EJ_COUNT));
+    return s_table[condition];
+}
+
 //------------------------------------------------------------------------
 // genCodeForCompare: Produce code for a GT_SELECT/GT_SELECTCC node.
 //
@@ -1669,7 +1709,7 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)
 }
 
 // clang-format off
-const CodeGen::GenConditionDesc CodeGen::GenConditionDesc::map[32]
+const GenConditionDesc GenConditionDesc::map[32]
 {
     { },        // NONE
     { },        // 1
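// Note (illustrative aside, not part of the change itself): EJ_jp/EJ_jnp map to
// INS_none in the table above because parity-based float conditions such as
// GenCondition::FEQ expand to two flag checks ({ EJ_jnp, GT_AND, EJ_je }), and
// lowering filters those out of CCMP chains by passing
// allowMultipleFlagsChecks = false to TryLowerConditionToFlagsNode, so
// JumpKindToCcmp is not expected to be asked for a parity condition.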
@@ -2270,6 +2310,12 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             // Do nothing; these nodes are simply markers for debug info.
             break;
 
+#if defined(TARGET_AMD64)
+        case GT_CCMP:
+            genCodeForCCMP(treeNode->AsCCMP());
+            break;
+#endif
+
         default:
         {
 #ifdef DEBUG
@@ -8926,6 +8972,84 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
     regSet.verifyRegistersUsed(killMask);
 }
 
+//-----------------------------------------------------------------------------------------
+// OptsFromCFlags - Convert condition flags into the appropriate insOpts.
+//
+// Arguments:
+//    flags - The condition flags to be converted.
+//
+// Return Value:
+//    An insOpts value encoding the condition flags.
+//
+// Notes:
+//    This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate
+//    instruction options used for setting the default flag values in extended EVEX
+//    encoded conditional instructions.
+//
+insOpts CodeGen::OptsFromCFlags(insCflags flags)
+{
+    unsigned opts = 0x0;
+    if (flags & INS_FLAGS_CF)
+        opts |= INS_OPTS_EVEX_dfv_cf;
+    if (flags & INS_FLAGS_ZF)
+        opts |= INS_OPTS_EVEX_dfv_zf;
+    if (flags & INS_FLAGS_SF)
+        opts |= INS_OPTS_EVEX_dfv_sf;
+    if (flags & INS_FLAGS_OF)
+        opts |= INS_OPTS_EVEX_dfv_of;
+    return (insOpts)opts;
+}
+
+#ifdef TARGET_AMD64
+
+//-----------------------------------------------------------------------------------------
+// genCodeForCCMP - Generate code for a conditional compare (CCMP) node.
+//
+// Arguments:
+//    ccmp - The GenTreeCCMP node representing the conditional compare.
+//
+// Return Value:
+//    None.
+//
+// Notes:
+//    This function generates code for a conditional compare operation. On X86, the
+//    comparison is performed using the extended EVEX encoding and the ccmp instruction.
+//
+void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp)
+{
+    emitter* emit = GetEmitter();
+    assert(emit->UsePromotedEVEXEncoding());
+
+    genConsumeOperands(ccmp);
+    GenTree*  op1     = ccmp->gtGetOp1();
+    GenTree*  op2     = ccmp->gtGetOp2();
+    var_types op1Type = genActualType(op1->TypeGet());
+    var_types op2Type = genActualType(op2->TypeGet());
+    emitAttr  cmpSize = emitActualTypeSize(op1Type);
+    regNumber srcReg1 = op1->GetRegNum();
+
+    // No floating-point support, and no swapping of op1 and op2 to generate cmp reg, imm.
+    assert(!varTypeIsFloating(op2Type));
+    assert(!op1->isContainedIntOrIImmed());
+
+    // For the ccmp flags, invert the condition of the compare.
+    // For the condition, use the previous compare.
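+    //
+    // For example (an illustrative sketch, not literal emitted disassembly):
+    // for "(x == 3) && (y == 5)" lowered into cmp/ccmp/setcc, the expected
+    // sequence is roughly:
+    //
+    //       cmp   x, 3           ; sets ZF if x == 3
+    //       ccmpe {dfv=} y, 5    ; if ZF is set, compare y with 5; otherwise
+    //                            ; load the empty flags value so the final "e" fails
+    //       sete  result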
+    const GenConditionDesc& condDesc = GenConditionDesc::Get(ccmp->gtCondition);
+    instruction             ccmpIns  = JumpKindToCcmp(condDesc.jumpKind1);
+    insOpts                 opts     = OptsFromCFlags(ccmp->gtFlagsVal);
+
+    if (op2->isContainedIntOrIImmed())
+    {
+        GenTreeIntConCommon* intConst = op2->AsIntConCommon();
+        emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts);
+    }
+    else
+    {
+        regNumber srcReg2 = op2->GetRegNum();
+        emit->emitIns_R_R(ccmpIns, cmpSize, srcReg1, srcReg2, opts);
+    }
+}
+#endif // TARGET_AMD64
+
 #if defined(DEBUG) && defined(TARGET_AMD64)
 
 /*****************************************************************************
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 4571efd26adb0b..3b2d67a55cede2 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -7013,7 +7013,7 @@ class Compiler
     PhaseStatus optOptimizeBools();
     PhaseStatus optRecognizeAndOptimizeSwitchJumps();
     bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
-    bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
+    bool optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingForConversion = false);
 
     PhaseStatus optInvertLoops();    // Invert loops so they're entered at top and tested at bottom.
     PhaseStatus optOptimizeFlow();   // Simplify flow graph and do tail duplication
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index b227f628f79693..84b97b1fd73833 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -20116,6 +20116,14 @@ emitter::insFormat emitter::ExtractMemoryFormat(insFormat insFmt) const
     return IF_NONE;
 }
 
+#ifdef TARGET_AMD64
+// true if this 'imm' can be encoded as an input operand to a ccmp instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_ccmp(INT64 imm)
+{
+    return (((INT32)imm) == imm);
+}
+#endif
+
 #if defined(DEBUG) || defined(LATE_DISASM)
 
 //----------------------------------------------------------------------------------------
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 41f1b547935d2c..d7d9d6bfd31ac0 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -1327,4 +1327,9 @@ inline bool HasExtendedGPReg(const instrDesc* id) const;
 
 inline bool HasMaskReg(const instrDesc* id) const;
 
+#ifdef TARGET_AMD64
+// true if this 'imm' can be encoded as an input operand to a ccmp instruction
+static bool emitIns_valid_imm_for_ccmp(INT64 imm);
+#endif // TARGET_AMD64
+
 #endif // TARGET_XARCH
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index e19de5dad0c844..f3fb94b09429e8 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -9417,7 +9417,7 @@ enum insCC : unsigned
 };
 #endif
 
-#if defined(TARGET_ARM64)
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 struct GenTreeCCMP final : public GenTreeOpCC
 {
     insCflags gtFlagsVal;
diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h
index ff7ed26f311e61..7a13212fa6eaf3 100644
--- a/src/coreclr/jit/gtlist.h
+++ b/src/coreclr/jit/gtlist.h
@@ -245,11 +245,16 @@ GTNODE(JCC       , GenTreeCC      ,0,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHI
 GTNODE(SETCC     , GenTreeCC      ,0,0,GTK_LEAF|DBK_NOTHIR)
 // Variant of SELECT that reuses flags computed by a previous node with the specified condition.
 GTNODE(SELECTCC  , GenTreeOpCC    ,0,0,GTK_BINOP|DBK_NOTHIR)
-#ifdef TARGET_ARM64
-// The arm64 ccmp instruction. If the specified condition is true, compares two
+
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
+// The arm64 and x86 ccmp instruction. If the specified condition is true, compares two
 // operands and sets the condition flags according to the result. Otherwise
 // sets the condition flags to the specified immediate value.
 GTNODE(CCMP      , GenTreeCCMP    ,0,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
+#endif
+
+
+#ifdef TARGET_ARM64
 // Maps to arm64 csinc/cinc instruction. Computes result = condition ? op1 : op2 + 1.
 // If op2 is null, computes result = condition ? op1 + 1 : op1.
 GTNODE(SELECT_INC , GenTreeOp     ,0,0,GTK_BINOP|DBK_NOTHIR)
diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h
index 26f88d17909974..4fea465d726580 100644
--- a/src/coreclr/jit/gtstructs.h
+++ b/src/coreclr/jit/gtstructs.h
@@ -116,8 +116,10 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
 GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
 GTSTRUCT_1(ArrAddr  , GT_ARR_ADDR)
 GTSTRUCT_2(CC       , GT_JCC, GT_SETCC)
-#ifdef TARGET_ARM64
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
 GTSTRUCT_1(CCMP     , GT_CCMP)
+#endif
+#ifdef TARGET_ARM64
 GTSTRUCT_N(OpCC     , GT_SELECTCC, GT_SELECT_INCCC, GT_JCMP, GT_JTEST, GT_SELECT_INVCC, GT_SELECT_NEGCC)
 #else
 GTSTRUCT_3(OpCC     , GT_SELECTCC, GT_JCMP, GT_JTEST)
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 6988d11debe332..11a9dbb11cba60 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -446,6 +446,7 @@ RELEASE_CONFIG_INTEGER(EnableRiscV64Zbb, "EnableRiscV64Zbb",
 RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, "EnableEmbeddedBroadcast", 1) // Allows embedded broadcasts to be disabled
 RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", 1) // Allows embedded masking to be disabled
 RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled
+RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining to be enabled
 
 // clang-format on
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index a859ade2eb780e..9784dd6d55bd5d 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -859,7 +859,7 @@ GenTree* Lowering::LowerArrLength(GenTreeArrCommon* node)
  *     and LinearCodeGen will be responsible to generate downstream).
  *
  *     This way there are no implicit temporaries.
- *
+ *
  * b) For small-sized switches, we will actually morph them into a series of conditionals of the form
  *     if (case falls into the default){ goto jumpTable[size]; // last entry in the jump table is the default case }
  *     (For the default case conditional, we'll be constructing the exact same code as the jump table case one).
@@ -4428,7 +4428,10 @@ GenTree* Lowering::LowerSelect(GenTreeConditional* select)
 // Return Value:
 //     True if relop was transformed and is now right before 'parent'; otherwise false.
 //
-bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* cond)
+bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent,
+                                            GenTree* condition,
+                                            GenCondition* cond,
+                                            bool allowMultipleFlagsChecks)
 {
     JITDUMP("Lowering condition:\n");
     DISPTREERANGE(BlockRange(), condition);
@@ -4460,6 +4463,18 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition,
     }
 #endif
 
+#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
+    if (!allowMultipleFlagsChecks)
+    {
+        const GenConditionDesc& desc = GenConditionDesc::Get(*cond);
+
+        if (desc.oper != GT_NONE)
+        {
+            return false;
+        }
+    }
+#endif
+
     relop->gtType = TYP_VOID;
     relop->gtFlags |= GTF_SET_FLAGS;
@@ -4498,7 +4513,7 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition,
     {
         assert((condition->gtPrev->gtFlags & GTF_SET_FLAGS) != 0);
         GenTree* flagsDef = condition->gtPrev;
-#ifdef TARGET_ARM64
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
         // CCMP is a flag producing node that also consumes flags, so find the
         // "root" of the flags producers and move the entire range.
         // We limit this to 10 nodes look back to avoid quadratic behavior.
@@ -4515,6 +4530,18 @@ bool Lowering::TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition,
 
     *cond = condition->AsCC()->gtCondition;
 
+#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
+    if (!allowMultipleFlagsChecks)
+    {
+        const GenConditionDesc& desc = GenConditionDesc::Get(*cond);
+
+        if (desc.oper != GT_NONE)
+        {
+            return false;
+        }
+    }
+#endif
+
     LIR::Range range = BlockRange().Remove(flagsDef, condition->gtPrev);
     BlockRange().InsertBefore(parent, std::move(range));
     BlockRange().Remove(condition);
@@ -11056,6 +11083,134 @@ bool Lowering::TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode)
     return true;
 }
 
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
+//------------------------------------------------------------------------
+// TryLowerAndOrToCCMP : Lower AND/OR of two conditions into test + CCMP + SETCC nodes.
+//
+// Arguments:
+//    tree - pointer to the node
+//    next - [out] Next node to lower if this function returns true
+//
+// Return Value:
+//    false if no changes were made
+//
+bool Lowering::TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next)
+{
+    assert(tree->OperIs(GT_AND, GT_OR));
+
+    if (!comp->opts.OptimizationEnabled())
+    {
+        return false;
+    }
+
+    GenTree* op1 = tree->gtGetOp1();
+    GenTree* op2 = tree->gtGetOp2();
+
+    if ((op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1())) ||
+        (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1())))
+    {
+        JITDUMP("[%06u] is a potential candidate for CCMP:\n", Compiler::dspTreeID(tree));
+        DISPTREERANGE(BlockRange(), tree);
+        JITDUMP("\n");
+    }
+
+    // Find out whether an operand is eligible to be converted to a conditional
+    // compare. It must be a normal integral relop; for example, we cannot
+    // conditionally perform a floating point comparison and there is no "ctst"
+    // instruction that would allow us to conditionally implement
+    // TEST_EQ/TEST_NE.
+    //
+    // For the other operand we can allow more arbitrary operations that set
+    // the condition flags; the final transformation into the flags def is done
+    // by TryLowerConditionToFlagsNode.
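+    //
+    // For example (illustrative): in "((a & 7) == 0) && (b == c)", the TEST_EQ
+    // produced for the masked compare can only act as the flags producer, so
+    // it is "b == c" that becomes the CCMP.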
+    //
+    GenCondition cond1;
+    if (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1()) && IsInvariantInRange(op2, tree) &&
+        (op2->gtGetOp1()->IsIntegralConst() || !op2->gtGetOp1()->isContained()) &&
+        (op2->gtGetOp2() == nullptr || op2->gtGetOp2()->IsIntegralConst() || !op2->gtGetOp2()->isContained()) &&
+        TryLowerConditionToFlagsNode(tree, op1, &cond1, false))
+    {
+        // Fall through, converting op2 to the CCMP
+    }
+    else if (op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1()) && IsInvariantInRange(op1, tree) &&
+             (op1->gtGetOp1()->IsIntegralConst() || !op1->gtGetOp1()->isContained()) &&
+             (op1->gtGetOp2() == nullptr || op1->gtGetOp2()->IsIntegralConst() || !op1->gtGetOp2()->isContained()) &&
+             TryLowerConditionToFlagsNode(tree, op2, &cond1, false))
+    {
+        std::swap(op1, op2);
+    }
+    else
+    {
+        JITDUMP("  ..could not turn [%06u] or [%06u] into a def of flags, bailing\n", Compiler::dspTreeID(op1),
+                Compiler::dspTreeID(op2));
+        return false;
+    }
+
+    BlockRange().Remove(op2);
+    BlockRange().InsertBefore(tree, op2);
+
+    GenCondition cond2 = GenCondition::FromRelop(op2);
+    op2->SetOper(GT_CCMP);
+    op2->gtType = TYP_VOID;
+    op2->gtFlags |= GTF_SET_FLAGS;
+
+    op2->gtGetOp1()->ClearContained();
+    op2->gtGetOp2()->ClearContained();
+
+    GenTreeCCMP* ccmp = op2->AsCCMP();
+
+    if (tree->OperIs(GT_AND))
+    {
+        // If the first comparison succeeds then do the second comparison.
+        ccmp->gtCondition = cond1;
+        // Otherwise set the condition flags to something that makes the second
+        // one fail.
+        ccmp->gtFlagsVal = TruthifyingFlags(GenCondition::Reverse(cond2));
+    }
+    else
+    {
+        // If the first comparison fails then do the second comparison.
+        ccmp->gtCondition = GenCondition::Reverse(cond1);
+        // Otherwise set the condition flags to something that makes the second
+        // one succeed.
+        ccmp->gtFlagsVal = TruthifyingFlags(cond2);
+    }
+
+    ContainCheckConditionalCompare(ccmp);
+
+    tree->SetOper(GT_SETCC);
+    tree->AsCC()->gtCondition = cond2;
+
+    JITDUMP("Conversion was legal. Result:\n");
+    DISPTREERANGE(BlockRange(), tree);
+    JITDUMP("\n");
+
+    *next = tree->gtNext;
+    return true;
+}
+
+//------------------------------------------------------------------------
+// ContainCheckConditionalCompare: determine whether the source of a compare within a compare chain should be contained.
+//
+// Arguments:
+//    cmp - pointer to the node
+//
+void Lowering::ContainCheckConditionalCompare(GenTreeCCMP* cmp)
+{
+    GenTree* op2 = cmp->gtOp2;
+
+    if (op2->IsCnsIntOrI() && !op2->AsIntCon()->ImmedValNeedsReloc(comp))
+    {
+        target_ssize_t immVal = (target_ssize_t)op2->AsIntCon()->gtIconVal;
+
+        if (emitter::emitIns_valid_imm_for_ccmp(immVal))
+        {
+            MakeSrcContained(cmp, op2);
+        }
+    }
+}
+#endif
+
 #if defined(FEATURE_HW_INTRINSICS)
 //----------------------------------------------------------------------------------------------
 // Lowering::InsertNewSimdCreateScalarUnsafeNode: Inserts a new simd CreateScalarUnsafe node
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index d44880bd947554..388e136f3f7812 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -98,7 +98,7 @@ class Lowering final : public Phase
     void ContainCheckReturnTrap(GenTreeOp* node);
     void ContainCheckLclHeap(GenTreeOp* node);
     void ContainCheckRet(GenTreeUnOp* ret);
-#ifdef TARGET_ARM64
+#if defined(TARGET_ARM64) || defined(TARGET_AMD64)
     bool TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next);
     insCflags TruthifyingFlags(GenCondition cond);
     void ContainCheckConditionalCompare(GenTreeCCMP* ccmp);
@@ -170,7 +170,10 @@ class Lowering final : public Phase
     GenTree* LowerCompare(GenTree* cmp);
     GenTree* LowerJTrue(GenTreeOp* jtrue);
     GenTree* LowerSelect(GenTreeConditional* cond);
-    bool TryLowerConditionToFlagsNode(GenTree* parent, GenTree* condition, GenCondition* code);
+    bool TryLowerConditionToFlagsNode(GenTree* parent,
+                                      GenTree* condition,
+                                      GenCondition* code,
+                                      bool allowMultipleFlagsChecks = true);
     GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
     void LowerJmpMethod(GenTree* jmp);
     void LowerRet(GenTreeOp* ret);
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index fdfef546fde5a4..019e1115f9bb13 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -3158,111 +3158,6 @@ void Lowering::ContainCheckCompare(GenTreeOp* cmp)
 }
 
 #ifdef TARGET_ARM64
-//------------------------------------------------------------------------
-// TryLowerAndOrToCCMP : Lower AND/OR of two conditions into test + CCMP + SETCC nodes.
-//
-// Arguments:
-//    tree - pointer to the node
-//    next - [out] Next node to lower if this function returns true
-//
-// Return Value:
-//    false if no changes were made
-//
-bool Lowering::TryLowerAndOrToCCMP(GenTreeOp* tree, GenTree** next)
-{
-    assert(tree->OperIs(GT_AND, GT_OR));
-
-    if (!comp->opts.OptimizationEnabled())
-    {
-        return false;
-    }
-
-    GenTree* op1 = tree->gtGetOp1();
-    GenTree* op2 = tree->gtGetOp2();
-
-    if ((op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1())) ||
-        (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1())))
-    {
-        JITDUMP("[%06u] is a potential candidate for CCMP:\n", Compiler::dspTreeID(tree));
-        DISPTREERANGE(BlockRange(), tree);
-        JITDUMP("\n");
-    }
-
-    // Find out whether an operand is eligible to be converted to a conditional
-    // compare. It must be a normal integral relop; for example, we cannot
-    // conditionally perform a floating point comparison and there is no "ctst"
-    // instruction that would allow us to conditionally implement
-    // TEST_EQ/TEST_NE.
-    //
-    // For the other operand we can allow more arbitrary operations that set
-    // the condition flags; the final transformation into the flags def is done
-    // by TryLowerConditionToFlagsNode.
-    //
-    GenCondition cond1;
-    if (op2->OperIsCmpCompare() && varTypeIsIntegralOrI(op2->gtGetOp1()) && IsInvariantInRange(op2, tree) &&
-        (op2->gtGetOp1()->IsIntegralConst() || !op2->gtGetOp1()->isContained()) &&
-        (op2->gtGetOp2() == nullptr || op2->gtGetOp2()->IsIntegralConst() || !op2->gtGetOp2()->isContained()) &&
-        TryLowerConditionToFlagsNode(tree, op1, &cond1))
-    {
-        // Fall through, converting op2 to the CCMP
-    }
-    else if (op1->OperIsCmpCompare() && varTypeIsIntegralOrI(op1->gtGetOp1()) && IsInvariantInRange(op1, tree) &&
-             (op1->gtGetOp1()->IsIntegralConst() || !op1->gtGetOp1()->isContained()) &&
-             (op1->gtGetOp2() == nullptr || op1->gtGetOp2()->IsIntegralConst() || !op1->gtGetOp2()->isContained()) &&
-             TryLowerConditionToFlagsNode(tree, op2, &cond1))
-    {
-        std::swap(op1, op2);
-    }
-    else
-    {
-        JITDUMP("  ..could not turn [%06u] or [%06u] into a def of flags, bailing\n", Compiler::dspTreeID(op1),
-                Compiler::dspTreeID(op2));
-        return false;
-    }
-
-    BlockRange().Remove(op2);
-    BlockRange().InsertBefore(tree, op2);
-
-    GenCondition cond2 = GenCondition::FromRelop(op2);
-    op2->SetOper(GT_CCMP);
-    op2->gtType = TYP_VOID;
-    op2->gtFlags |= GTF_SET_FLAGS;
-
-    op2->gtGetOp1()->ClearContained();
-    op2->gtGetOp2()->ClearContained();
-
-    GenTreeCCMP* ccmp = op2->AsCCMP();
-
-    if (tree->OperIs(GT_AND))
-    {
-        // If the first comparison succeeds then do the second comparison.
-        ccmp->gtCondition = cond1;
-        // Otherwise set the condition flags to something that makes the second
-        // one fail.
-        ccmp->gtFlagsVal = TruthifyingFlags(GenCondition::Reverse(cond2));
-    }
-    else
-    {
-        // If the first comparison fails then do the second comparison.
-        ccmp->gtCondition = GenCondition::Reverse(cond1);
-        // Otherwise set the condition flags to something that makes the second
-        // one succeed.
-        ccmp->gtFlagsVal = TruthifyingFlags(cond2);
-    }
-
-    ContainCheckConditionalCompare(ccmp);
-
-    tree->SetOper(GT_SETCC);
-    tree->AsCC()->gtCondition = cond2;
-
-    JITDUMP("Conversion was legal. Result:\n");
-    DISPTREERANGE(BlockRange(), tree);
-    JITDUMP("\n");
-
-    *next = tree->gtNext;
-    return true;
-}
-
 //------------------------------------------------------------------------
 // TruthifyingFlags: Get a flags immediate that will make a specified condition true.
 //
@@ -3301,28 +3196,6 @@ insCflags Lowering::TruthifyingFlags(GenCondition condition)
             return INS_FLAGS_NONE;
         }
     }
-
-//------------------------------------------------------------------------
-// ContainCheckConditionalCompare: determine whether the source of a compare within a compare chain should be contained.
-//
-// Arguments:
-//    node - pointer to the node
-//
-void Lowering::ContainCheckConditionalCompare(GenTreeCCMP* cmp)
-{
-    GenTree* op2 = cmp->gtOp2;
-
-    if (op2->IsCnsIntOrI() && !op2->AsIntCon()->ImmedValNeedsReloc(comp))
-    {
-        target_ssize_t immVal = (target_ssize_t)op2->AsIntCon()->gtIconVal;
-
-        if (emitter::emitIns_valid_imm_for_ccmp(immVal))
-        {
-            MakeSrcContained(cmp, op2);
-        }
-    }
-}
-
 #endif // TARGET_ARM64
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 0055b8693a32ad..79a72137e0362c 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -320,9 +320,65 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
 
     ContainCheckBinary(binOp);
 
+#ifdef TARGET_AMD64
+    if (JitConfig.EnableApxConditionalChaining())
+    {
+        if (binOp->OperIs(GT_AND, GT_OR))
+        {
+            GenTree* next;
+            if (TryLowerAndOrToCCMP(binOp, &next))
+            {
+                return next;
+            }
+        }
+    }
+#endif // TARGET_AMD64
+
     return binOp->gtNext;
 }
 
+#ifdef TARGET_AMD64
+//------------------------------------------------------------------------
+// TruthifyingFlags: Get a flags immediate that will make a specified condition true.
+//
+// Arguments:
+//    condition - the condition.
+//
+// Returns:
+//    A flags immediate that, if those flags were set, would cause the specified condition to be true.
+//    (NOTE: This just has to make the condition true, i.e., if the condition calls for (SF ^ OF),
+//    then setting SF alone will suffice.)
+//
+insCflags Lowering::TruthifyingFlags(GenCondition condition)
+{
+    switch (condition.GetCode())
+    {
+        case GenCondition::EQ:
+            return INS_FLAGS_ZF;
+        case GenCondition::NE:
+            return INS_FLAGS_NONE;
+        case GenCondition::SGE: // !(SF ^ OF)
+            return INS_FLAGS_NONE;
+        case GenCondition::SGT: // !(SF ^ OF) && !ZF
+            return INS_FLAGS_NONE;
+        case GenCondition::SLE: // !(SF ^ OF) || ZF
+            return INS_FLAGS_ZF;
+        case GenCondition::SLT: // (SF ^ OF)
+            return INS_FLAGS_SF;
+        case GenCondition::UGE: // !CF
+            return INS_FLAGS_NONE;
+        case GenCondition::UGT: // !CF && !ZF
+            return INS_FLAGS_NONE;
+        case GenCondition::ULE: // CF || ZF
+            return INS_FLAGS_ZF;
+        case GenCondition::ULT: // CF
+            return INS_FLAGS_CF;
+        default:
+            NO_WAY("unexpected condition type");
+            return INS_FLAGS_NONE;
+    }
+}
+#endif // TARGET_AMD64
+
 //------------------------------------------------------------------------
 // LowerBlockStore: Lower a block store node
 //
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index d3145322ffa9b1..b6b01ca38e63f8 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -4603,7 +4603,9 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree)
 //
 int LinearScan::BuildCmp(GenTree* tree)
 {
-#if defined(TARGET_XARCH)
+#if defined(TARGET_AMD64)
+    assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_BT, GT_CCMP));
+#elif defined(TARGET_X86)
     assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_BT));
 #elif defined(TARGET_ARM64)
     assert(tree->OperIsCompare() || tree->OperIs(GT_CMP, GT_TEST, GT_JCMP, GT_JTEST, GT_CCMP));
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index 3ef6952fb8aec5..6f92d25d2b23ed 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -441,6 +441,9 @@ int LinearScan::BuildNode(GenTree* tree)
         case GT_CMP:
         case GT_TEST:
         case GT_BT:
+#ifdef TARGET_AMD64
+        case GT_CCMP:
+#endif
            srcCount = BuildCmp(tree);
            break;
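// A minimal sketch of how TruthifyingFlags drives the OR case (illustrative
// only; "{dfv=...}" follows the APX disassembly convention for the default
// flags value and is not literal output of this change). For
// "(x == 3) || (y == 5)", lowering reverses the first condition and picks
// dfv = TruthifyingFlags(EQ) = ZF:
//
//       cmp    x, 3             ; sets ZF if x == 3
//       ccmpne {dfv=zf} y, 5    ; if x != 3, compare y with 5; otherwise
//                               ; set ZF so the final "e" already holds
//       sete   result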
diff --git a/src/coreclr/jit/optimizebools.cpp b/src/coreclr/jit/optimizebools.cpp
index 3cd693dcbca587..07c82ebb4a54eb 100644
--- a/src/coreclr/jit/optimizebools.cpp
+++ b/src/coreclr/jit/optimizebools.cpp
@@ -1651,6 +1651,22 @@ PhaseStatus Compiler::optOptimizeBools()
                 retry = true;
                 numCond++;
             }
+#elif defined(TARGET_AMD64)
+            // todo-xarch-apx: when we have proper CPUID (hardware) support, we can switch the below from an OR
+            // condition to an AND. For now, `JitConfig.EnableApxConditionalChaining` drives whether the
+            // optimization triggers or not.
+            // else if ((compOpportunisticallyDependsOn(InstructionSet_APX) || JitConfig.JitEnableApxIfConv()) &&
+            //          optBoolsDsc.optOptimizeCompareChainCondBlock())
+            else if (JitConfig.EnableApxConditionalChaining() && !optSwitchDetectAndConvert(b1, true) &&
+                     optBoolsDsc.optOptimizeCompareChainCondBlock())
+            {
+                // The optimization will have merged b1 and b2. Retry the loop so that
+                // b1 and b2->bbNext can be tested.
+                change = true;
+                retry  = true;
+                numCond++;
+            }
+
 #endif
         }
         else
diff --git a/src/coreclr/jit/switchrecognition.cpp b/src/coreclr/jit/switchrecognition.cpp
index 739a8eda4ab1ac..6818a6b15b8987 100644
--- a/src/coreclr/jit/switchrecognition.cpp
+++ b/src/coreclr/jit/switchrecognition.cpp
@@ -157,11 +157,14 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
 //
 // Arguments:
 //    firstBlock - A block to start the search from
+//    testingForConversion - Check whether a switch conversion is likely to happen, without performing it.
+//        Used to avoid a pessimization when optimizing for conditional chaining. The check lives in this
+//        function so that it does not have to be maintained in two places.
 //
 // Return Value:
 //    True if the conversion was successful, false otherwise
 //
-bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
+bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingForConversion)
 {
     assert(firstBlock->KindIs(BBJ_COND));
 
@@ -206,7 +209,8 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
                 {
                     // Only the first conditional block can have multiple statements.
                     // Stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                    return !testingForConversion &&
+                           optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 // Inspect secondary blocks
@@ -216,25 +220,29 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
                 if (currTrueTarget != trueTarget)
                 {
                     // This blocks jumps to a different target, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                    return !testingForConversion &&
+                           optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (!GenTree::Compare(currVariableNode, variableNode->gtEffectiveVal()))
                 {
                     // A different variable node is used, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                    return !testingForConversion &&
+                           optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (currBb->GetUniquePred(this) != prevBlock)
                 {
                     // Multiple preds in a secondary block, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                    return !testingForConversion &&
+                           optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (!BasicBlock::sameEHRegion(prevBlock, currBb))
                 {
                     // Current block is in a different EH region, stop searching and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                    return !testingForConversion &&
+                           optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 // Ok we can work with that, add the test value to the list
@@ -244,21 +252,27 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock)
                 if (testValueIndex == SWITCH_MAX_DISTANCE)
                 {
                     // Too many suitable tests found - stop and process what we already have.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                    return !testingForConversion &&
+                           optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
                 if (isReversed)
                 {
                     // We only support reversed test (GT_NE) for the last block.
-                    return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                    return !testingForConversion &&
+                           optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
                 }
 
+                if (testingForConversion)
+                    return true;
+
                 prevBlock = currBb;
             }
             else
            {
                 // Current block is not a suitable test, stop searching and process what we already have.
-                return optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
+                return !testingForConversion &&
+                       optSwitchConvert(firstBlock, testValueIndex, testValues, falseLikelihood, variableNode);
             }
         }
     }
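// A closing sketch of the optOptimizeBools/switch-recognition interplay above
// (illustrative source fragment; the constants are hypothetical):
//
//     if (x == 10 || x == 12 || x == 14) { ... }
//
// optSwitchDetectAndConvert(b1, /* testingForConversion */ true) walks the same
// chain of constant tests the converter would, and returns true without mutating
// the flow graph when a switch/bit-test conversion is likely. In that case the
// CCMP chaining path backs off and leaves the blocks to switchrecognition.cpp;
// only when no switch conversion is likely does
// optOptimizeCompareChainCondBlock() fold the compares into a flags chain.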