diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 33dda8c734ca1b..21bb54e33d3802 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -10165,7 +10165,7 @@ JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
                 {
                     chars += printf("[CALL_M_NOGCCHECK]");
                 }
-                if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+                if (call->IsSpecialIntrinsic())
                 {
                     chars += printf("[CALL_M_SPECIAL_INTRINSIC]");
                 }
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 4a639c7e639f9e..f63800a6a65526 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -4014,7 +4014,7 @@ class Compiler
 
     // false: we can add new tracked variables.
     // true: We cannot add new 'tracked' variable
-    bool lvaTrackedFixed = false;
+    bool     lvaTrackedFixed = false;
 
     unsigned lvaCount; // total number of locals, which includes function arguments,
                        // special arguments, IL local variables, and JIT temporary variables
@@ -4763,7 +4763,8 @@ class Compiler
                               R2RARG(CORINFO_CONST_LOOKUP* entryPoint),
                               var_types             callType,
                               NamedIntrinsic        intrinsicName,
-                              bool                  tailCall);
+                              bool                  tailCall,
+                              bool*                 isSpecial);
     GenTree* impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
                                 CORINFO_SIG_INFO*     sig,
                                 CorInfoType           callJitType,
@@ -6924,7 +6925,7 @@ class Compiler
     unsigned acdCount = 0;
 
     // Get the index to use as part of the AddCodeDsc key for sharing throw blocks
-    unsigned bbThrowIndex(BasicBlock* blk, AcdKeyDesignator* dsg);
+    unsigned bbThrowIndex(BasicBlock* blk, AcdKeyDesignator* dsg);
 
     struct AddCodeDscKey
     {
@@ -6932,7 +6933,7 @@ class Compiler
         AddCodeDscKey(): acdKind(SCK_NONE), acdData(0) {}
         AddCodeDscKey(SpecialCodeKind kind, BasicBlock* block, Compiler* comp);
         AddCodeDscKey(AddCodeDsc* add);
-
+
         static bool Equals(const AddCodeDscKey& x, const AddCodeDscKey& y)
         {
             return (x.acdData == y.acdData) && (x.acdKind == y.acdKind);
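A note on the accessor itself: GenTreeCall::IsSpecialIntrinsic() is not defined anywhere in this diff. Since every call site replaces a one-for-one test of gtCallMoreFlags, it is presumably a trivial wrapper in gentree.h, along these lines (a sketch of the assumed definition, not code from this change):

    // Hypothetical sketch of the accessor this diff relies on:
    bool IsSpecialIntrinsic() const
    {
        return (gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0;
    }
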
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 7c86705f00dd3e..7f72b807301ac0 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -2005,66 +2005,74 @@ bool GenTreeCall::NeedsVzeroupper(Compiler* comp)
     }
 
     bool needsVzeroupper = false;
+    bool checkSignature  = false;
 
-    if (IsPInvoke())
-    {
-        // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
-        //   Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
-        //   between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
-        //   VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
-        //   register) and before any call to an unknown function.
+    // The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
+    //   Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
+    //   between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
+    //   VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
+    //   register) and before any call to an unknown function.
 
-        switch (gtCallType)
+    switch (gtCallType)
+    {
+        case CT_USER_FUNC:
+        case CT_INDIRECT:
         {
-            case CT_USER_FUNC:
-            case CT_INDIRECT:
-            {
-                // Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
-                // may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT
+            // Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
+            // may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT
 
+            if (IsPInvoke())
+            {
                 needsVzeroupper = true;
-                break;
             }
-
-            case CT_HELPER:
+            else if (IsSpecialIntrinsic())
             {
-                // Most helpers are well known to not use any floating-point or SIMD logic internally, but
-                // a few do exist so we need to ensure they are handled. They are identified by taking or
-                // returning a floating-point or SIMD type, regardless of how it is actually passed/returned.
+                checkSignature = true;
+            }
+            break;
+        }
 
-                if (varTypeUsesFloatReg(this))
+        case CT_HELPER:
+        {
+            // A few special cases exist that can't be found by signature alone, so we handle
+            // those explicitly here instead.
+            needsVzeroupper = IsHelperCall(comp, CORINFO_HELP_BULK_WRITEBARRIER);
+
+            // Most other helpers are well known to not use any floating-point or SIMD logic internally, but
+            // a few do exist so we need to ensure they are handled. They are identified by taking or
+            // returning a floating-point or SIMD type, regardless of how it is actually passed/returned but
+            // are excluded if we know they are implemented in managed.
+            checkSignature = !needsVzeroupper && !IsHelperCall(comp, CORINFO_HELP_DBL2INT_OVF) &&
+                             !IsHelperCall(comp, CORINFO_HELP_DBL2LNG_OVF) &&
+                             !IsHelperCall(comp, CORINFO_HELP_DBL2UINT_OVF) &&
+                             !IsHelperCall(comp, CORINFO_HELP_DBL2ULNG_OVF);
+            break;
+        }
+
+        default:
+        {
+            unreached();
+        }
+    }
+
+    if (checkSignature)
+    {
+        if (varTypeUsesFloatReg(this))
+        {
+            needsVzeroupper = true;
+        }
+        else
+        {
+            for (CallArg& arg : gtArgs.Args())
+            {
+                if (varTypeUsesFloatReg(arg.GetSignatureType()))
                 {
                     needsVzeroupper = true;
+                    break;
                 }
-                else
-                {
-                    for (CallArg& arg : gtArgs.Args())
-                    {
-                        if (varTypeUsesFloatReg(arg.GetSignatureType()))
-                        {
-                            needsVzeroupper = true;
-                            break;
-                        }
-                    }
-                }
-                break;
-            }
-
-            default:
-            {
-                unreached();
             }
         }
     }
-
-    // Other special cases
-    //
-    if (!needsVzeroupper && IsHelperCall(comp, CORINFO_HELP_BULK_WRITEBARRIER))
-    {
-        needsVzeroupper = true;
-    }
-
     return needsVzeroupper;
 }
 
 #endif // TARGET_XARCH
@@ -13708,7 +13716,7 @@ GenTree* Compiler::gtFoldExprCall(GenTreeCall* call)
     assert(!call->gtArgs.AreArgsComplete());
 
     // Can only fold calls to special intrinsics.
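The NeedsVzeroupper rework is easier to follow with the hardware issue in mind: code that dirties the upper halves of the YMM registers and then executes legacy (non-VEX) SSE instructions can incur a large state-transition stall on some microarchitectures. A minimal standalone illustration of the guidance quoted from the Intel manual (ordinary user code, not JIT code; unknown_callee is a hypothetical stand-in for a P/Invoke target or native math fallback):

    #include <immintrin.h>

    extern void unknown_callee(); // opaque; may use legacy SSE encodings

    void avx_then_call(float* dst, const float* a, const float* b)
    {
        // VEX-encoded 256-bit work dirties the upper YMM state.
        __m256 v = _mm256_add_ps(_mm256_loadu_ps(a), _mm256_loadu_ps(b));
        _mm256_storeu_ps(dst, v);

        // Clear the upper state before calling "unknown" code, mirroring
        // what the JIT does when NeedsVzeroupper() returns true.
        _mm256_zeroupper();
        unknown_callee();
    }
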
-    if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)
+    if (!call->IsSpecialIntrinsic())
     {
         return call;
     }
@@ -17675,7 +17683,7 @@ Compiler::TypeProducerKind Compiler::gtGetTypeProducerKind(GenTree* tree)
             return TPK_Handle;
         }
     }
-    else if (tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+    else if (tree->AsCall()->IsSpecialIntrinsic())
     {
         if (lookupNamedIntrinsic(tree->AsCall()->gtCallMethHnd) == NI_System_Object_GetType)
         {
@@ -19135,7 +19143,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, bool* pIsNonNull)
         case GT_CALL:
         {
             GenTreeCall* call = obj->AsCall();
-            if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+            if (call->IsSpecialIntrinsic())
             {
                 NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd);
                 if ((ni == NI_System_Array_Clone) || (ni == NI_System_Object_MemberwiseClone))
diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp
index 8407fd469eff6c..7635502f7cb5d0 100644
--- a/src/coreclr/jit/helperexpansion.cpp
+++ b/src/coreclr/jit/helperexpansion.cpp
@@ -1605,7 +1605,7 @@ PhaseStatus Compiler::fgVNBasedIntrinsicExpansion()
 //
 bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call)
 {
-    if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)
+    if (!call->IsSpecialIntrinsic())
     {
         return false;
     }
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index 61021c74f4ea37..9ace7b78f72132 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -1490,7 +1490,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
         {
             spillStack = false;
         }
-        else if ((callNode->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
+        else if (callNode->IsSpecialIntrinsic())
         {
             spillStack = false;
         }
@@ -4090,7 +4090,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
                 if (impStackTop().val->OperIs(GT_RET_EXPR))
                 {
                     GenTreeCall* call = impStackTop().val->AsRetExpr()->gtInlineCandidate->AsCall();
-                    if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+                    if (call->IsSpecialIntrinsic())
                     {
                         if (lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Threading_Thread_get_CurrentThread)
                         {
@@ -4336,7 +4336,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
             case NI_System_Math_Log2:
             case NI_System_Math_Log10:
             {
-                retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall);
+                retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);
                 break;
             }
@@ -4429,7 +4429,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
             case NI_System_Math_Tanh:
             case NI_System_Math_Truncate:
             {
-                retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall);
+                retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);
                 break;
             }
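The two call sites above thread impIntrinsic's pre-existing isSpecial local through to impMathIntrinsic. The consuming side is unchanged and therefore not in this diff; the overall shape is presumably as follows (a sketch under that assumption, not verbatim importer code):

    bool isSpecial = false;
    GenTree* retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);

    // Pre-existing importer logic (unchanged, so not shown in the diff)
    // propagates isSpecial so the eventual call node carries
    // GTF_CALL_M_SPECIAL_INTRINSIC, the flag NeedsVzeroupper() now honors.
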
@@ -9533,16 +9533,33 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
                                     CORINFO_SIG_INFO* sig R2RARG(CORINFO_CONST_LOOKUP* entryPoint),
                                     var_types             callType,
                                     NamedIntrinsic        intrinsicName,
-                                    bool                  tailCall)
+                                    bool                  tailCall,
+                                    bool*                 isSpecial)
 {
     GenTree* op1;
     GenTree* op2;
 
     assert(callType != TYP_STRUCT);
     assert(IsMathIntrinsic(intrinsicName));
+    assert(isSpecial != nullptr);
 
     op1 = nullptr;
 
+    bool isIntrinsicImplementedByUserCall = IsIntrinsicImplementedByUserCall(intrinsicName);
+
+    if (isIntrinsicImplementedByUserCall)
+    {
+#if defined(TARGET_XARCH)
+        // We want to track math intrinsics implemented as user calls as special
+        // to ensure we don't lose track of the fact it will call into native code
+        //
+        // This is used on xarch to track that it may need vzeroupper inserted to
+        // avoid the perf penalty on some hardware.
+
+        *isSpecial = true;
+#endif // TARGET_XARCH
+    }
+
 #if !defined(TARGET_X86)
     // Intrinsics that are not implemented directly by target instructions will
     // be re-materialized as users calls in rationalizer. For prefixed tail calls,
@@ -9550,12 +9567,12 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
     // a) For back compatibility reasons on desktop .NET Framework 4.6 / 4.6.1
     // b) It will be non-trivial task or too late to re-materialize a surviving
     //    tail prefixed GT_INTRINSIC as tail call in rationalizer.
-    if (!IsIntrinsicImplementedByUserCall(intrinsicName) || !tailCall)
+    if (!isIntrinsicImplementedByUserCall || !tailCall)
 #else
     // On x86 RyuJIT, importing intrinsics that are implemented as user calls can cause incorrect calculation
     // of the depth of the stack if these intrinsics are used as arguments to another call. This causes bad
     // code generation for certain EH constructs.
-    if (!IsIntrinsicImplementedByUserCall(intrinsicName))
+    if (!isIntrinsicImplementedByUserCall)
 #endif
     {
         CORINFO_ARG_LIST_HANDLE arg = sig->args;
@@ -9588,7 +9605,7 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
                 NO_WAY("Unsupported number of args for Math Intrinsic");
         }
 
-        if (IsIntrinsicImplementedByUserCall(intrinsicName))
+        if (isIntrinsicImplementedByUserCall)
         {
             op1->gtFlags |= GTF_CALL;
         }
@@ -10073,7 +10090,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
 #endif // FEATURE_HW_INTRINSICS && TARGET_XARCH
 
     // TODO-CQ: Returning this as an intrinsic blocks inlining and is undesirable
-    // return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall);
+    // return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall, isSpecial);
     return nullptr;
 }
 
diff --git a/src/coreclr/jit/importervectorization.cpp b/src/coreclr/jit/importervectorization.cpp
index 649b466a26f6e3..0f9ac5643e9a92 100644
--- a/src/coreclr/jit/importervectorization.cpp
+++ b/src/coreclr/jit/importervectorization.cpp
@@ -351,7 +351,7 @@ GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span)
         argCall = span->AsCall();
     }
 
-    if ((argCall != nullptr) && ((argCall->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0))
+    if ((argCall != nullptr) && argCall->IsSpecialIntrinsic())
     {
         const NamedIntrinsic ni = lookupNamedIntrinsic(argCall->gtCallMethHnd);
         if ((ni == NI_System_MemoryExtensions_AsSpan) || (ni == NI_System_String_op_Implicit))
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 484bf1454e5fd3..d5928340c44514 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -2764,7 +2764,7 @@ GenTree* Lowering::LowerCall(GenTree* node)
 #if defined(TARGET_AMD64) || defined(TARGET_ARM64)
     GenTree* nextNode = nullptr;
 
-    if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+    if (call->IsSpecialIntrinsic())
     {
         switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd))
         {
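For context on isIntrinsicImplementedByUserCall: on xarch only some Math operations map to target instructions; the rest survive importation only to be re-materialized as calls into native CRT routines, which is exactly why they now need the special-intrinsic marking. A hypothetical illustration of the distinction (the real predicate, IsIntrinsicImplementedByUserCall, lives elsewhere in the compiler and is not part of this diff):

    // Sketch only; not the actual RyuJIT implementation. Assumes the JIT's
    // NamedIntrinsic definitions are in scope.
    bool IsImplementedByUserCall_sketch(NamedIntrinsic ni)
    {
        switch (ni)
        {
            case NI_System_Math_Sqrt:
                return false; // maps to an instruction (e.g. vsqrtsd)
            case NI_System_Math_Sin:
            case NI_System_Math_Tanh:
                return true;  // becomes a call into the native CRT
            default:
                return true;  // conservative default for this sketch
        }
    }
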
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index a787fece19fb6d..457c0fa5a4fbb7 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -5021,7 +5021,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
 #endif
     };
 
-    if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+    if (call->IsSpecialIntrinsic())
     {
         failTailCall("Might turn into an intrinsic");
         return nullptr;
@@ -6916,7 +6916,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
 #endif
     }
 
-    if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
+    if (call->IsSpecialIntrinsic())
     {
         if (lookupNamedIntrinsic(call->AsCall()->gtCallMethHnd) ==
             NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8)
@@ -7005,7 +7005,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
     // Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
    //
     // We need to do these before the arguments are morphed
-    if (!call->gtArgs.AreArgsComplete() && (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
+    if (!call->gtArgs.AreArgsComplete() && call->IsSpecialIntrinsic())
     {
         // See if this is foldable
         GenTree* optTree = gtFoldExprCall(call);
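The morph changes keep the existing behavior: special-intrinsic calls are still excluded from tail calls (they may later be turned back into intrinsic nodes), and Type.op_Equality/Type.op_Inequality/Enum.HasFlag still get a folding attempt before their arguments are morphed. A compressed sketch of that control flow, paraphrased from the hunks above rather than new logic:

    if (!call->gtArgs.AreArgsComplete() && call->IsSpecialIntrinsic())
    {
        GenTree* optTree = gtFoldExprCall(call); // may replace the call
        if (optTree != call)
        {
            return fgMorphTree(optTree); // fold succeeded; morph the result
        }
    }
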
diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp
index 708de9b8b8c341..572240b6298e55 100644
--- a/src/coreclr/jit/rationalize.cpp
+++ b/src/coreclr/jit/rationalize.cpp
@@ -9,13 +9,14 @@
 // RewriteNodeAsCall : Replace the given tree node by a GT_CALL.
 //
 // Arguments:
-//    use          - A pointer-to-a-pointer for the tree node
-//    sig          - The signature info for callHnd
-//    parents      - A pointer to tree walk data providing the context
-//    callHnd      - The method handle of the call to be generated
-//    entryPoint   - The method entrypoint of the call to be generated
-//    operands     - The operand list of the call to be generated
-//    operandCount - The number of operands in the operand list
+//    use                - A pointer-to-a-pointer for the tree node
+//    sig                - The signature info for callHnd
+//    parents            - A pointer to tree walk data providing the context
+//    callHnd            - The method handle of the call to be generated
+//    entryPoint         - The method entrypoint of the call to be generated
+//    operands           - The operand list of the call to be generated
+//    operandCount       - The number of operands in the operand list
+//    isSpecialIntrinsic - true if the GT_CALL should be marked as a special intrinsic
 //
 // Return Value:
 //    None.
@@ -29,7 +30,8 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use,
                                      CORINFO_CONST_LOOKUP entryPoint,
 #endif // FEATURE_READYTORUN
                                      GenTree** operands,
-                                     size_t operandCount)
+                                     size_t operandCount,
+                                     bool isSpecialIntrinsic)
 {
     GenTree* const tree          = *use;
     GenTree* const treeFirstNode = comp->fgGetFirstNode(tree);
@@ -40,6 +42,18 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use,
     // Create the call node
     GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->TypeGet());
 
+    if (isSpecialIntrinsic)
+    {
+#if defined(TARGET_XARCH)
+        // Mark this as having been a special intrinsic node
+        //
+        // This is used on xarch to track that it may need vzeroupper inserted to
+        // avoid the perf penalty on some hardware.
+
+        call->gtCallMoreFlags |= GTF_CALL_M_SPECIAL_INTRINSIC;
+#endif // TARGET_XARCH
+    }
+
     assert(sig != nullptr);
     var_types retType = JITtype2varType(sig->retType);
 
@@ -285,11 +299,15 @@ void Rationalizer::RewriteIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTree*>& parents)
     CORINFO_SIG_INFO sigInfo;
     comp->eeGetMethodSig(callHnd, &sigInfo);
 
+    // Regular Intrinsics often have their fallback in native and so
+    // should be treated as "special" once they become calls.
+    bool isSpecialIntrinsic = true;
+
     RewriteNodeAsCall(use, &sigInfo, parents, callHnd,
 #if defined(FEATURE_READYTORUN)
                       intrinsic->gtEntryPoint,
 #endif // FEATURE_READYTORUN
-                      operands, operandCount);
+                      operands, operandCount, isSpecialIntrinsic);
 }
 
 #if defined(FEATURE_HW_INTRINSICS)
@@ -544,11 +562,15 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStack<GenTree*>& parents)
     CORINFO_SIG_INFO sigInfo;
     comp->eeGetMethodSig(callHnd, &sigInfo);
 
+    // HWIntrinsics have their fallback implemented in managed and so
+    // are not treated as "special" once they become calls.
+    bool isSpecialIntrinsic = false;
+
     RewriteNodeAsCall(use, &sigInfo, parents, callHnd,
 #if defined(FEATURE_READYTORUN)
                       hwintrinsic->GetEntryPoint(),
 #endif // FEATURE_READYTORUN
-                      operands, operandCount);
+                      operands, operandCount, isSpecialIntrinsic);
 }
 
 #endif // FEATURE_HW_INTRINSICS
diff --git a/src/coreclr/jit/rationalize.h b/src/coreclr/jit/rationalize.h
index 2d578583abe3fd..923e10665cb002 100644
--- a/src/coreclr/jit/rationalize.h
+++ b/src/coreclr/jit/rationalize.h
@@ -44,7 +44,8 @@ class Rationalizer final : public Phase
                            CORINFO_CONST_LOOKUP entryPoint,
 #endif // FEATURE_READYTORUN
                            GenTree** operands,
-                           size_t operandCount);
+                           size_t operandCount,
+                           bool isSpecialIntrinsic);
 
     void RewriteIntrinsicAsUserCall(GenTree** use, Compiler::GenTreeStack& parents);
 #if defined(FEATURE_HW_INTRINSICS)
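With the signature change in rationalize.h, every caller of RewriteNodeAsCall must now decide explicitly whether the rewritten call is special. A hypothetical additional call site would follow the same shape as the two real ones above (sketch only; the argument names are placeholders mirroring those sites):

    RewriteNodeAsCall(use, &sigInfo, parents, callHnd,
    #if defined(FEATURE_READYTORUN)
                      entryPoint,
    #endif // FEATURE_READYTORUN
                      operands, operandCount, /* isSpecialIntrinsic */ false);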