Skip to content

Commit 6f221b4

Browse files
Ensure that math calls into the CRT are tracked as needing vzeroupper (#112011)
* Ensure that math calls into the CRT are tracked as needing vzeroupper * Apply suggestions from code review * Don't force vzeroupper for helpers written in managed
1 parent bc191c9 commit 6f221b4

File tree

10 files changed

+125
-76
lines changed

10 files changed

+125
-76
lines changed

src/coreclr/jit/compiler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10156,7 +10156,7 @@ JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
1015610156
{
1015710157
chars += printf("[CALL_M_NOGCCHECK]");
1015810158
}
10159-
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
10159+
if (call->IsSpecialIntrinsic())
1016010160
{
1016110161
chars += printf("[CALL_M_SPECIAL_INTRINSIC]");
1016210162
}

src/coreclr/jit/compiler.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4765,7 +4765,8 @@ class Compiler
47654765
R2RARG(CORINFO_CONST_LOOKUP* entryPoint),
47664766
var_types callType,
47674767
NamedIntrinsic intrinsicName,
4768-
bool tailCall);
4768+
bool tailCall,
4769+
bool* isSpecial);
47694770
GenTree* impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
47704771
CORINFO_SIG_INFO* sig,
47714772
CorInfoType callJitType,

src/coreclr/jit/gentree.cpp

Lines changed: 56 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2005,66 +2005,74 @@ bool GenTreeCall::NeedsVzeroupper(Compiler* comp)
20052005
}
20062006

20072007
bool needsVzeroupper = false;
2008+
bool checkSignature = false;
20082009

2009-
if (IsPInvoke())
2010-
{
2011-
// The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
2012-
// Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
2013-
// between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
2014-
// VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
2015-
// register) and before any call to an unknown function.
2010+
// The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
2011+
// Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
2012+
// between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
2013+
// VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
2014+
// register) and before any call to an unknown function.
20162015

2017-
switch (gtCallType)
2016+
switch (gtCallType)
2017+
{
2018+
case CT_USER_FUNC:
2019+
case CT_INDIRECT:
20182020
{
2019-
case CT_USER_FUNC:
2020-
case CT_INDIRECT:
2021-
{
2022-
// Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
2023-
// may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT
2021+
// Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
2022+
// may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT
20242023

2024+
if (IsPInvoke())
2025+
{
20252026
needsVzeroupper = true;
2026-
break;
20272027
}
2028-
2029-
case CT_HELPER:
2028+
else if (IsSpecialIntrinsic())
20302029
{
2031-
// Most helpers are well known to not use any floating-point or SIMD logic internally, but
2032-
// a few do exist so we need to ensure they are handled. They are identified by taking or
2033-
// returning a floating-point or SIMD type, regardless of how it is actually passed/returned.
2030+
checkSignature = true;
2031+
}
2032+
break;
2033+
}
20342034

2035-
if (varTypeUsesFloatReg(this))
2035+
case CT_HELPER:
2036+
{
2037+
// A few special cases exist that can't be found by signature alone, so we handle
2038+
// those explicitly here instead.
2039+
needsVzeroupper = IsHelperCall(comp, CORINFO_HELP_BULK_WRITEBARRIER);
2040+
2041+
// Most other helpers are well known to not use any floating-point or SIMD logic internally, but
2042+
// a few do exist so we need to ensure they are handled. They are identified by taking or
2043+
// returning a floating-point or SIMD type, regardless of how it is actually passed/returned but
2044+
// are excluded if we know they are implemented in managed.
2045+
checkSignature = !needsVzeroupper && !IsHelperCall(comp, CORINFO_HELP_DBL2INT_OVF) &&
2046+
!IsHelperCall(comp, CORINFO_HELP_DBL2LNG_OVF) &&
2047+
!IsHelperCall(comp, CORINFO_HELP_DBL2UINT_OVF) &&
2048+
!IsHelperCall(comp, CORINFO_HELP_DBL2ULNG_OVF);
2049+
break;
2050+
}
2051+
2052+
default:
2053+
{
2054+
unreached();
2055+
}
2056+
}
2057+
2058+
if (checkSignature)
2059+
{
2060+
if (varTypeUsesFloatReg(this))
2061+
{
2062+
needsVzeroupper = true;
2063+
}
2064+
else
2065+
{
2066+
for (CallArg& arg : gtArgs.Args())
2067+
{
2068+
if (varTypeUsesFloatReg(arg.GetSignatureType()))
20362069
{
20372070
needsVzeroupper = true;
20382071
break;
20392072
}
2040-
else
2041-
{
2042-
for (CallArg& arg : gtArgs.Args())
2043-
{
2044-
if (varTypeUsesFloatReg(arg.GetSignatureType()))
2045-
{
2046-
needsVzeroupper = true;
2047-
break;
2048-
}
2049-
}
2050-
}
2051-
break;
2052-
}
2053-
2054-
default:
2055-
{
2056-
unreached();
20572073
}
20582074
}
20592075
}
2060-
2061-
// Other special cases
2062-
//
2063-
if (!needsVzeroupper && IsHelperCall(comp, CORINFO_HELP_BULK_WRITEBARRIER))
2064-
{
2065-
needsVzeroupper = true;
2066-
}
2067-
20682076
return needsVzeroupper;
20692077
}
20702078
#endif // TARGET_XARCH
@@ -13708,7 +13716,7 @@ GenTree* Compiler::gtFoldExprCall(GenTreeCall* call)
1370813716
assert(!call->gtArgs.AreArgsComplete());
1370913717

1371013718
// Can only fold calls to special intrinsics.
13711-
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)
13719+
if (!call->IsSpecialIntrinsic())
1371213720
{
1371313721
return call;
1371413722
}
@@ -17675,7 +17683,7 @@ Compiler::TypeProducerKind Compiler::gtGetTypeProducerKind(GenTree* tree)
1767517683
return TPK_Handle;
1767617684
}
1767717685
}
17678-
else if (tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
17686+
else if (tree->AsCall()->IsSpecialIntrinsic())
1767917687
{
1768017688
if (lookupNamedIntrinsic(tree->AsCall()->gtCallMethHnd) == NI_System_Object_GetType)
1768117689
{
@@ -19135,7 +19143,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b
1913519143
case GT_CALL:
1913619144
{
1913719145
GenTreeCall* call = obj->AsCall();
19138-
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
19146+
if (call->IsSpecialIntrinsic())
1913919147
{
1914019148
NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd);
1914119149
if ((ni == NI_System_Array_Clone) || (ni == NI_System_Object_MemberwiseClone))

src/coreclr/jit/helperexpansion.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1605,7 +1605,7 @@ PhaseStatus Compiler::fgVNBasedIntrinsicExpansion()
16051605
//
16061606
bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call)
16071607
{
1608-
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)
1608+
if (!call->IsSpecialIntrinsic())
16091609
{
16101610
return false;
16111611
}

src/coreclr/jit/importercalls.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1490,7 +1490,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
14901490
{
14911491
spillStack = false;
14921492
}
1493-
else if ((callNode->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
1493+
else if (callNode->IsSpecialIntrinsic())
14941494
{
14951495
spillStack = false;
14961496
}
@@ -4090,7 +4090,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
40904090
if (impStackTop().val->OperIs(GT_RET_EXPR))
40914091
{
40924092
GenTreeCall* call = impStackTop().val->AsRetExpr()->gtInlineCandidate->AsCall();
4093-
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
4093+
if (call->IsSpecialIntrinsic())
40944094
{
40954095
if (lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Threading_Thread_get_CurrentThread)
40964096
{
@@ -4336,7 +4336,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
43364336
case NI_System_Math_Log2:
43374337
case NI_System_Math_Log10:
43384338
{
4339-
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall);
4339+
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);
43404340
break;
43414341
}
43424342

@@ -4429,7 +4429,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
44294429
case NI_System_Math_Tanh:
44304430
case NI_System_Math_Truncate:
44314431
{
4432-
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall);
4432+
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);
44334433
break;
44344434
}
44354435

@@ -9533,29 +9533,46 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
95339533
CORINFO_SIG_INFO* sig R2RARG(CORINFO_CONST_LOOKUP* entryPoint),
95349534
var_types callType,
95359535
NamedIntrinsic intrinsicName,
9536-
bool tailCall)
9536+
bool tailCall,
9537+
bool* isSpecial)
95379538
{
95389539
GenTree* op1;
95399540
GenTree* op2;
95409541

95419542
assert(callType != TYP_STRUCT);
95429543
assert(IsMathIntrinsic(intrinsicName));
9544+
assert(isSpecial != nullptr);
95439545

95449546
op1 = nullptr;
95459547

9548+
bool isIntrinsicImplementedByUserCall = IsIntrinsicImplementedByUserCall(intrinsicName);
9549+
9550+
if (isIntrinsicImplementedByUserCall)
9551+
{
9552+
#if defined(TARGET_XARCH)
9553+
// We want to track math intrinsics implemented as user calls as special
9554+
// to ensure we don't lose track of the fact it will call into native code
9555+
//
9556+
// This is used on xarch to track that it may need vzeroupper inserted to
9557+
// avoid the perf penalty on some hardware.
9558+
9559+
*isSpecial = true;
9560+
#endif // TARGET_XARCH
9561+
}
9562+
95469563
#if !defined(TARGET_X86)
95479564
// Intrinsics that are not implemented directly by target instructions will
95489565
// be re-materialized as users calls in rationalizer. For prefixed tail calls,
95499566
// don't do this optimization, because
95509567
// a) For back compatibility reasons on desktop .NET Framework 4.6 / 4.6.1
95519568
// b) It will be non-trivial task or too late to re-materialize a surviving
95529569
// tail prefixed GT_INTRINSIC as tail call in rationalizer.
9553-
if (!IsIntrinsicImplementedByUserCall(intrinsicName) || !tailCall)
9570+
if (!isIntrinsicImplementedByUserCall || !tailCall)
95549571
#else
95559572
// On x86 RyuJIT, importing intrinsics that are implemented as user calls can cause incorrect calculation
95569573
// of the depth of the stack if these intrinsics are used as arguments to another call. This causes bad
95579574
// code generation for certain EH constructs.
9558-
if (!IsIntrinsicImplementedByUserCall(intrinsicName))
9575+
if (!isIntrinsicImplementedByUserCall)
95599576
#endif
95609577
{
95619578
CORINFO_ARG_LIST_HANDLE arg = sig->args;
@@ -9588,7 +9605,7 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
95889605
NO_WAY("Unsupported number of args for Math Intrinsic");
95899606
}
95909607

9591-
if (IsIntrinsicImplementedByUserCall(intrinsicName))
9608+
if (isIntrinsicImplementedByUserCall)
95929609
{
95939610
op1->gtFlags |= GTF_CALL;
95949611
}
@@ -10073,7 +10090,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
1007310090
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH
1007410091

1007510092
// TODO-CQ: Returning this as an intrinsic blocks inlining and is undesirable
10076-
// return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall);
10093+
// return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall, isSpecial);
1007710094

1007810095
return nullptr;
1007910096
}

src/coreclr/jit/importervectorization.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span)
351351
argCall = span->AsCall();
352352
}
353353

354-
if ((argCall != nullptr) && ((argCall->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0))
354+
if ((argCall != nullptr) && argCall->IsSpecialIntrinsic())
355355
{
356356
const NamedIntrinsic ni = lookupNamedIntrinsic(argCall->gtCallMethHnd);
357357
if ((ni == NI_System_MemoryExtensions_AsSpan) || (ni == NI_System_String_op_Implicit))

src/coreclr/jit/lower.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2817,7 +2817,7 @@ GenTree* Lowering::LowerCall(GenTree* node)
28172817

28182818
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
28192819
GenTree* nextNode = nullptr;
2820-
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
2820+
if (call->IsSpecialIntrinsic())
28212821
{
28222822
switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd))
28232823
{

src/coreclr/jit/morph.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4975,7 +4975,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
49754975
#endif
49764976
};
49774977

4978-
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
4978+
if (call->IsSpecialIntrinsic())
49794979
{
49804980
failTailCall("Might turn into an intrinsic");
49814981
return nullptr;
@@ -6870,7 +6870,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
68706870
#endif
68716871
}
68726872

6873-
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
6873+
if (call->IsSpecialIntrinsic())
68746874
{
68756875
if (lookupNamedIntrinsic(call->AsCall()->gtCallMethHnd) ==
68766876
NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8)
@@ -6959,7 +6959,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
69596959
// Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
69606960
//
69616961
// We need to do these before the arguments are morphed
6962-
if (!call->gtArgs.AreArgsComplete() && (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
6962+
if (!call->gtArgs.AreArgsComplete() && call->IsSpecialIntrinsic())
69636963
{
69646964
// See if this is foldable
69656965
GenTree* optTree = gtFoldExprCall(call);

0 commit comments

Comments
 (0)