diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs
index df3da7a876b787..479de96b10df41 100644
--- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs
+++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs
@@ -16,11 +16,11 @@ class AsmOffsets
     // Debug build offsets
 #if TARGET_AMD64
 #if TARGET_UNIX
-        public const int SIZEOF__REGDISPLAY = 0x1b90;
+        public const int SIZEOF__REGDISPLAY = 0x1c10;
         public const int OFFSETOF__REGDISPLAY__SP = 0x1b78;
         public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b80;
 #else // TARGET_UNIX
-        public const int SIZEOF__REGDISPLAY = 0xbf0;
+        public const int SIZEOF__REGDISPLAY = 0xc70;
         public const int OFFSETOF__REGDISPLAY__SP = 0xbd8;
         public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0;
 #endif // TARGET_UNIX
@@ -82,7 +82,7 @@ class AsmOffsets
     // Release build offsets
 #if TARGET_AMD64
 #if TARGET_UNIX
-        public const int SIZEOF__REGDISPLAY = 0x1b80;
+        public const int SIZEOF__REGDISPLAY = 0x1c00;
         public const int OFFSETOF__REGDISPLAY__SP = 0x1b70;
         public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b78;
 #else // TARGET_UNIX
diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp
index 3845048743a2de..06965d8ab2fd4f 100644
--- a/src/coreclr/gc/gc.cpp
+++ b/src/coreclr/gc/gc.cpp
@@ -27,6 +27,7 @@
 #include "handletable.inl"
 #include "gcenv.inl"
 #include "gceventstatus.h"
+#include <minipal/cpufeatures.h>
 
 #ifdef __INTELLISENSE__
 #if defined(FEATURE_SVR_GC)
@@ -143,6 +144,15 @@ bool g_built_with_svr_gc = true;
 bool g_built_with_svr_gc = false;
 #endif // FEATURE_SVR_GC
 
+// Stores the ISA capability of the hardware
+int cpuFeatures = 0;
+#if defined(TARGET_AMD64)
+inline bool IsAPXSupported()
+{
+    return (cpuFeatures & XArchIntrinsicConstants_Apx);
+}
+#endif // TARGET_AMD64
+
 #if defined(BUILDENV_DEBUG)
 uint8_t g_build_variant = 0;
 #elif
defined(BUILDENV_CHECKED) @@ -14698,7 +14708,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, #endif // __linux__ #ifdef USE_VXSORT - InitSupportedInstructionSet ((int32_t)GCConfig::GetGCEnabledInstructionSets()); + InitSupportedInstructionSet ((int32_t)GCConfig::GetGCEnabledInstructionSets(), cpuFeatures); #endif if (!init_semi_shared()) @@ -49286,6 +49296,9 @@ HRESULT GCHeap::Initialize() return CLR_E_GC_BAD_HARD_LIMIT; } + // initialize the cpuFeatures from minipal + cpuFeatures = minipal_getcpufeatures(); + uint32_t nhp = 1; uint32_t nhp_from_config = 0; uint32_t max_nhp_from_config = (uint32_t)GCConfig::GetMaxHeapCount(); diff --git a/src/coreclr/gc/gcimpl.h b/src/coreclr/gc/gcimpl.h index 93c856a2145f0d..fa3996e6acf0df 100644 --- a/src/coreclr/gc/gcimpl.h +++ b/src/coreclr/gc/gcimpl.h @@ -40,6 +40,11 @@ extern bool g_fFinalizerRunOnShutDown; extern bool g_built_with_svr_gc; extern uint8_t g_build_variant; extern VOLATILE(int32_t) g_no_gc_lock; +// Stores the mask for supported instruction sets +extern int cpuFeatures; +#if defined(TARGET_AMD64) +extern inline bool IsAPXSupported(); +#endif // TARGET_AMD64 class GCHeap : public IGCHeapInternal { diff --git a/src/coreclr/gc/vxsort/do_vxsort.h b/src/coreclr/gc/vxsort/do_vxsort.h index edd803f310f492..6044aa6ae2e52c 100644 --- a/src/coreclr/gc/vxsort/do_vxsort.h +++ b/src/coreclr/gc/vxsort/do_vxsort.h @@ -8,7 +8,7 @@ enum class InstructionSet AVX512F = 1, }; -void InitSupportedInstructionSet (int32_t configSetting); +void InitSupportedInstructionSet (int32_t configSetting, int cpuFeatures); bool IsSupportedInstructionSet (InstructionSet instructionSet); void do_vxsort_avx2 (uint8_t** low, uint8_t** high, uint8_t *range_low, uint8_t *range_high); diff --git a/src/coreclr/gc/vxsort/isa_detection.cpp b/src/coreclr/gc/vxsort/isa_detection.cpp index b069c8be9bee04..5172b6a314d93c 100644 --- a/src/coreclr/gc/vxsort/isa_detection.cpp +++ b/src/coreclr/gc/vxsort/isa_detection.cpp @@ -13,9 +13,9 @@ enum class 
SupportedISA
     AVX512F = 1 << (int)InstructionSet::AVX512F
 };
 
-SupportedISA DetermineSupportedISA()
+SupportedISA DetermineSupportedISA(int cpuFeatures)
 {
-    int cpuFeatures = minipal_getcpufeatures();
+    // cpuFeatures is read once via minipal_getcpufeatures() in GCHeap::Initialize and passed in.
     if ((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0)
     {
         if ((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0)
@@ -39,9 +39,9 @@ bool IsSupportedInstructionSet (InstructionSet instructionSet)
     return ((int)s_supportedISA & (1 << (int)instructionSet)) != 0;
 }
 
-void InitSupportedInstructionSet (int32_t configSetting)
+void InitSupportedInstructionSet (int32_t configSetting, int cpuFeatures)
 {
-    s_supportedISA = (SupportedISA)((int)DetermineSupportedISA() & configSetting);
+    s_supportedISA = (SupportedISA)((int)DetermineSupportedISA(cpuFeatures) & configSetting);
     // we are assuming that AVX2 can be used if AVX512F can,
     // so if AVX2 is disabled, we need to disable AVX512F as well
     if (!((int)s_supportedISA & (int)SupportedISA::AVX2))
diff --git a/src/coreclr/gcinfo/gcinfodumper.cpp b/src/coreclr/gcinfo/gcinfodumper.cpp
index c22850c2b0f10c..62cb3053eb7e01 100644
--- a/src/coreclr/gcinfo/gcinfodumper.cpp
+++ b/src/coreclr/gcinfo/gcinfodumper.cpp
@@ -131,6 +131,24 @@ BOOL GcInfoDumper::ReportPointerRecord (
         REG(r13, R13),
         REG(r14, R14),
         REG(r15, R15),
+#undef REG
+#define REG(reg, field) { offsetof(Amd64VolatileContextPointer, field) }
+        REG(r16, R16),
+        REG(r17, R17),
+        REG(r18, R18),
+        REG(r19, R19),
+        REG(r20, R20),
+        REG(r21, R21),
+        REG(r22, R22),
+        REG(r23, R23),
+        REG(r24, R24),
+        REG(r25, R25),
+        REG(r26, R26),
+        REG(r27, R27),
+        REG(r28, R28),
+        REG(r29, R29),
+        REG(r30, R30),
+        REG(r31, R31),
 #elif defined(TARGET_ARM)
 #undef REG
 #define REG(reg, field) { offsetof(ArmVolatileContextPointer, field) }
@@ -294,7 +312,12 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this
 #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
     BYTE* pContext = (BYTE*)&(pRD->volatileCurrContextPointers);
-#else // TARGET_ARM || TARGET_ARM64 || TARGET_RISCV64 || TARGET_LOONGARCH64
+#else // TARGET_ARM || TARGET_ARM64 || TARGET_RISCV64 || TARGET_LOONGARCH64
     BYTE* pContext = (BYTE*)pRD->pCurrentContext;
 #endif
@@ -390,7 +408,12 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this
         {
             continue;
         }
-#endif
+#elif defined(TARGET_AMD64)
+        if ((ctx != 0 && iEncodedReg > 15) || !IsAPXSupported())
+        {
+            break;
+        }
+#endif // TARGET_AMD64
         {
             _ASSERTE(iReg < nCONTEXTRegisters);
 #ifdef TARGET_ARM
@@ -414,6 +437,19 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this
             {
                 pReg = (SIZE_T*)((BYTE*)pRD->pCurrentContext + rgRegisters[iReg].cbContextOffset);
             }
+#elif defined(TARGET_AMD64)
+            if (ctx == 0 && iReg == 16)
+            {
+                pContext = (BYTE*)&(pRD->volatileCurrContextPointers);
+            }
+            if (ctx == 0 && iReg >= 16)
+            {
+                pReg = *(SIZE_T**)(pContext + rgRegisters[iReg].cbContextOffset);
+            }
+            else
+            {
+                pReg = (SIZE_T*)(pContext + rgRegisters[iReg].cbContextOffset);
+            }
 #else
             pReg = (SIZE_T*)(pContext + rgRegisters[iReg].cbContextOffset);
 #endif
@@ -664,6 +700,16 @@ GcInfoDumper::EnumerateStateChangesResults GcInfoDumper::EnumerateStateChanges (
         *(ppCurrentRax + iReg) = &regdisp.pCurrentContext->Rax + iReg;
         *(ppCallerRax  + iReg) = &regdisp.pCallerContext ->Rax + iReg;
     }
+#if defined(TARGET_UNIX) && defined(HOST_UNIX)
+    if (IsAPXSupported())
+    {
+        ULONG64 **ppVolatileReg = &regdisp.volatileCurrContextPointers.R16;
+        for (iReg = 0; iReg < 16; iReg++)
+        {
+            *(ppVolatileReg+iReg) = &regdisp.pCurrentContext->R16 + iReg;
+        }
+    }
+#endif // TARGET_UNIX
 #elif defined(TARGET_ARM)
     FILL_REGS(pCurrentContext->R0, 16);
     FILL_REGS(pCallerContext->R0, 16);
diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h
index 7a5cf9d9d0cec4..0c976dc4fb8a25 100644
--- a/src/coreclr/inc/regdisp.h
+++ b/src/coreclr/inc/regdisp.h
@@ -197,6 +197,33 @@ typedef struct _Arm64VolatileContextPointer
 } Arm64VolatileContextPointer;
 #endif //TARGET_ARM64
 
+#if defined(TARGET_AMD64)
+typedef struct _Amd64VolatileContextPointer +{ + union { + struct { + PDWORD64 R16; + PDWORD64 R17; + PDWORD64 R18; + PDWORD64 R19; + PDWORD64 R20; + PDWORD64 R21; + PDWORD64 R22; + PDWORD64 R23; + PDWORD64 R24; + PDWORD64 R25; + PDWORD64 R26; + PDWORD64 R27; + PDWORD64 R28; + PDWORD64 R29; + PDWORD64 R30; + PDWORD64 R31; + }; + PDWORD64 R[16]; + }; +} Amd64VolatileContextPointer; +#endif //TARGET_AMD64 + #if defined(TARGET_LOONGARCH64) typedef struct _LoongArch64VolatileContextPointer { @@ -253,6 +280,10 @@ struct REGDISPLAY : public REGDISPLAY_BASE { LoongArch64VolatileContextPointer volatileCurrContextPointers; #endif +#if defined(TARGET_AMD64) + Amd64VolatileContextPointer volatileCurrContextPointers; +#endif + #ifdef TARGET_RISCV64 RiscV64VolatileContextPointer volatileCurrContextPointers; #endif @@ -563,7 +594,11 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC // Fill volatile context pointers. They can be used by GC in the case of the leaf frame for (int i=0; i < 18; i++) pRD->volatileCurrContextPointers.X[i] = &pctx->X[i]; -#elif defined(TARGET_LOONGARCH64) // TARGET_ARM64 +#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) && defined(HOST_UNIX) // TARGET_ARM64 + // Fill volatile context pointers. 
They can be used by GC in the case of the leaf frame
+    for (int i=0; i < 16; i++)
+        pRD->volatileCurrContextPointers.R[i] = &pctx->R[i];
+#elif defined(TARGET_LOONGARCH64) // TARGET_AMD64 && TARGET_UNIX && HOST_UNIX
     pRD->volatileCurrContextPointers.A0 = &pctx->A0;
     pRD->volatileCurrContextPointers.A1 = &pctx->A1;
     pRD->volatileCurrContextPointers.A2 = &pctx->A2;
@@ -663,6 +698,16 @@ inline size_t * getRegAddr (unsigned regNum, PTR_CONTEXT regs)
     };
 
     return (PTR_size_t)(PTR_BYTE(regs) + OFFSET_OF_REGISTERS[regNum]);
+#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) && defined(HOST_UNIX)
+    _ASSERTE(regNum < 32);
+    if (regNum < 16)
+    {
+        return (size_t *)&regs->Rax + regNum;
+    }
+    else
+    {
+        return (size_t *)&regs->R16 + (regNum - 16);
+    }
 #elif defined(TARGET_AMD64)
     _ASSERTE(regNum < 16);
     return (size_t *)&regs->Rax + regNum;
diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
index afeb2a408851a4..945ec257cc4c64 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
+++ b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
@@ -8,7 +8,7 @@
 // NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix
 
 #ifndef UNIX_AMD64_ABI
-PLAT_ASM_SIZEOF(250, ExInfo)
+PLAT_ASM_SIZEOF(2d0, ExInfo)
 PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo)
 PLAT_ASM_OFFSET(8, ExInfo, m_pExContext)
 PLAT_ASM_OFFSET(10, ExInfo, m_exception)
@@ -16,7 +16,7 @@ PLAT_ASM_OFFSET(18, ExInfo, m_kind)
 PLAT_ASM_OFFSET(19, ExInfo, m_passNumber)
 PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause)
 PLAT_ASM_OFFSET(20, ExInfo, m_frameIter)
-PLAT_ASM_OFFSET(240, ExInfo, m_notifyDebuggerSP)
+PLAT_ASM_OFFSET(2c0, ExInfo, m_notifyDebuggerSP)
 
 PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP)
 PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer)
@@ -24,12 +24,12 @@ PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread)
 PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags)
 PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs)
 
-PLAT_ASM_SIZEOF(220,
StackFrameIterator) +PLAT_ASM_SIZEOF(2A0, StackFrameIterator) PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) -PLAT_ASM_OFFSET(210, StackFrameIterator, m_OriginalControlPC) -PLAT_ASM_OFFSET(218, StackFrameIterator, m_pPreviousTransitionFrame) +PLAT_ASM_OFFSET(290, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(298, StackFrameIterator, m_pPreviousTransitionFrame) PLAT_ASM_SIZEOF(100, PAL_LIMITED_CONTEXT) PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) @@ -56,10 +56,10 @@ PLAT_ASM_OFFSET(0d0, PAL_LIMITED_CONTEXT, Xmm13) PLAT_ASM_OFFSET(0e0, PAL_LIMITED_CONTEXT, Xmm14) PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15) -PLAT_ASM_SIZEOF(130, REGDISPLAY) -PLAT_ASM_OFFSET(78, REGDISPLAY, SP) -PLAT_ASM_OFFSET(80, REGDISPLAY, IP) -PLAT_ASM_OFFSET(88, REGDISPLAY, SSP) +PLAT_ASM_SIZEOF(1b0, REGDISPLAY) +PLAT_ASM_OFFSET(f8, REGDISPLAY, SP) +PLAT_ASM_OFFSET(100, REGDISPLAY, IP) +PLAT_ASM_OFFSET(108, REGDISPLAY, SSP) PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) @@ -69,11 +69,11 @@ PLAT_ASM_OFFSET(58, REGDISPLAY, pR12) PLAT_ASM_OFFSET(60, REGDISPLAY, pR13) PLAT_ASM_OFFSET(68, REGDISPLAY, pR14) PLAT_ASM_OFFSET(70, REGDISPLAY, pR15) -PLAT_ASM_OFFSET(90, REGDISPLAY, Xmm) +PLAT_ASM_OFFSET(110, REGDISPLAY, Xmm) #else // !UNIX_AMD64_ABI -PLAT_ASM_SIZEOF(190, ExInfo) +PLAT_ASM_SIZEOF(210, ExInfo) PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) PLAT_ASM_OFFSET(10, ExInfo, m_exception) @@ -81,7 +81,7 @@ PLAT_ASM_OFFSET(18, ExInfo, m_kind) PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) -PLAT_ASM_OFFSET(188, ExInfo, m_notifyDebuggerSP) +PLAT_ASM_OFFSET(208, ExInfo, m_notifyDebuggerSP) PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) @@ -89,12 +89,12 @@ 
PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) -PLAT_ASM_SIZEOF(168, StackFrameIterator) +PLAT_ASM_SIZEOF(1e8, StackFrameIterator) PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) -PLAT_ASM_OFFSET(158, StackFrameIterator, m_OriginalControlPC) -PLAT_ASM_OFFSET(160, StackFrameIterator, m_pPreviousTransitionFrame) +PLAT_ASM_OFFSET(1d8, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(1e0, StackFrameIterator, m_pPreviousTransitionFrame) PLAT_ASM_SIZEOF(50, PAL_LIMITED_CONTEXT) PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) @@ -110,8 +110,8 @@ PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R13) PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R14) PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R15) -PLAT_ASM_SIZEOF(88, REGDISPLAY) -PLAT_ASM_OFFSET(78, REGDISPLAY, SP) +PLAT_ASM_SIZEOF(108, REGDISPLAY) +PLAT_ASM_OFFSET(f8, REGDISPLAY, SP) PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 41eb41bf746975..83dcf8aa48daa4 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -4,9 +4,22 @@ #ifndef __regdisplay_h__ #define __regdisplay_h__ +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) +// This field is inspected from the generated code to determine what intrinsics are available. 
+EXTERN_C int g_cpuFeatures;
+#endif
+
 #if defined(TARGET_X86) || defined(TARGET_AMD64)
 
 #include "PalLimitedContext.h" // Fp128
+#include <minipal/cpufeatures.h>
+
+#if defined(TARGET_AMD64)
+inline bool IsAPXSupported()
+{
+    return (g_cpuFeatures & XArchIntrinsicConstants_Apx);
+}
+#endif // TARGET_AMD64
 
 struct REGDISPLAY
 {
@@ -27,6 +40,22 @@ struct REGDISPLAY
     PTR_uintptr_t pR13;
     PTR_uintptr_t pR14;
     PTR_uintptr_t pR15;
+    PTR_uintptr_t pR16;
+    PTR_uintptr_t pR17;
+    PTR_uintptr_t pR18;
+    PTR_uintptr_t pR19;
+    PTR_uintptr_t pR20;
+    PTR_uintptr_t pR21;
+    PTR_uintptr_t pR22;
+    PTR_uintptr_t pR23;
+    PTR_uintptr_t pR24;
+    PTR_uintptr_t pR25;
+    PTR_uintptr_t pR26;
+    PTR_uintptr_t pR27;
+    PTR_uintptr_t pR28;
+    PTR_uintptr_t pR29;
+    PTR_uintptr_t pR30;
+    PTR_uintptr_t pR31;
 #endif // TARGET_AMD64
 
     uintptr_t SP;
diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp
index c80ae2069abe48..6ceea8eaade598 100644
--- a/src/coreclr/nativeaot/Runtime/startup.cpp
+++ b/src/coreclr/nativeaot/Runtime/startup.cpp
@@ -50,7 +50,7 @@ extern RhConfig * g_pRhConfig;
 
 #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
 // This field is inspected from the generated code to determine what intrinsics are available.
-EXTERN_C int g_cpuFeatures;
+// Note: g_cpuFeatures is now declared EXTERN_C in regdisplay.h so REGDISPLAY helpers can read it.
 int g_cpuFeatures = 0;
 
 // This field is defined in the generated code and sets the ISA expectations.
diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 8da66ba362488a..50e2bfe729126a 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1464,24 +1464,29 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { M512 Zmm31; }; - struct + // XSTATE_APX + union { - DWORD64 R16; - DWORD64 R17; - DWORD64 R18; - DWORD64 R19; - DWORD64 R20; - DWORD64 R21; - DWORD64 R22; - DWORD64 R23; - DWORD64 R24; - DWORD64 R25; - DWORD64 R26; - DWORD64 R27; - DWORD64 R28; - DWORD64 R29; - DWORD64 R30; - DWORD64 R31; + struct + { + DWORD64 R16; + DWORD64 R17; + DWORD64 R18; + DWORD64 R19; + DWORD64 R20; + DWORD64 R21; + DWORD64 R22; + DWORD64 R23; + DWORD64 R24; + DWORD64 R25; + DWORD64 R26; + DWORD64 R27; + DWORD64 R28; + DWORD64 R29; + DWORD64 R30; + DWORD64 R31; + }; + DWORD64 R[16]; }; } CONTEXT, *PCONTEXT, *LPCONTEXT; diff --git a/src/coreclr/unwinder/amd64/unwinder.cpp b/src/coreclr/unwinder/amd64/unwinder.cpp index 7fadcefd758aa7..ca4bb14c674f9d 100644 --- a/src/coreclr/unwinder/amd64/unwinder.cpp +++ b/src/coreclr/unwinder/amd64/unwinder.cpp @@ -205,6 +205,8 @@ BOOL DacUnwindStackFrame(CONTEXT * pContext, KNONVOLATILE_CONTEXT_POINTERS* pCon if (res && pContextPointers) { + // TODO APX: this function restores the callee saved registers. + // As of now, this does not need to restore APX EGPRs. 
for (int i = 0; i < 16; i++) { *(&pContextPointers->Rax + i) = &pContext->Rax + i; diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index 9ddeafc268f6d9..65d9893c1f5413 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -59,6 +59,11 @@ void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD) pContextPointers->R9 = NULL; pContextPointers->R10 = NULL; pContextPointers->R11 = NULL; + +#if defined(TARGET_UNIX) + for (int i=0; i < 16; i++) + pRD->volatileCurrContextPointers.R[i] = NULL; +#endif // TARGET_UNIX } void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) @@ -227,6 +232,14 @@ void ResumableFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFlo pRD->pCurrentContextPointers->R14 = &m_Regs->R14; pRD->pCurrentContextPointers->R15 = &m_Regs->R15; +#if defined(TARGET_UNIX) + if (IsAPXSupported()) + { + for (int i = 0; i < 16; i++) + pRD->volatileCurrContextPointers.R[i] = &m_Regs->R[i]; + } +#endif // TARGET_UNIX + pRD->IsCallerContextValid = FALSE; pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. 
diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index 03aaed2aa9515f..3e302f9922c0ff 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -42,6 +42,9 @@
 #include "perfmap.h"
 #endif
 
+// cpufeatures for VM
+int cpuFeatures = 0;
+
 // Default number of jump stubs in a jump stub block
 #define DEFAULT_JUMPSTUBS_PER_BLOCK 32
 
@@ -1178,7 +1181,7 @@ void EEJitManager::SetCpuInfo()
 
     CORJIT_FLAGS CPUCompileFlags;
 
-    int cpuFeatures = minipal_getcpufeatures();
+    cpuFeatures = minipal_getcpufeatures();
 
     // Get the maximum bitwidth of Vector<T>, rounding down to the nearest multiple of 128-bits
     uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128;
diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h
index 9720f029e30c2a..97acb3c9f41a83 100644
--- a/src/coreclr/vm/codeman.h
+++ b/src/coreclr/vm/codeman.h
@@ -70,6 +70,9 @@ Module Name:
 #ifdef TARGET_X86
 #include "gc_unwind_x86.h"
 #endif
+#ifdef TARGET_AMD64
+#include <minipal/cpufeatures.h>
+#endif
 
 class MethodDesc;
 class ICorJitCompiler;
@@ -88,6 +91,17 @@ typedef struct
 } EH_CLAUSE_ENUMERATOR;
 class EECodeInfo;
 
+// Cache the cpufeatures for use in other parts of VM.
+// This is mainly added here to use the variable in GC
+// APX support checks
+extern int cpuFeatures;
+#if defined(TARGET_AMD64)
+inline bool IsAPXSupported()
+{
+    return (cpuFeatures & XArchIntrinsicConstants_Apx);
+}
+#endif // TARGET_AMD64
+
 #define ROUND_DOWN_TO_PAGE(x)   ( (size_t) (x) & ~((size_t)GetOsPageSize()-1))
 #define ROUND_UP_TO_PAGE(x)     (((size_t) (x) + (GetOsPageSize()-1)) & ~((size_t)GetOsPageSize()-1))
diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp
index 00bfa6b96f6d63..2289b34dacdd9e 100644
--- a/src/coreclr/vm/gcinfodecoder.cpp
+++ b/src/coreclr/vm/gcinfodecoder.cpp
@@ -1488,18 +1488,24 @@ template <typename GcInfoEncoding> OBJECTREF* TGcInfoDecoder<GcInfoEncoding>::Ge
     PREGDISPLAY     pRD
     )
 {
-    _ASSERTE(regNum >= 0 && regNum <= 16);
+    _ASSERTE(regNum >= 0 && (regNum <= 16 || (IsAPXSupported() && regNum <= 32)));
     _ASSERTE(regNum != 4);  // rsp
 
 #ifdef FEATURE_NATIVEAOT
     PTR_uintptr_t* ppRax = &pRD->pRax;
 
     if (regNum > 4) regNum--; // rsp is skipped in NativeAOT RegDisplay
-#else
+#else // FEATURE_NATIVEAOT
     // The fields of KNONVOLATILE_CONTEXT_POINTERS are in the same order as
     // the processor encoding numbers.
-    ULONGLONG **ppRax = &pRD->pCurrentContextPointers->Rax;
-#endif
+    ULONGLONG **ppRax = &pRD->pCurrentContextPointers->Rax;
+    if (regNum >= 16)
+    {
+        _ASSERTE(IsAPXSupported());
+        ppRax = &pRD->volatileCurrContextPointers.R16;
+        return (OBJECTREF*)*(ppRax + regNum - 16);
+    }
+#endif // FEATURE_NATIVEAOT
 
     return (OBJECTREF*)*(ppRax + regNum);
 }
@@ -1510,12 +1515,20 @@ template <typename GcInfoEncoding> OBJECTREF* TGcInfoDecoder<GcInfoEncoding>::Ge
     PREGDISPLAY     pRD
     )
 {
+#if defined(TARGET_UNIX)
+    _ASSERTE(regNum >= 0 && regNum <= 32);
+#else // TARGET_UNIX
     _ASSERTE(regNum >= 0 && regNum <= 16);
+#endif // TARGET_UNIX
     _ASSERTE(regNum != 4);  // rsp
 
     // The fields of CONTEXT are in the same order as
     // the processor encoding numbers.
-
+    if (IsAPXSupported() && regNum >= 16)
+    {
+        ULONGLONG *pRax = &pRD->pCurrentContext->R16;
+        return (OBJECTREF*)(pRax + regNum - 16);
+    }
     ULONGLONG *pRax = &pRD->pCurrentContext->Rax;
 
     return (OBJECTREF*)(pRax + regNum);
@@ -1524,10 +1537,14 @@
 template <typename GcInfoEncoding>
 bool TGcInfoDecoder<GcInfoEncoding>::IsScratchRegister(int regNum, PREGDISPLAY pRD)
 {
+#if defined(TARGET_UNIX)
+    _ASSERTE(regNum >= 0 && regNum <= 32);
+#else // TARGET_UNIX
     _ASSERTE(regNum >= 0 && regNum <= 16);
+#endif // TARGET_UNIX
     _ASSERTE(regNum != 4);  // rsp
 
-    UINT16 PreservedRegMask =
+    UINT32 PreservedRegMask =
           (1 << 3)  // rbx
         | (1 << 5)  // rbp
 #ifndef UNIX_AMD64_ABI
@@ -1568,7 +1585,11 @@ template <typename GcInfoEncoding> void TGcInfoDecoder<GcInfoEncoding>::ReportRe
 {
     GCINFODECODER_CONTRACT;
 
+#if defined(TARGET_UNIX)
+    _ASSERTE(regNum >= 0 && regNum <= 32);
+#else // TARGET_UNIX
     _ASSERTE(regNum >= 0 && regNum <= 16);
+#endif // TARGET_UNIX
     _ASSERTE(regNum != 4);  // rsp
 
     LOG((LF_GCROOTS, LL_INFO1000, "Reporting " FMT_REG, regNum ));
diff --git a/src/tests/JIT/Methodical/GC/Regress/GPRStressR16toR31.cs b/src/tests/JIT/Methodical/GC/Regress/GPRStressR16toR31.cs
new file mode 100644
index 00000000000000..de115fab1a9c78
--- /dev/null
+++ b/src/tests/JIT/Methodical/GC/Regress/GPRStressR16toR31.cs
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+
+class GPRStressR16toR31
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void StressRegisters()
+    {
+        // 32 reference variables to force JIT to use all GPRs
+        object o0 = new object(), o1 = new object(), o2 = new object(), o3 = new object();
+        object o4 = new object(), o5 = new object(), o6 = new object(), o7 = new object();
+        object o8 = new object(), o9 = new object(), o10 = new object(), o11 = new object();
+        object o12 = new object(), o13 = new object(), o14 = new object(), o15 = new object();
+        object o16 = new object(), o17 = new object(), o18 = new object(), o19 = new object();
+        object o20 = new object(), o21 = new object(), o22 = new object(), o23 = new object();
+        object o24 = new object(), o25 = new object(), o26 = new object(), o27 = new object();
+        object o28 = new object(), o29 = new object(), o30 = new object(), o31 = new object();
+
+        // Use all variables in a way that prevents optimization
+        for (int i = 0; i < 10000; i++)
+        {
+            GC.Collect();
+            GC.KeepAlive(o0); GC.KeepAlive(o1); GC.KeepAlive(o2); GC.KeepAlive(o3);
+            GC.KeepAlive(o4); GC.KeepAlive(o5); GC.KeepAlive(o6); GC.KeepAlive(o7);
+            GC.KeepAlive(o8); GC.KeepAlive(o9); GC.KeepAlive(o10); GC.KeepAlive(o11);
+            GC.KeepAlive(o12); GC.KeepAlive(o13); GC.KeepAlive(o14); GC.KeepAlive(o15);
+            GC.KeepAlive(o16); GC.KeepAlive(o17); GC.KeepAlive(o18); GC.KeepAlive(o19);
+            GC.KeepAlive(o20); GC.KeepAlive(o21); GC.KeepAlive(o22); GC.KeepAlive(o23);
+            GC.KeepAlive(o24); GC.KeepAlive(o25); GC.KeepAlive(o26); GC.KeepAlive(o27);
+            GC.KeepAlive(o28); GC.KeepAlive(o29); GC.KeepAlive(o30); GC.KeepAlive(o31);
+        }
+    }
+
+    static int Main()
+    {
+        StressRegisters();
+        Console.WriteLine("Test Passed");
+        return 100;
+    }
+}