From a3b38d5690dcc566f2aca5f720bf84107d2714e3 Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 14 Sep 2020 18:11:25 +0200 Subject: [PATCH 1/2] Android & ARM64: FEATURE_EMULATED_TLS (#8323) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Android on ARM64 doesn't follow the recommended way to implement Thread Local Storage (TLS). Instead they are using a custom solution when a __thread variable is used. This solution requires that a C function is always called internally when a TLS variable is accessed. While the compiler and linker take care of all the C/C++ code, the helper code written in assembler needs to be modified.  This implements the changes for the current thread and thunk data TLS variables. It would work on systems that follow the recommended implementation, too. But as it has a larger overhead compared to the current inlined solution, it needs to be activated for systems with this issue by using FEATURE_EMULATED_TLS --- src/Native/Runtime/ThunksMapping.cpp | 4 - src/Native/Runtime/arm64/AllocFast.S | 12 +++ src/Native/Runtime/arm64/ExceptionHandling.S | 36 +++++++- .../Runtime/arm64/InteropThunksHelpers.S | 9 ++ src/Native/Runtime/arm64/PInvoke.S | 91 ++----------------- src/Native/Runtime/threadstore.cpp | 12 +++ src/Native/Runtime/unix/PalRedhawkUnix.cpp | 12 +++ .../Runtime/unix/unixasmmacrosarm64.inc | 72 ++++++++++++++- 8 files changed, 154 insertions(+), 94 deletions(-) diff --git a/src/Native/Runtime/ThunksMapping.cpp b/src/Native/Runtime/ThunksMapping.cpp index 7b30a3eb1e4..a9f3984b3c6 100644 --- a/src/Native/Runtime/ThunksMapping.cpp +++ b/src/Native/Runtime/ThunksMapping.cpp @@ -234,10 +234,6 @@ EXTERN_C REDHAWK_API void* __cdecl RhAllocateThunksMapping() // FEATURE_RX_THUNKS #elif FEATURE_FIXED_POOL_THUNKS - -// This thread local variable is used for delegate marshalling -DECLSPEC_THREAD intptr_t tls_thunkData; - // This is used by the thunk code to find the stub data for the 
called thunk slot extern "C" uintptr_t g_pThunkStubData; uintptr_t g_pThunkStubData = NULL; diff --git a/src/Native/Runtime/arm64/AllocFast.S b/src/Native/Runtime/arm64/AllocFast.S index 9c74faf8d25..1623a2b87ca 100644 --- a/src/Native/Runtime/arm64/AllocFast.S +++ b/src/Native/Runtime/arm64/AllocFast.S @@ -21,7 +21,11 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAll LEAF_ENTRY RhpNewFast, _TEXT // x1 = GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_1 +#else INLINE_GETTHREAD x1 +#endif // // x0 contains EEType pointer @@ -135,7 +139,11 @@ NewOutOfMemory: // x1 == element count // x2 == string size +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_3 +#else INLINE_GETTHREAD x3 +#endif // Load potential new object address into x12. ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] @@ -196,7 +204,11 @@ StringSizeOverflow: // x1 == element count // x2 == array size +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_3 +#else INLINE_GETTHREAD x3 +#endif // Load potential new object address into x12. ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] diff --git a/src/Native/Runtime/arm64/ExceptionHandling.S b/src/Native/Runtime/arm64/ExceptionHandling.S index bb3f2bd9ae0..ace01d908b7 100644 --- a/src/Native/Runtime/arm64/ExceptionHandling.S +++ b/src/Native/Runtime/arm64/ExceptionHandling.S @@ -215,7 +215,11 @@ ALLOC_THROW_FRAME HARDWARE_EXCEPTION // x2 = GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else INLINE_GETTHREAD x2 +#endif add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null @@ -259,7 +263,11 @@ ALLOC_THROW_FRAME SOFTWARE_EXCEPTION // x2 = GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else INLINE_GETTHREAD x2 +#endif // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. 
So the return // address could have been hijacked when we were in that C# code and we must remove the hijack and @@ -349,7 +357,11 @@ NotHijacked: ALLOC_THROW_FRAME SOFTWARE_EXCEPTION // x2 = GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else INLINE_GETTHREAD x2 +#endif add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null @@ -408,7 +420,11 @@ NotHijacked: // // clear the DoNotTriggerGc flag, trashes x4-x6 // - INLINE_GETTHREAD x5 // x5 <- Thread* +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_5 +#else + INLINE_GETTHREAD x5 +#endif ldr x4, [x5, #OFFSETOF__Thread__m_threadAbortException] sub x4, x4, x0 @@ -447,7 +463,11 @@ ClearSuccess_Catch: // @TODO: add debug-only validation code for ExInfo pop - INLINE_GETTHREAD x1 // x1 <- Thread* +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_1 +#else + INLINE_GETTHREAD x1 +#endif // We must unhijack the thread at this point because the section of stack where the hijack is applied // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
@@ -517,7 +537,11 @@ NoAbort: // // clear the DoNotTriggerGc flag, trashes x2-x4 // - INLINE_GETTHREAD x2 // x2 <- Thread* +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else + INLINE_GETTHREAD x2 +#endif add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags @@ -555,7 +579,11 @@ ClearSuccess: // // set the DoNotTriggerGc flag, trashes x1-x3 // - INLINE_GETTHREAD x2 // x2 <- Thread* +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else + INLINE_GETTHREAD x2 +#endif add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags SetRetry: diff --git a/src/Native/Runtime/arm64/InteropThunksHelpers.S b/src/Native/Runtime/arm64/InteropThunksHelpers.S index 34ffd58b42d..f350cd2ac42 100644 --- a/src/Native/Runtime/arm64/InteropThunksHelpers.S +++ b/src/Native/Runtime/arm64/InteropThunksHelpers.S @@ -21,7 +21,14 @@ POINTER_SIZE = 0x08 // Custom calling convention: // xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) +#ifdef FEATURE_EMULATED_TLS + // This doesn't save and restore the floating point argument registers. 
If we encounter a + // target system that uses TLS emulation and modify these registers during this call we + // need to save and restore them, too + GETTHUNKDATA_ETLS_9 +#else INLINE_GET_TLS_VAR x9, tls_thunkData +#endif // x9 = base address of TLS data // xip0 = address of context cell in thunk's data @@ -45,6 +52,7 @@ POINTER_SIZE = 0x08 LEAF_END RhGetCommonStubAddress, _TEXT +#ifndef FEATURE_EMULATED_TLS // // IntPtr RhGetCurrentThunkContext() // @@ -57,3 +65,4 @@ POINTER_SIZE = 0x08 ret LEAF_END RhGetCurrentThunkContext, _TEXT +#endif //FEATURE_EMULATED_TLS diff --git a/src/Native/Runtime/arm64/PInvoke.S b/src/Native/Runtime/arm64/PInvoke.S index ad10690c4f5..6876404ee14 100644 --- a/src/Native/Runtime/arm64/PInvoke.S +++ b/src/Native/Runtime/arm64/PInvoke.S @@ -138,84 +138,6 @@ NoAbort: NESTED_END RhpWaitForGC, _TEXT -////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// -// RhpReversePInvoke -// -// IN: x9: address of reverse pinvoke frame -// 0: save slot for previous M->U transition frame -// 8: save slot for thread pointer to avoid re-calc in epilog sequence -// -// PRESERVES: x0 - x8 -- need to preserve these because the caller assumes they are not trashed -// -// TRASHES: x10, x11 -// -////////////////////////////////////////////////////////////////////////////////////////////////////////////// - LEAF_ENTRY RhpReversePInvoke, _TEXT - - INLINE_GETTHREAD x10 // x10 = Thread - str x10, [x9, #8] // save Thread pointer for RhpReversePInvokeReturn - - // x9 = reverse pinvoke frame - // x10 = thread - // x11 = scratch - - ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] - tbz x11, #TSF_Attached_Bit, AttachThread - -ThreadAttached: - // - // Check for the correct mode. 
This is accessible via various odd things that we cannot completely - // prevent such as : - // 1) Registering a reverse pinvoke entrypoint as a vectored exception handler - // 2) Performing a managed delegate invoke on a reverse pinvoke delegate. - // - ldr x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] - cbz x11, CheckBadTransition - - // Save previous TransitionFrame prior to making the mode transition so that it is always valid - // whenever we might attempt to hijack this thread. - str x11, [x9] - - str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame] - dmb ish - - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 11 - - tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread - - ret - -CheckBadTransition: - // Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have - // [NativeCallable] methods that are called via the "restricted GC callouts" as well as from native, - // which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
- ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] - tbz x11, #TSF_DoNotTriggerGc_Bit, BadTransition - - // zero-out our 'previous transition frame' save slot - mov x11, #0 - str x11, [x9] - - // nothing more to do - ret - -TrapThread: - // put the previous frame back (sets us back to preemptive mode) - ldr x11, [x9] - str x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] - dmb ish - -AttachThread: - // passing address of reverse pinvoke frame in x9 - b RhpReversePInvokeAttachOrTrapThread - -BadTransition: - mov x0, lr // arg <- return address - b RhpReversePInvokeBadTransition - - LEAF_END RhpReversePInvoke, _TEXT - ////////////////////////////////////////////////////////////////////////////////////////////////////////////// // // RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke @@ -287,10 +209,15 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_Flags] // get TLS global variable address - // r0 = GetThread() - INLINE_GETTHREAD x10 - str x10, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] - str x0, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_1 +#else + INLINE_GETTHREAD x1 +#endif + + str x1, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + str x0, [x1, #OFFSETOF__Thread__m_pTransitionFrame] PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, 9 diff --git a/src/Native/Runtime/threadstore.cpp b/src/Native/Runtime/threadstore.cpp index bababb62549..2d147182d86 100644 --- a/src/Native/Runtime/threadstore.cpp +++ b/src/Native/Runtime/threadstore.cpp @@ -413,6 +413,18 @@ EXTERN_C DECLSPEC_THREAD ThreadBuffer tls_CurrentThread = 0, // all other fields are initialized by zeroes }; +#ifdef FEATURE_EMULATED_TLS + +// When there is no full TLS support it might be emulated by the compiler +// In this case we need a C function that allows the assembler code to ask for the correct value for the current thread + +EXTERN_C ThreadBuffer* RhpGetThread() +{ + return 
&tls_CurrentThread; +} + +#endif // FEATURE_EMULATED_TLS + #endif // !DACCESS_COMPILE #ifdef _WIN32 diff --git a/src/Native/Runtime/unix/PalRedhawkUnix.cpp b/src/Native/Runtime/unix/PalRedhawkUnix.cpp index f4e09873f0a..90a087ddbc0 100644 --- a/src/Native/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/Native/Runtime/unix/PalRedhawkUnix.cpp @@ -478,6 +478,18 @@ thread_local TlsDestructionMonitor tls_destructionMonitor; // This thread local variable is used for delegate marshalling DECLSPEC_THREAD intptr_t tls_thunkData; +#ifdef FEATURE_EMULATED_TLS +EXTERN_C intptr_t* RhpGetThunkData() +{ + return &tls_thunkData; +} + +EXTERN_C intptr_t RhGetCurrentThunkContext() +{ + return tls_thunkData; +} +#endif //FEATURE_EMULATED_TLS + // Attach thread to PAL. // It can be called multiple times for the same thread. // It fails fast if a different thread was already registered. diff --git a/src/Native/Runtime/unix/unixasmmacrosarm64.inc b/src/Native/Runtime/unix/unixasmmacrosarm64.inc index 262d88df109..123768cb953 100644 --- a/src/Native/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/Native/Runtime/unix/unixasmmacrosarm64.inc @@ -172,13 +172,77 @@ C_FUNC(\Name): add \target, \target, #:tprel_lo12_nc:\var .endm +.macro INLINE_GETTHREAD target + INLINE_GET_TLS_VAR \target, tls_CurrentThread +.endm -.macro PREPARE_INLINE_GETTHREAD -.global tls_CurrentThread +.macro GETTHREAD_ETLS_1 + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32 // ;; Push down stack pointer and store FP and LR + str x0, [sp, #0x10] + + bl RhpGetThread + mov x1, x0 + + ldr x0, [sp, #0x10] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32 .endm -.macro INLINE_GETTHREAD target - INLINE_GET_TLS_VAR \target, tls_CurrentThread +.macro GETTHREAD_ETLS_2 + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32 // ;; Push down stack pointer and store FP and LR + stp x0, x1, [sp, #0x10] + + bl RhpGetThread + mov x2, x0 + + ldp x0, x1, [sp, #0x10] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32 +.endm + +.macro GETTHREAD_ETLS_3 + 
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-48 // ;; Push down stack pointer and store FP and LR + stp x0, x1, [sp, #0x10] + str x2, [sp, #0x20] + + bl RhpGetThread + mov x3, x0 + + ldp x0, x1, [sp, #0x10] + ldr x2, [sp, #0x20] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #48 +.endm + +.macro GETTHREAD_ETLS_5 + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-64 // ;; Push down stack pointer and store FP and LR + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + str x4, [sp, #0x30] + + bl RhpGetThread + mov x5, x0 + + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldr x4, [sp, #0x30] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #64 +.endm + +.macro GETTHUNKDATA_ETLS_9 + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-96 // ;; Push down stack pointer and store FP and LR + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, xip0, [sp, #0x50] + + bl RhpGetThunkData + mov x9, x0 + + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldp x8, xip0, [sp, #0x50] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #96 .endm .macro ArmInterlockedOperationBarrier From f916d5c108a34c9c4be2a570738294b15fb79e3f Mon Sep 17 00:00:00 2001 From: RalfKornmannEnvision Date: Mon, 14 Sep 2020 19:24:13 +0200 Subject: [PATCH 2/2] Fix removefeature:Globalization - Fix for the name of the library that is used to remove the dependency on the native code --- src/ILCompiler/src/RemovingILProvider.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ILCompiler/src/RemovingILProvider.cs b/src/ILCompiler/src/RemovingILProvider.cs index 2f0a0cbc7ec..d5c92e989bc 100644 --- a/src/ILCompiler/src/RemovingILProvider.cs +++ b/src/ILCompiler/src/RemovingILProvider.cs @@ -185,7 +185,7 @@ owningType is Internal.TypeSystem.Ecma.EcmaType mdType && } // Make sure that there are no ICU dependencies left - if (method.IsPInvoke && method.GetPInvokeMethodMetadata().Module == "System.Globalization.Native") + if 
(method.IsPInvoke && method.GetPInvokeMethodMetadata().Module == "libSystem.Globalization.Native") { return RemoveAction.ConvertToThrow; }