-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Inline TLS field access for linux/osx x64/arm64 #87082
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 82 commits
9387a59
4b27a2d
909b8e6
773eb84
05aaa68
03d9c2c
a98c2cf
04b3cbc
46d8fc3
f44d745
67d33a9
7fb2f16
fcbebaa
7364faa
f257987
f85614b
5bdf881
b6d2ef0
75ec05a
fd200b5
4523864
4bef20a
1f437f6
b5394d7
dce8d91
e96530a
c4db025
ddc931f
a716411
1e14591
1632086
529c7f7
90e091d
694c9cc
e6044a8
ae76829
76db418
63ae9e8
e376109
e74fcd2
0d255d2
ac982cb
130bc14
eeb1a7a
d3cdf77
ec40932
368e8c5
c35938c
8a222d2
5235d37
a273623
6ef9e8d
6d31ec6
ce44ced
9292e2a
de5ada2
3fcec56
c39cf0e
9906f4e
cf3b8c0
ed0c6a7
5801bbf
7dca821
99dec18
bdca9fe
f1e5459
cb409f5
e3b7dc6
ab284c7
bcc0a55
f475b6d
4e0c211
47e087d
04420db
94f4d43
71d27eb
09ec151
e28401c
b89a55f
c6ae3e4
d237f0b
71b99dd
de5b3b7
c7864df
2429d75
8fa1e6f
d645f2e
d42fdce
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -478,36 +478,49 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* | |
| return false; | ||
| } | ||
|
|
||
| assert(!opts.IsReadyToRun()); | ||
kunalspathak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if (TargetOS::IsUnix) | ||
| { | ||
| #if defined(TARGET_ARM) || !defined(TARGET_64BIT) | ||
| // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such | ||
| // as MRC and MCR are used to access them. We do not support them and so should never optimize the | ||
| // field access using TLS. | ||
| assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); | ||
kunalspathak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| #endif | ||
| } | ||
| else | ||
| { | ||
| #ifdef TARGET_ARM | ||
| // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such | ||
| // as MRC and MCR are used to access them. We do not support them and so should never optimize the | ||
| // field access using TLS. | ||
| assert(!"Unsupported scenario of optimizing TLS access on Arm32"); | ||
| // On Arm, Thread execution blocks are accessed using co-processor registers and instructions such | ||
| // as MRC and MCR are used to access them. We do not support them and so should never optimize the | ||
| // field access using TLS. | ||
| assert(!"Unsupported scenario of optimizing TLS access on Windows Arm32"); | ||
kunalspathak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| #endif | ||
| } | ||
|
|
||
| JITDUMP("Expanding thread static local access for [%06d] in " FMT_BB ":\n", dspTreeID(call), block->bbNum); | ||
| DISPTREE(call); | ||
| JITDUMP("\n"); | ||
|
|
||
| bool isGCThreadStatic = | ||
| eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED; | ||
|
|
||
| CORINFO_THREAD_STATIC_BLOCKS_INFO threadStaticBlocksInfo; | ||
| info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); | ||
| memset(&threadStaticBlocksInfo, 0, sizeof(CORINFO_THREAD_STATIC_BLOCKS_INFO)); | ||
|
|
||
| uint32_t offsetOfMaxThreadStaticBlocksVal = 0; | ||
| uint32_t offsetOfThreadStaticBlocksVal = 0; | ||
| info.compCompHnd->getThreadLocalStaticBlocksInfo(&threadStaticBlocksInfo, isGCThreadStatic); | ||
|
|
||
| JITDUMP("getThreadLocalStaticBlocksInfo (%s)\n:", isGCThreadStatic ? "GC" : "Non-GC"); | ||
| offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; | ||
| offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; | ||
|
|
||
| JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); | ||
| JITDUMP("offsetOfThreadLocalStoragePointer= %u\n", threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer); | ||
| JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", offsetOfMaxThreadStaticBlocksVal); | ||
| JITDUMP("offsetOfThreadStaticBlocks= %u\n", offsetOfThreadStaticBlocksVal); | ||
| JITDUMP("tlsIndex= %u\n", (ssize_t)threadStaticBlocksInfo.tlsIndex.addr); | ||
kunalspathak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| JITDUMP("tlsGetAddrFtnPtr= %u\n", threadStaticBlocksInfo.tlsGetAddrFtnPtr); | ||
| JITDUMP("tlsIndexObject= %u\n", (size_t)threadStaticBlocksInfo.tlsIndexObject); | ||
| JITDUMP("threadVarsSection= %u\n", (size_t)threadStaticBlocksInfo.threadVarsSection); | ||
kunalspathak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| JITDUMP("offsetOfMaxThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks); | ||
| JITDUMP("offsetOfThreadStaticBlocks= %u\n", threadStaticBlocksInfo.offsetOfThreadStaticBlocks); | ||
| JITDUMP("offsetOfGCDataPointer= %u\n", threadStaticBlocksInfo.offsetOfGCDataPointer); | ||
kunalspathak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| assert(threadStaticBlocksInfo.tlsIndex.accessType == IAT_VALUE); | ||
| assert((eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED) || | ||
| (eeGetHelperNum(call->gtCallMethHnd) == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR_OPTIMIZED)); | ||
|
|
||
|
|
@@ -546,56 +559,135 @@ bool Compiler::fgExpandThreadLocalAccessForCall(BasicBlock** pBlock, Statement* | |
| gtUpdateStmtSideEffects(stmt); | ||
|
|
||
| GenTree* typeThreadStaticBlockIndexValue = call->gtArgs.GetArgByIndex(0)->GetNode(); | ||
| GenTree* tlsValue = nullptr; | ||
| unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); | ||
| lvaTable[tlsLclNum].lvType = TYP_I_IMPL; | ||
| GenTree* maxThreadStaticBlocksValue = nullptr; | ||
| GenTree* threadStaticBlocksValue = nullptr; | ||
| GenTree* tlsValueDef = nullptr; | ||
|
|
||
| if (TargetOS::IsWindows) | ||
| { | ||
| size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; | ||
| GenTree* dllRef = nullptr; | ||
|
|
||
| void** pIdAddr = nullptr; | ||
| if (tlsIndexValue != 0) | ||
| { | ||
| dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); | ||
| } | ||
|
|
||
| size_t tlsIndexValue = (size_t)threadStaticBlocksInfo.tlsIndex.addr; | ||
| GenTree* dllRef = nullptr; | ||
| // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] | ||
| tlsValue = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); | ||
| tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
|
|
||
| if (tlsIndexValue != 0) | ||
| { | ||
| dllRef = gtNewIconHandleNode(tlsIndexValue * TARGET_POINTER_SIZE, GTF_ICON_TLS_HDL); | ||
| if (dllRef != nullptr) | ||
| { | ||
| // Add the dllRef to produce thread local storage reference for coreclr | ||
| tlsValue = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, dllRef); | ||
| } | ||
|
|
||
| // Base of coreclr's thread local storage | ||
| tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
| } | ||
| else if (TargetOS::IsMacOS) | ||
| { | ||
| // For OSX x64/arm64, we need to get the address of the relevant __thread_vars section of | ||
| // the thread local variable `t_ThreadStatics`. The address of `tlv_get_address` is stored | ||
| // in this entry; we dereference the entry and invoke the function, passing the __thread_vars | ||
| // address present in `threadVarsSection`. | ||
| // | ||
| // Code sequence to access thread local variable on osx/x64: | ||
| // | ||
| // mov rdi, threadVarsSection | ||
| // call [rdi] | ||
| // | ||
| // Code sequence to access thread local variable on osx/arm64: | ||
| // | ||
| // mov x0, threadVarsSection | ||
| // mov x1, [x0] | ||
| // blr x1 | ||
| // | ||
| size_t threadVarsSectionVal = (size_t)threadStaticBlocksInfo.threadVarsSection; | ||
| GenTree* tls_get_addr_val = gtNewIconHandleNode(threadVarsSectionVal, GTF_ICON_FTN_ADDR); | ||
kunalspathak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| tls_get_addr_val = gtNewIndir(TYP_I_IMPL, tls_get_addr_val, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
|
|
||
| tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); | ||
| GenTreeCall* tlsRefCall = tlsValue->AsCall(); | ||
|
|
||
| // Mark this ICON as a TLS_HDL, codegen will use FS:[cns] or GS:[cns] | ||
| GenTree* tlsRef = gtNewIconHandleNode(threadStaticBlocksInfo.offsetOfThreadLocalStoragePointer, GTF_ICON_TLS_HDL); | ||
| // This is a call which takes an argument. | ||
| // Populate and set the ABI appropriately. | ||
| assert(threadVarsSectionVal != 0); | ||
| GenTree* tlsArg = gtNewIconNode(threadVarsSectionVal, TYP_I_IMPL); | ||
| tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); | ||
|
|
||
| tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
| CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); | ||
| arg0->AbiInfo = CallArgABIInformation(); | ||
| arg0->AbiInfo.SetRegNum(0, REG_ARG_0); | ||
|
|
||
| if (dllRef != nullptr) | ||
| tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); | ||
| } | ||
| else if (TargetOS::IsUnix) | ||
| { | ||
| // Add the dllRef to produce thread local storage reference for coreclr | ||
| tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef); | ||
| #if defined(TARGET_AMD64) | ||
| // Code sequence to access thread local variable on linux/x64: | ||
| // | ||
| // mov rdi, 0x7FE5C418CD28 ; tlsIndexObject | ||
| // mov rax, 0x7FE5C47AFDB0 ; _tls_get_addr | ||
| // call rax | ||
| // | ||
| GenTree* tls_get_addr_val = | ||
| gtNewIconHandleNode((size_t)threadStaticBlocksInfo.tlsGetAddrFtnPtr, GTF_ICON_FTN_ADDR); | ||
| tlsValue = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL); | ||
| GenTreeCall* tlsRefCall = tlsValue->AsCall(); | ||
|
|
||
| // This is an indirect call which takes an argument. | ||
| // Populate and set the ABI appropriately. | ||
| assert(threadStaticBlocksInfo.tlsIndexObject != 0); | ||
| GenTree* tlsArg = gtNewIconNode((size_t)threadStaticBlocksInfo.tlsIndexObject, TYP_I_IMPL); | ||
kunalspathak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| tlsRefCall->gtArgs.InsertAfterThisOrFirst(this, NewCallArg::Primitive(tlsArg)); | ||
|
|
||
| CallArg* arg0 = tlsRefCall->gtArgs.GetArgByIndex(0); | ||
| arg0->AbiInfo = CallArgABIInformation(); | ||
| arg0->AbiInfo.SetRegNum(0, REG_ARG_0); | ||
|
|
||
| tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT); | ||
| #ifdef UNIX_X86_ABI | ||
| tlsRefCall->gtFlags &= ~GTF_CALL_POP_ARGS; | ||
| #endif // UNIX_X86_ABI | ||
| #elif defined(TARGET_ARM64) | ||
| // Code sequence to access thread local variable on linux/arm64: | ||
| // | ||
| // mrs xt, tpidr_el0 | ||
| // mov xd, [xt+cns] | ||
| tlsValue = gtNewIconHandleNode(0, GTF_ICON_TLS_HDL); | ||
| #else | ||
| assert(!"Unsupported scenario of optimizing TLS access on Linux Arm32/x86"); | ||
| #endif | ||
| } | ||
|
|
||
| // Base of coreclr's thread local storage | ||
| GenTree* tlsValue = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
|
|
||
| // Cache the tls value | ||
| unsigned tlsLclNum = lvaGrabTemp(true DEBUGARG("TLS access")); | ||
| lvaTable[tlsLclNum].lvType = TYP_I_IMPL; | ||
| GenTree* tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); | ||
| GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); | ||
| tlsValueDef = gtNewStoreLclVarNode(tlsLclNum, tlsValue); | ||
| GenTree* tlsLclValueUse = gtNewLclVarNode(tlsLclNum); | ||
|
|
||
| size_t offsetOfThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfThreadStaticBlocks; | ||
| size_t offsetOfMaxThreadStaticBlocksVal = threadStaticBlocksInfo.offsetOfMaxThreadStaticBlocks; | ||
|
|
||
| // Create tree for "maxThreadStaticBlocks = tls[offsetOfMaxThreadStaticBlocks]" | ||
| GenTree* offsetOfMaxThreadStaticBlocks = gtNewIconNode(offsetOfMaxThreadStaticBlocksVal, TYP_I_IMPL); | ||
| GenTree* maxThreadStaticBlocksRef = | ||
| gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfMaxThreadStaticBlocks); | ||
| GenTree* maxThreadStaticBlocksValue = | ||
| gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
| maxThreadStaticBlocksValue = gtNewIndir(TYP_INT, maxThreadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
|
|
||
| GenTree* threadStaticBlocksRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), | ||
| gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL)); | ||
| threadStaticBlocksValue = gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
|
|
||
| // Create tree for "if (maxThreadStaticBlocks < typeIndex)" | ||
| GenTree* maxThreadStaticBlocksCond = | ||
| gtNewOperNode(GT_LT, TYP_INT, maxThreadStaticBlocksValue, gtCloneExpr(typeThreadStaticBlockIndexValue)); | ||
| maxThreadStaticBlocksCond = gtNewOperNode(GT_JTRUE, TYP_VOID, maxThreadStaticBlocksCond); | ||
|
|
||
| // Create tree for "threadStaticBlockBase = tls[offsetOfThreadStaticBlocks]" | ||
| GenTree* offsetOfThreadStaticBlocks = gtNewIconNode(offsetOfThreadStaticBlocksVal, TYP_I_IMPL); | ||
| GenTree* threadStaticBlocksRef = | ||
| gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(tlsLclValueUse), offsetOfThreadStaticBlocks); | ||
| GenTree* threadStaticBlocksValue = | ||
| gtNewIndir(TYP_I_IMPL, threadStaticBlocksRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT); | ||
|
|
||
| // Create tree for "threadStaticBlockValue = threadStaticBlockBase[typeIndex]" | ||
| typeThreadStaticBlockIndexValue = gtNewOperNode(GT_MUL, TYP_INT, gtCloneExpr(typeThreadStaticBlockIndexValue), | ||
| gtNewIconNode(TARGET_POINTER_SIZE, TYP_INT)); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.