Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ private static CastResult TryGet(nuint source, nuint target)
{
ref CastCacheEntry pEntry = ref Element(ref tableData, index);

// must read in this order: version -> entry parts -> version
// must read in this order: version -> [entry parts] -> version
// if version is odd or changes, the entry is inconsistent and thus ignored
int version = Volatile.Read(ref pEntry._version);
nuint entrySource = pEntry._source;
Expand All @@ -124,12 +124,19 @@ private static CastResult TryGet(nuint source, nuint target)

if (entrySource == source)
{
nuint entryTargetAndResult = Volatile.Read(ref pEntry._targetAndResult);
nuint entryTargetAndResult = pEntry._targetAndResult;
// target never has its lower bit set.
// a matching entryTargetAndResult would have the same bits, except for the lowest one, which is the result.
entryTargetAndResult ^= target;
if (entryTargetAndResult <= 1)
{
// make sure 'version' is loaded after 'source' and 'targetAndResult'
//
// We can either:
// - use acquires for both _source and _targetAndResult or
// - issue a load barrier before reading _version
// benchmarks on available hardware show that use of a read barrier is cheaper.
Interlocked.ReadMemoryBarrier();
if (version != pEntry._version)
{
// oh, so close, the entry is in inconsistent state.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,15 @@ public static long Read(ref long location) =>
[MethodImpl(MethodImplOptions.InternalCall)]
public static extern void MemoryBarrier();

/// <summary>
/// Load barrier: the processor that executes the current thread cannot reorder instructions in such a way that
/// memory reads before the call to <see cref="ReadMemoryBarrier"/> execute after memory accesses (reads or writes)
/// that follow the call to <see cref="ReadMemoryBarrier"/>.
/// Only earlier reads are ordered; use <see cref="MemoryBarrier"/> when a full barrier is required.
/// </summary>
/// <remarks>
/// The JIT recognizes this method as the CORINFO_INTRINSIC_MemoryBarrierLoad intrinsic:
/// on ARM64 it emits <c>dmb ishld</c>; on 32-bit ARM it emits a full <c>dmb</c>; on x86/x64 no instruction
/// is emitted, but the call is still treated as a barrier to reordering by the JIT itself.
/// The InternalCall implementation registered for this method is COMInterlocked::FCMemoryBarrierLoad.
/// </remarks>
[Intrinsic]
[MethodImpl(MethodImplOptions.InternalCall)]
internal static extern void ReadMemoryBarrier();

[DllImport(RuntimeHelpers.QCall, CharSet = CharSet.Unicode)]
private static extern void _MemoryBarrierProcessWide();

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,7 @@ enum CorInfoIntrinsics
CORINFO_INTRINSIC_InterlockedCmpXchg32,
CORINFO_INTRINSIC_InterlockedCmpXchg64,
CORINFO_INTRINSIC_MemoryBarrier,
CORINFO_INTRINSIC_MemoryBarrierLoad,
CORINFO_INTRINSIC_GetCurrentManagedThread,
CORINFO_INTRINSIC_GetManagedThreadId,
CORINFO_INTRINSIC_ByReference_Ctor,
Expand Down
21 changes: 21 additions & 0 deletions src/coreclr/src/inc/volatile.h
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,27 @@ void VolatileStoreWithoutBarrier(T* pt, T val)
#endif
}

//
// Memory ordering barrier that waits for loads in progress to complete.
// Any effects of loads or stores that appear after, in program order, will "happen after" relative to this.
// Other operations such as computation or instruction prefetch are not affected.
//
// Only loads that precede the barrier are covered by this contract; callers must not rely on it
// to order earlier stores, even though some targets fall back to a stronger barrier.
//
// Architectural mapping:
// arm64 : dmb ishld (emitted directly below for GCC-compatible and MSVC compilers)
// arm : dmb ish (through VOLATILE_MEMORY_BARRIER)
// x86/64 : compiler fence (through VOLATILE_MEMORY_BARRIER)
//
// NOTE(review): VOLATILE_MEMORY_BARRIER is defined outside this block; the arm and x86/64 rows describe
// what that macro is expected to expand to - confirm against its definition.
inline
void VolatileLoadBarrier()
{
#if defined(HOST_ARM64) && defined(__GNUC__)
// The "memory" clobber also keeps the compiler from moving memory accesses across the asm statement.
asm volatile ("dmb ishld" : : : "memory");
#elif defined(HOST_ARM64) && defined(_MSC_VER)
// MSVC intrinsic form of the same load-only barrier.
__dmb(_ARM64_BARRIER_ISHLD);
#else
// Every other host/compiler combination (including arm64 with an unrecognized compiler)
// uses the generic barrier macro.
VOLATILE_MEMORY_BARRIER();
#endif
}

//
// Volatile<T> implements accesses with our volatile semantics over a variable of type T.
// Wherever you would have used a "volatile Foo" or, equivalently, "Foo volatile", use Volatile<Foo>
Expand Down
12 changes: 7 additions & 5 deletions src/coreclr/src/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -1465,11 +1465,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

void instGen_Return(unsigned stkArgSize);

#ifdef TARGET_ARM64
void instGen_MemoryBarrier(insBarrier barrierType = INS_BARRIER_ISH);
#else
void instGen_MemoryBarrier();
#endif
// Selects the strength of the barrier that instGen_MemoryBarrier emits.
// A target may emit something stronger than requested (or nothing at all) when its
// memory model makes that sufficient; see instGen_MemoryBarrier for the per-target mapping.
enum BarrierKind
{
BARRIER_FULL, // full barrier (the default for instGen_MemoryBarrier)
BARRIER_LOAD_ONLY, // load barrier
};

void instGen_MemoryBarrier(BarrierKind barrierKind = BARRIER_FULL);

void instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags flags = INS_FLAGS_DONT_CARE);

Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2665,8 +2665,8 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)

if (cpObjNode->gtFlags & GTF_BLK_VOLATILE)
{
// issue a INS_BARRIER_ISHLD after a volatile CpObj operation
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
// issue a load barrier after a volatile CpObj operation
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
}

// Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
Expand Down Expand Up @@ -2775,7 +2775,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
assert(!"Unexpected treeNode->gtOper");
}

instGen_MemoryBarrier(INS_BARRIER_ISH);
instGen_MemoryBarrier();
}
else
{
Expand Down Expand Up @@ -2855,7 +2855,7 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)

GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg);

instGen_MemoryBarrier(INS_BARRIER_ISH);
instGen_MemoryBarrier();

gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
}
Expand Down Expand Up @@ -2904,7 +2904,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
}
GetEmitter()->emitIns_R_R_R(INS_casal, dataSize, targetReg, dataReg, addrReg);

instGen_MemoryBarrier(INS_BARRIER_ISH);
instGen_MemoryBarrier();
}
else
{
Expand Down Expand Up @@ -2984,7 +2984,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)

genDefineTempLabel(labelCompareFail);

instGen_MemoryBarrier(INS_BARRIER_ISH);
instGen_MemoryBarrier();

gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask());
}
Expand Down
32 changes: 14 additions & 18 deletions src/coreclr/src/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,13 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
break;

case GT_MEMORYBARRIER:
instGen_MemoryBarrier();
{
CodeGen::BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;

instGen_MemoryBarrier(barrierKind);
break;
}

#ifdef TARGET_ARM64
case GT_XCHG:
Expand Down Expand Up @@ -1944,11 +1949,9 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)

if (emitBarrier)
{
#ifdef TARGET_ARM64
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
#else
instGen_MemoryBarrier();
#endif
// when INS_ldar* could not be used for a volatile load,
// we use an ordinary load followed by a load barrier.
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
}

genProduceReg(tree);
Expand Down Expand Up @@ -1980,13 +1983,8 @@ void CodeGen::genCodeForCpBlkHelper(GenTreeBlk* cpBlkNode)

if (cpBlkNode->gtFlags & GTF_BLK_VOLATILE)
{
#ifdef TARGET_ARM64
// issue a INS_BARRIER_ISHLD after a volatile CpBlk operation
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
#else
// issue a full memory barrier after a volatile CpBlk operation
instGen_MemoryBarrier();
#endif // TARGET_ARM64
// issue a load barrier after a volatile CpBlk operation
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
}
}

Expand Down Expand Up @@ -2207,6 +2205,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)

if (node->IsVolatile())
{
// issue a full memory barrier before a volatile CpBlk operation
instGen_MemoryBarrier();
}

Expand Down Expand Up @@ -2304,11 +2303,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)

if (node->IsVolatile())
{
#ifdef TARGET_ARM64
instGen_MemoryBarrier(INS_BARRIER_ISHLD);
#else
instGen_MemoryBarrier();
#endif
// issue a load barrier after a volatile CpBlk operation
instGen_MemoryBarrier(BARRIER_LOAD_ONLY);
}
}

Expand Down
7 changes: 6 additions & 1 deletion src/coreclr/src/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1862,8 +1862,13 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
break;

case GT_MEMORYBARRIER:
instGen_MemoryBarrier();
{
CodeGen::BarrierKind barrierKind =
treeNode->gtFlags & GTF_MEMORYBARRIER_LOAD ? BARRIER_LOAD_ONLY : BARRIER_FULL;

instGen_MemoryBarrier(barrierKind);
break;
}

case GT_CMPXCHG:
genCodeForCmpXchg(treeNode->AsCmpXchg());
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,8 @@ struct GenTree
#define GTF_CALL_POP_ARGS 0x04000000 // GT_CALL -- caller pop arguments?
#define GTF_CALL_HOISTABLE 0x02000000 // GT_CALL -- call is hoistable

#define GTF_MEMORYBARRIER_LOAD 0x40000000 // GT_MEMORYBARRIER -- Load barrier

#define GTF_NOP_DEATH 0x40000000 // GT_NOP -- operand dies here

#define GTF_FLD_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3627,11 +3627,20 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64)

case CORINFO_INTRINSIC_MemoryBarrier:
case CORINFO_INTRINSIC_MemoryBarrierLoad:

assert(sig->numArgs == 0);

op1 = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
op1->gtFlags |= GTF_GLOB_REF | GTF_ASG;

// On XARCH `CORINFO_INTRINSIC_MemoryBarrierLoad` fences need not be emitted.
// However, we still need to capture the effect on reordering.
if (intrinsicID == CORINFO_INTRINSIC_MemoryBarrierLoad)
{
op1->gtFlags |= GTF_MEMORYBARRIER_LOAD;
}

retNode = op1;
break;

Expand Down
15 changes: 9 additions & 6 deletions src/coreclr/src/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2364,11 +2364,7 @@ void CodeGen::instGen_Return(unsigned stkArgSize)
* Note: all MemoryBarriers instructions can be removed by
* SET COMPlus_JitNoMemoryBarriers=1
*/
#ifdef TARGET_ARM64
void CodeGen::instGen_MemoryBarrier(insBarrier barrierType)
#else
void CodeGen::instGen_MemoryBarrier()
#endif
void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
{
#ifdef DEBUG
if (JitConfig.JitNoMemoryBarriers() == 1)
Expand All @@ -2378,12 +2374,19 @@ void CodeGen::instGen_MemoryBarrier()
#endif // DEBUG

#if defined(TARGET_XARCH)
// only full barrier needs to be emitted on Xarch
if (barrierKind != BARRIER_FULL)
{
return;
}

instGen(INS_lock);
GetEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0);
#elif defined(TARGET_ARM)
// ARM has only full barriers, so all barriers need to be emitted as full.
GetEmitter()->emitIns_I(INS_dmb, EA_4BYTE, 0xf);
#elif defined(TARGET_ARM64)
GetEmitter()->emitIns_BARR(INS_dmb, barrierType);
GetEmitter()->emitIns_BARR(INS_dmb, barrierKind == BARRIER_LOAD_ONLY ? INS_BARRIER_ISHLD : INS_BARRIER_ISH);
#else
#error "Unknown TARGET"
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ static IntrinsicHashtable InitializeIntrinsicHashtable()
table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_InterlockedCmpXchg32, "CompareExchange", "System.Threading", "Interlocked");
// table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_InterlockedCmpXchg64, "CompareExchange", "System.Threading", "Interlocked"); // ambiguous match
table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_MemoryBarrier, "MemoryBarrier", "System.Threading", "Interlocked");
// The name must match the managed method, which is declared as Interlocked.ReadMemoryBarrier.
table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_MemoryBarrierLoad, "ReadMemoryBarrier", "System.Threading", "Interlocked");
// table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_GetCurrentManagedThread, "GetCurrentThreadNative", "System", "Thread"); // not in .NET Core
// table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_GetManagedThreadId, "get_ManagedThreadId", "System", "Thread"); // not in .NET Core
table.Add(CorInfoIntrinsics.CORINFO_INTRINSIC_ByReference_Ctor, ".ctor", "System", "ByReference`1");
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/tools/Common/JitInterface/CorInfoTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ public enum CorInfoIntrinsics
CORINFO_INTRINSIC_InterlockedCmpXchg32,
CORINFO_INTRINSIC_InterlockedCmpXchg64,
CORINFO_INTRINSIC_MemoryBarrier,
CORINFO_INTRINSIC_MemoryBarrierLoad,
CORINFO_INTRINSIC_GetCurrentManagedThread,
CORINFO_INTRINSIC_GetManagedThreadId,
CORINFO_INTRINSIC_ByReference_Ctor,
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/src/vm/castcache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ TypeHandle::CastResult CastCache::TryGet(TADDR source, TADDR target)
{
CastCacheEntry* pEntry = &Elements(tableData)[index];

// must read in this order: version -> entry parts -> version
// must read in this order: version -> [entry parts] -> version
// if version is odd or changes, the entry is inconsistent and thus ignored
DWORD version1 = VolatileLoad(&pEntry->version);
TADDR entrySource = pEntry->source;
Expand All @@ -171,12 +171,14 @@ TypeHandle::CastResult CastCache::TryGet(TADDR source, TADDR target)

if (entrySource == source)
{
TADDR entryTargetAndResult = VolatileLoad(&pEntry->targetAndResult);
TADDR entryTargetAndResult = pEntry->targetAndResult;
// target never has its lower bit set.
// a matching entryTargetAndResult would have the same bits, except for the lowest one, which is the result.
entryTargetAndResult ^= target;
if (entryTargetAndResult <= 1)
{
// make sure 'version' is loaded after 'source' and 'targetAndResult'
VolatileLoadBarrier();
if (version1 != pEntry->version)
{
// oh, so close, the entry is in inconsistent state.
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/src/vm/comutilnative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,15 @@ FCIMPL0(void, COMInterlocked::FCMemoryBarrier)
}
FCIMPLEND

// FCall implementation registered for the managed "ReadMemoryBarrier" method
// (intrinsic id CORINFO_INTRINSIC_MemoryBarrierLoad in gInterlockedFuncs).
// Takes no arguments and returns nothing; its only effect is the load barrier.
FCIMPL0(void, COMInterlocked::FCMemoryBarrierLoad)
{
FCALL_CONTRACT;

// Issue the load-only barrier.
VolatileLoadBarrier();
// NOTE(review): presumably a GC poll point required of FCalls before returning - confirm against FC_GC_POLL.
FC_GC_POLL();
}
FCIMPLEND

#include <optdefault.h>

void QCALLTYPE COMInterlocked::MemoryBarrierProcessWide()
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/vm/comutilnative.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ class COMInterlocked
static FCDECL2_IV(INT64, ExchangeAdd64, INT64 *location, INT64 value);

static FCDECL0(void, FCMemoryBarrier);
static FCDECL0(void, FCMemoryBarrierLoad);
static void QCALLTYPE MemoryBarrierProcessWide();
};

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/vm/ecalllist.h
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,7 @@ FCFuncStart(gInterlockedFuncs)
FCIntrinsicSig("ExchangeAdd", &gsig_SM_RefLong_Long_RetLong, COMInterlocked::ExchangeAdd64, CORINFO_INTRINSIC_InterlockedXAdd64)

FCIntrinsic("MemoryBarrier", COMInterlocked::FCMemoryBarrier, CORINFO_INTRINSIC_MemoryBarrier)
FCIntrinsic("ReadMemoryBarrier", COMInterlocked::FCMemoryBarrierLoad, CORINFO_INTRINSIC_MemoryBarrierLoad)
QCFuncElement("_MemoryBarrierProcessWide", COMInterlocked::MemoryBarrierProcessWide)
FCFuncEnd()

Expand Down