From 71b734446df9bf4af09727f5b85b534ffd851849 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Wed, 31 Jan 2024 22:44:24 +0100 Subject: [PATCH 01/13] Implement ARM32 atomic intrinsics --- .../System/Threading/Interlocked.CoreCLR.cs | 16 +- src/coreclr/jit/codegenarm.cpp | 242 ++++++++++++++++++ src/coreclr/jit/codegenarmarch.cpp | 4 +- src/coreclr/jit/emitarm.cpp | 137 ++++++++-- src/coreclr/jit/emitfmtsarm.h | 3 + src/coreclr/jit/importercalls.cpp | 13 +- src/coreclr/jit/instrsarm.h | 16 +- src/coreclr/jit/lsraarm.cpp | 68 +++++ src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 4 + .../nativeaot/Runtime/arm/Interlocked.S | 67 ----- .../src/System/Threading/Interlocked.cs | 12 +- .../src/System/Threading/Interlocked.cs | 6 +- 12 files changed, 469 insertions(+), 119 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs index e2d0a033b5d0c7..bb20adf30d8464 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs @@ -51,7 +51,7 @@ public static long Decrement(ref long location) => [MethodImpl(MethodImplOptions.AggressiveInlining)] public static byte Exchange(ref byte location1, byte value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -72,7 +72,7 @@ public static byte Exchange(ref byte location1, byte value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static short Exchange(ref short location1, short value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -93,7 +93,7 @@ public static short Exchange(ref short location1, short value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Exchange(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -172,7 +172,7 @@ public static T Exchange([NotNullIfNotNull(nameof(value))] ref T location1, T [MethodImpl(MethodImplOptions.AggressiveInlining)] public static byte CompareExchange(ref byte location1, byte value, byte comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -194,7 +194,7 @@ public static byte CompareExchange(ref byte location1, byte value, byte comparan [MethodImpl(MethodImplOptions.AggressiveInlining)] public static short CompareExchange(ref short location1, short value, short comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -216,7 +216,7 @@ public static short CompareExchange(ref short location1, short value, short comp [MethodImpl(MethodImplOptions.AggressiveInlining)] public 
static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -238,7 +238,7 @@ public static int CompareExchange(ref int location1, int value, int comparand) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static long CompareExchange(ref long location1, long value, long comparand) { -#if TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -315,7 +315,7 @@ public static long Add(ref long location1, long value) => [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ExchangeAdd(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return ExchangeAdd(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 4a8c08a89858e8..a18d1ad7e7d5ae 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -61,6 +61,11 @@ bool CodeGen::genInstrWithConstant( { case INS_add: case INS_sub: + if (imm < 0) + { + imm = -imm; + ins = (ins == INS_add) ? INS_sub : INS_add; + } immFitsInIns = validImmForInstr(ins, (target_ssize_t)imm, flags); break; @@ -675,6 +680,243 @@ void CodeGen::genJumpTable(GenTree* treeNode) genProduceReg(treeNode); } +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. +// +// Arguments: +// treeNode - the GT_XADD/XCHG node +// +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) +{ + GenTree* data = treeNode->AsOp()->gtOp2; + GenTree* addr = treeNode->AsOp()->gtOp1; + regNumber targetReg = treeNode->GetRegNum(); + regNumber dataReg = data->GetRegNum(); + regNumber addrReg = addr->GetRegNum(); + + genConsumeAddress(addr); + genConsumeRegs(data); + + assert(!treeNode->OperIs(GT_XORR, GT_XAND)); + assert(treeNode->OperIs(GT_XCHG) || !varTypeIsSmall(treeNode->TypeGet())); + + emitAttr dataSize = emitActualTypeSize(data); + + regNumber tempReg = treeNode->ExtractTempReg(RBM_ALLINT); + regNumber loadReg = (targetReg != REG_NA) ? targetReg : treeNode->ExtractTempReg(RBM_ALLINT); + regNumber storeReg = dataReg; + + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target. + noway_assert(addrReg != targetReg); + + noway_assert(addrReg != loadReg); + noway_assert(dataReg != loadReg); + + noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); + + assert(addr->isUsedFromReg()); + noway_assert(tempReg != REG_NA); + noway_assert(tempReg != targetReg); + noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); + + // Unpredictable store-exclusive cases must be avoided + noway_assert(tempReg != addrReg); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input + // registers die at the first instruction generated by the node.
This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // Emit code like this: + // retry: + // ldrex loadReg, [addrReg] + // add storeReg, loadReg, dataReg # Only for GT_XADD + // # GT_XCHG storeReg === dataReg + // strex tempReg, storeReg, [addrReg] + // cmp tempReg, 0 + // bne retry + // dmb ish + + BasicBlock* labelRetry = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + instruction insLd = INS_ldrex; + instruction insSt = INS_strex; + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldrexb; + insSt = INS_strexb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldrexh; + insSt = INS_strexh; + } + + // The following instruction includes an acquire half barrier + GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg); + + if (treeNode->OperGet() == GT_XADD) + { + storeReg = loadReg; + if (data->isContainedIntOrIImmed()) + { + genInstrWithConstant(INS_add, dataSize, storeReg, loadReg, data->AsIntConCommon()->IconValue(), + INS_FLAGS_DONT_CARE, tempReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeReg, loadReg, dataReg); + } + } + + // The following instruction includes a release half barrier + GetEmitter()->emitIns_R_R_R(insSt, dataSize, tempReg, storeReg, addrReg); + + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, tempReg, 0); + GetEmitter()->emitIns_J(INS_bne, labelRetry); + + instGen_MemoryBarrier(); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + + if (targetReg != REG_NA) + { + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + + genProduceReg(treeNode); + } +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. + +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) +{ + assert(treeNode->OperIs(GT_CMPXCHG)); + + GenTree* addr = treeNode->Addr(); // arg1 + GenTree* data = treeNode->Data(); // arg2 + GenTree* comparand = treeNode->Comparand(); // arg3 + + regNumber targetReg = treeNode->GetRegNum(); + regNumber dataReg = data->GetRegNum(); + regNumber addrReg = addr->GetRegNum(); + regNumber comparandReg = comparand->GetRegNum(); + + genConsumeAddress(addr); + genConsumeRegs(data); + genConsumeRegs(comparand); + + emitAttr dataSize = emitActualTypeSize(data); + + regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); + + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target.
+ noway_assert(addrReg != targetReg); + noway_assert(dataReg != targetReg); + noway_assert(comparandReg != targetReg); + noway_assert(addrReg != dataReg); + noway_assert(targetReg != REG_NA); + noway_assert(exResultReg != REG_NA); + noway_assert(exResultReg != targetReg); + + assert(addr->isUsedFromReg()); + assert(data->isUsedFromReg()); + assert(!comparand->isUsedFromMemory()); + + // Unpredictable store-exclusive cases must be avoided + noway_assert(exResultReg != dataReg); + noway_assert(exResultReg != addrReg); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input + // registers die at the first instruction generated by the node. This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. + + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // Emit code like this: + // retry: + // ldrex targetReg, [addrReg] + // cmp targetReg, comparandReg + // bne compareFail + // strex exResult, dataReg, [addrReg] + // cmp exResult, 0 + // bne retry + // compareFail: + // dmb ish + + BasicBlock* labelRetry = genCreateTempLabel(); + BasicBlock* labelCompareFail = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + instruction insLd = INS_ldrex; + instruction insSt = INS_strex; + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldrexb; + insSt = INS_strexb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldrexh; + insSt = INS_strexh; + } + + // The following instruction includes an acquire half barrier + GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); + + if (comparand->isContainedIntOrIImmed()) + { + assert(comparand->AsIntConCommon()->IconValue() <= INT32_MAX); + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, + (target_ssize_t)comparand->AsIntConCommon()->IconValue()); + } + else + { + GetEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, targetReg, comparandReg); + } + GetEmitter()->emitIns_J(INS_bne, labelCompareFail); + + // The following instruction includes a release half barrier + GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); + + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, 0); + GetEmitter()->emitIns_J(INS_bne, labelRetry); + + genDefineTempLabel(labelCompareFail); + + instGen_MemoryBarrier(); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + + genProduceReg(treeNode); +} + +//------------------------------------------------------------------------ +// genGetInsForOper: Return instruction encoding of the operation tree.
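For readers following the codegen above: the loop genCodeForCmpXchg emits is the standard ARMv7 load-exclusive/store-exclusive pattern. The sketch below is an illustrative C++ model of that pattern using GCC inline assembly. It is not code from this patch; the function name and label numbers are invented, and it shows a dmb on both sides of the loop, which is the barrier placement this series converges on (the leading barrier is added by a later commit in the series).

#include <cstdint>

// Minimal reference model of the emitted compare-exchange loop (ARMv7 only).
int32_t CompareExchange32(int32_t* addr, int32_t value, int32_t comparand)
{
    int32_t  observed; // value seen by ldrex; this is the function's result
    uint32_t status;   // strex writes 0 on success, 1 if the monitor was lost
    __asm__ volatile(
        "dmb ish              \n"
        "1:                   \n"
        "ldrex   %0, [%2]     \n" // load and take the exclusive monitor
        "cmp     %0, %4       \n"
        "bne     2f           \n" // comparand mismatch: fail without storing
        "strex   %1, %3, [%2] \n" // attempt the store
        "cmp     %1, #0       \n"
        "bne     1b           \n" // reservation lost: retry from the load
        "2:                   \n"
        "dmb ish              \n"
        : "=&r"(observed), "=&r"(status)
        : "r"(addr), "r"(value), "r"(comparand)
        : "cc", "memory");
    return observed;
}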
// diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index cb04cf702f2b94..e4cff6f9f0f60a 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -429,9 +429,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) } #ifdef TARGET_ARM64 - case GT_XCHG: case GT_XORR: case GT_XAND: +#endif // TARGET_ARM64 + case GT_XCHG: case GT_XADD: genLockedInstructions(treeNode->AsOp()); break; @@ -439,7 +440,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) case GT_CMPXCHG: genCodeForCmpXchg(treeNode->AsCmpXchg()); break; -#endif // TARGET_ARM64 case GT_RELOAD: // do nothing - reload is just a marker. diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index 3fa92b60d0e5b5..c14800a5fdeb21 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -425,6 +425,26 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(emitGetInsSC(id) < 0x100); break; + case IF_T2_O1: // T2_O1 ............nnnn ttttTTTT....dddd R1 R2 R3 R4 + assert(isGeneralRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + assert(isGeneralRegister(id->idReg4())); + break; + + case IF_T2_O2: // T2_O2 ............nnnn tttt........dddd R1 R2 R3 + assert(isGeneralRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + break; + + case IF_T2_O3: // T2_O3 ............nnnn ttttddddiiiiiiii R1 R2 R3 imm8 + assert(isGeneralRegister(id->idReg1())); + assert(isGeneralRegister(id->idReg2())); + assert(isGeneralRegister(id->idReg3())); + assert(emitGetInsSC(id) < 0x100); + break; + case IF_T1_K: // T1_K ....cccciiiiiiii Branch imm8, cond4 case IF_T1_M: // T1_M .....iiiiiiiiiii Branch imm11 case IF_T2_J1: // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4 @@ -554,6 +574,9 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id) case IF_T2_H1: case IF_T2_K1: case IF_T2_K4: + case IF_T2_O1: + case IF_T2_O2: + case IF_T2_O3: // Some formats with "destination" or "target" registers are actually used for store instructions, for the // "source" value written to memory. // Similarly, PUSH has a target register, indicating the start of the set of registers to push. 
POP @@ -2441,21 +2464,19 @@ void emitter::emitIns_R_R( sf = INS_FLAGS_NOT_SET; break; - case INS_ldrexb: - case INS_strexb: - assert(size == EA_4BYTE); - assert(insDoesNotSetFlags(flags)); - fmt = IF_T2_E1; - sf = INS_FLAGS_NOT_SET; - break; + case INS_ldrex: + fmt = IF_T2_H1; + goto COMMON_THUMB2_EX; + case INS_ldrexb: case INS_ldrexh: - case INS_strexh: + fmt = IF_T2_E1; + COMMON_THUMB2_EX: assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); - fmt = IF_T2_E1; sf = INS_FLAGS_NOT_SET; break; + default: #ifdef DEBUG printf("did not expect instruction %s\n", codeGen->genInsName(ins)); @@ -2464,7 +2485,8 @@ void emitter::emitIns_R_R( } assert((fmt == IF_T1_D0) || (fmt == IF_T1_E) || (fmt == IF_T2_C3) || (fmt == IF_T2_C9) || (fmt == IF_T2_C10) || - (fmt == IF_T2_VFP2) || (fmt == IF_T2_VMOVD) || (fmt == IF_T2_VMOVS) || (fmt == IF_T2_E1)); + (fmt == IF_T2_VFP2) || (fmt == IF_T2_VMOVD) || (fmt == IF_T2_VMOVS) || (fmt == IF_T2_H1) || + (fmt == IF_T2_E1)); assert(sf != INS_FLAGS_DONT_CARE); @@ -3034,14 +3056,13 @@ void emitter::emitIns_R_R_I(instruction ins, break; case INS_ldrex: - case INS_strex: assert(insOptsNone(opt)); assert(insDoesNotSetFlags(flags)); sf = INS_FLAGS_NOT_SET; if ((imm & 0x03fc) == imm) { - fmt = IF_T2_H0; + fmt = IF_T2_H1; } else { @@ -3295,9 +3316,19 @@ void emitter::emitIns_R_R_R(instruction ins, break; case INS_ldrexd: - case INS_strexd: - assert(insDoesNotSetFlags(flags)); fmt = IF_T2_G1; + goto COMMON_THUMB2_EX; + + case INS_strex: + fmt = IF_T2_O3; + goto COMMON_THUMB2_EX; + + case INS_strexb: + case INS_strexh: + fmt = IF_T2_O2; + COMMON_THUMB2_EX: + assert(size == EA_4BYTE); + assert(insDoesNotSetFlags(flags)); sf = INS_FLAGS_NOT_SET; break; @@ -3305,7 +3336,7 @@ void emitter::emitIns_R_R_R(instruction ins, unreached(); } assert((fmt == IF_T1_H) || (fmt == IF_T2_C4) || (fmt == IF_T2_C5) || (fmt == IF_T2_VFP3) || (fmt == IF_T2_VMOVD) || - (fmt == IF_T2_G1)); + (fmt == IF_T2_G1) || (fmt == IF_T2_O2) || (fmt == IF_T2_O3)); assert(sf != INS_FLAGS_DONT_CARE); instrDesc* id = emitNewInstr(attr); @@ -3549,10 +3580,25 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_strex: + assert(insOptsNone(opt)); + assert(insDoesNotSetFlags(flags)); + sf = INS_FLAGS_NOT_SET; + + if ((imm & 0x03fc) == imm) + { + fmt = IF_T2_O3; + } + else + { + assert(!"Instruction cannot be encoded"); + } + break; + default: unreached(); } - assert((fmt == IF_T2_C0) || (fmt == IF_T2_E0) || (fmt == IF_T2_G0)); + assert((fmt == IF_T2_C0) || (fmt == IF_T2_E0) || (fmt == IF_T2_G0) || (fmt == IF_T2_O3)); assert(sf != INS_FLAGS_DONT_CARE); // 3-reg ops can't use the small instrdesc @@ -3586,7 +3632,6 @@ void emitter::emitIns_R_R_R_R( /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_smull: case INS_umull: case INS_smlal: @@ -3598,10 +3643,13 @@ void emitter::emitIns_R_R_R_R( case INS_mls: fmt = IF_T2_F2; break; + case INS_strexd: + fmt = IF_T2_O1; + break; default: unreached(); } - assert((fmt == IF_T2_F1) || (fmt == IF_T2_F2)); + assert((fmt == IF_T2_F1) || (fmt == IF_T2_F2) || (fmt == IF_T2_O1)); assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this instruction. 
assert(reg2 != REG_PC); @@ -6262,6 +6310,37 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) emitHandlePCRelativeMov32((void*)(dst - 8), addr); break; + case IF_T2_O1: // T2_O1 ............nnnn ttttTTTT....dddd R1 R2 R3 R4 + sz = emitGetInstrDescSize(id); + code = emitInsCode(ins, fmt); + code |= insEncodeRegT2_M(id->idReg1()); + code |= insEncodeRegT2_D(id->idReg2()); + code |= insEncodeRegT2_T(id->idReg3()); + code |= insEncodeRegT2_N(id->idReg4()); + dst += emitOutput_Thumb2Instr(dst, code); + break; + + case IF_T2_O2: // T2_O2 ............nnnn tttt........dddd R1 R2 R3 + sz = emitGetInstrDescSize(id); + code = emitInsCode(ins, fmt); + code |= insEncodeRegT2_M(id->idReg1()); + code |= insEncodeRegT2_T(id->idReg2()); + code |= insEncodeRegT2_N(id->idReg3()); + dst += emitOutput_Thumb2Instr(dst, code); + break; + + case IF_T2_O3: // T2_O3 ............nnnn ttttddddiiiiiiii R1 R2 R3 imm8 + sz = emitGetInstrDescSize(id); + code = emitInsCode(ins, fmt); + imm = emitGetInsSC(id); + assert((imm & 0x00ff) == imm); + code |= insEncodeRegT2_D(id->idReg1()); + code |= insEncodeRegT2_T(id->idReg2()); + code |= insEncodeRegT2_N(id->idReg3()); + code |= imm; + dst += emitOutput_Thumb2Instr(dst, code); + break; + case IF_T2_VFP3: // these are the binary operators // d = n - m @@ -7411,6 +7490,28 @@ void emitter::emitDispInsHelp( } break; + case IF_T2_O1: + case IF_T2_O2: + case IF_T2_O3: + emitDispReg(id->idReg1(), attr, true); + emitDispReg(id->idReg2(), attr, true); + if (fmt == IF_T2_O1) + { + emitDispReg(id->idReg3(), attr, true); + emitDispAddrR(id->idReg4(), attr); + } + else if (fmt == IF_T2_O2) + { + emitDispAddrR(id->idReg3(), attr); + } + else + { + assert(fmt == IF_T2_O3); + imm = emitGetInsSC(id); + emitDispAddrRI(id->idReg3(), imm, attr); + } + break; + case IF_T1_J2: emitDispReg(id->idReg1(), attr, true); imm = emitGetInsSC(id); diff --git a/src/coreclr/jit/emitfmtsarm.h b/src/coreclr/jit/emitfmtsarm.h index a5c97364e64da0..75089af1203b81 100644 --- a/src/coreclr/jit/emitfmtsarm.h +++ b/src/coreclr/jit/emitfmtsarm.h @@ -136,6 +136,9 @@ IF_DEF(T2_N, IS_NONE, NONE) // T2_N .....i......iiii IF_DEF(T2_N1, IS_NONE, JMP) // T2_N1 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a code address IF_DEF(T2_N2, IS_NONE, NONE) // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a data address IF_DEF(T2_N3, IS_NONE, NONE) // T2_N3 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt (relocatable imm) +IF_DEF(T2_O1, IS_NONE, NONE) // T2_O1 ............nnnn ttttTTTT....dddd R1 R2 R3 R4 +IF_DEF(T2_O2, IS_NONE, NONE) // T2_O2 ............nnnn tttt........dddd R1 R2 R3 +IF_DEF(T2_O3, IS_NONE, NONE) // T2_O3 ............nnnn ttttddddiiiiiiii R1 R2 R3 imm8 IF_DEF(T2_VLDST, IS_NONE, NONE) // T2_VLDST 11101101UD0Lnnnn dddd101Ziiiiiiii D1 R2 imm(+-1020) IF_DEF(T2_VFP2, IS_NONE, NONE) // T2_VFP2 111011101D110--- dddd101Z--M0mmmm D1 D2 IF_DEF(T2_VFP3, IS_NONE, NONE) // T2_VFP3 11101110-D--nnnn dddd101ZN-M0mmmm D1 D2 D3 diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index a9dee3bf25744a..0c54f1f7dad722 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3433,8 +3433,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, } #endif // defined(TARGET_ARM64) || defined(TARGET_RISCV64) -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) - // TODO-ARM-CQ: reenable treating InterlockedCmpXchg32 operation as intrinsic +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) 
|| defined(TARGET_ARM) || defined(TARGET_RISCV64) case NI_System_Threading_Interlocked_CompareExchange: { var_types retType = JITtype2varType(sig->retType); @@ -3443,12 +3442,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { break; } -#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) else if (genTypeSize(retType) < 4) { break; } -#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) if ((retType == TYP_REF) && (impStackTop(1).val->IsIntegralConst(0) || impStackTop(1).val->IsIconHandle(GTF_ICON_OBJ_HDL))) @@ -3484,12 +3483,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { break; } -#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) else if (genTypeSize(retType) < 4) { break; } -#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) if ((retType == TYP_REF) && (impStackTop().val->IsIntegralConst(0) || impStackTop().val->IsIconHandle(GTF_ICON_OBJ_HDL))) @@ -3515,7 +3514,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, callType, op1, op2); break; } -#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) +#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_RISCV64) case NI_System_Threading_Interlocked_MemoryBarrier: case NI_System_Threading_Interlocked_ReadMemoryBarrier: diff --git a/src/coreclr/jit/instrsarm.h b/src/coreclr/jit/instrsarm.h index 9356150d4b2e83..6d76209a68af0b 100644 --- a/src/coreclr/jit/instrsarm.h +++ b/src/coreclr/jit/instrsarm.h @@ -437,14 +437,14 @@ INST1(stmdb, "stmdb", 0,ST, IF_EN2D, 0xE9000000) // Rn{!}, T2_I0 1110100100W0nnnn 0r0rrrrrrrrrrrrr E900 0000 INST1(strd, "strd", 0,ST, IF_T2_G0, 0xE8400000) // Rt,RT,[Rn],+-i8{!}T2_G0 1110100PU1W0nnnn ttttTTTTiiiiiiii E840 0000 -INST1(strex, "strex", 0,ST, IF_T2_H1, 0xE8400F00) - // Rt,[Rn+i8] T2_H1 111010000100nnnn tttt1111iiiiiiii E840 0F00 imm(0-1020) -INST1(strexb, "strexb", 0,ST, IF_T2_E1, 0xE8C00F4F) - // Rt,[Rn] T2_E1 111010001100nnnn tttt111101001111 E8C0 0F4F -INST1(strexd, "strexd", 0,ST, IF_T2_G1, 0xE8C0007F) - // Rt,RT,[Rn] T2_G1 111010001100nnnn ttttTTTT01111111 E8C0 007F -INST1(strexh, "strexh", 0,ST, IF_T2_E1, 0xE8C00F5F) - // Rt,[Rn] T2_E1 111010001100nnnn tttt111101011111 E8C0 0F5F +INST1(strex, "strex", 0,ST, IF_T2_O3, 0xE8400000) + // Rd,Rt,[Rn+i8] T2_O3 111010000100nnnn ttttddddiiiiiiii E840 0000 imm(0-255) +INST1(strexb, "strexb", 0,ST, IF_T2_O2, 0xE8C00F50) + // Rd,Rt,[Rn] T2_O2 111010001100nnnn tttt11110100dddd E8C0 0F40 +INST1(strexd, "strexd", 0,ST, IF_T2_O1, 0xE8C00070) + // Rd,Rt,RT,[Rn] T2_O1 111010001100nnnn ttttTTTT0111dddd E8C0 0070 +INST1(strexh, "strexh", 0,ST, IF_T2_O2, 0xE8C00F50) + // Rd,Rt,[Rn] T2_O2 111010001100nnnn tttt11110101dddd E8C0 0F50 INST1(subw, "subw", 0, 0, IF_T2_M0, 0xF2A00000) // Rd,Rn,+i12 T2_M0 11110i101010nnnn 0iiiddddiiiiiiii F2A0 0000 imm(0-4095) INST1(tbb, "tbb", 0, 0, IF_T2_C9, 0xE8D0F000) diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 30991778868d62..0d65889ae349de 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -570,6 +570,74 @@ int LinearScan::BuildNode(GenTree* tree) BuildDef(tree); break; + case GT_CMPXCHG: + { + GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); +
srcCount = cmpXchgNode->Comparand()->isContained() ? 2 : 3; + assert(dstCount == 1); + + // ARM exclusives require a single internal register + buildInternalIntRegisterDefForNode(tree); + + // For ARM exclusives the lifetimes of the addr and data must be extended because + // they may be used multiple times during retries + + RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->Addr()); + setDelayFree(locationUse); + RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->Data()); + setDelayFree(valueUse); + if (!cmpXchgNode->Comparand()->isContained()) + { + RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->Comparand()); + + // For ARM exclusives the lifetime of the comparand must be extended because + // it may be used multiple times during retries + setDelayFree(comparandUse); + } + + // Internals may not collide with target + setInternalRegsDelayFree = true; + buildInternalRegisterUses(); + BuildDef(tree); + } + break; + + case GT_XADD: + case GT_XCHG: + { + assert(dstCount == (tree->TypeIs(TYP_VOID) ? 0 : 1)); + srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; + + buildInternalIntRegisterDefForNode(tree); + + assert(!tree->gtGetOp1()->isContained()); + RefPosition* op1Use = BuildUse(tree->gtGetOp1()); + RefPosition* op2Use = nullptr; + if (!tree->gtGetOp2()->isContained()) + { + op2Use = BuildUse(tree->gtGetOp2()); + } + + // For ARM exclusives the lifetimes of the addr and data must be extended because + // they may be used multiple times during retries + // Internals may not collide with target + if (dstCount == 1) + { + setDelayFree(op1Use); + if (op2Use != nullptr) + { + setDelayFree(op2Use); + } + setInternalRegsDelayFree = true; + } + buildInternalRegisterUses(); + if (dstCount == 1) + { + BuildDef(tree); + } + } + break; + case GT_CALL: srcCount = BuildCall(tree->AsCall()); if (tree->AsCall()->HasMultiRegRetVal()) diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index 242acc2f99e652..66216f8c654c70 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -286,9 +286,11 @@ EXTERN_C void * RhpCheckedAssignRefAVLocation; EXTERN_C void * RhpCheckedLockCmpXchgAVLocation; EXTERN_C void * RhpCheckedXchgAVLocation; #if !defined(HOST_AMD64) && !defined(HOST_ARM64) +#if !defined(HOST_X86) && !defined(HOST_ARM) EXTERN_C void * RhpLockCmpXchg8AVLocation; EXTERN_C void * RhpLockCmpXchg16AVLocation; EXTERN_C void * RhpLockCmpXchg32AVLocation; +#endif EXTERN_C void * RhpLockCmpXchg64AVLocation; #endif EXTERN_C void * RhpByRefAssignRefAVLocation1; @@ -312,9 +314,11 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP) (uintptr_t)&RhpCheckedLockCmpXchgAVLocation, (uintptr_t)&RhpCheckedXchgAVLocation, #if !defined(HOST_AMD64) && !defined(HOST_ARM64) +#if !defined(HOST_X86) && !defined(HOST_ARM) (uintptr_t)&RhpLockCmpXchg8AVLocation, (uintptr_t)&RhpLockCmpXchg16AVLocation, (uintptr_t)&RhpLockCmpXchg32AVLocation, +#endif (uintptr_t)&RhpLockCmpXchg64AVLocation, #endif (uintptr_t)&RhpByRefAssignRefAVLocation1, diff --git a/src/coreclr/nativeaot/Runtime/arm/Interlocked.S b/src/coreclr/nativeaot/Runtime/arm/Interlocked.S index 67a079863d9868..515c3accffc923 100644 --- a/src/coreclr/nativeaot/Runtime/arm/Interlocked.S +++ b/src/coreclr/nativeaot/Runtime/arm/Interlocked.S @@ -7,73 +7,6 @@ #include // generated by the build from AsmOffsets.cpp #include -// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in
null pointer will happen at RhpLockCmpXchg8AVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address -// r0 = destination address -// r1 = value -// r2 = comparand -LEAF_ENTRY RhpLockCmpXchg8, _TEXT - dmb -ALTERNATE_ENTRY RhpLockCmpXchg8AVLocation -LOCAL_LABEL(CmpXchg8Retry): - ldrexb r3, [r0] - cmp r2, r3 - bne LOCAL_LABEL(CmpXchg8Exit) - strexb r12, r1, [r0] - cmp r12, #0 - bne LOCAL_LABEL(CmpXchg8Retry) -LOCAL_LABEL(CmpXchg8Exit): - mov r0, r3 - dmb - bx lr -LEAF_END RhpLockCmpXchg8, _TEXT - -// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg16AVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address -// r0 = destination address -// r1 = value -// r2 = comparand -LEAF_ENTRY RhpLockCmpXchg16, _TEXT - uxth r2, r2 - dmb -ALTERNATE_ENTRY RhpLockCmpXchg16AVLocation -LOCAL_LABEL(CmpXchg16Retry): - ldrexh r3, [r0] - cmp r2, r3 - bne LOCAL_LABEL(CmpXchg16Exit) - strexh r12, r1, [r0] - cmp r12, #0 - bne LOCAL_LABEL(CmpXchg16Retry) -LOCAL_LABEL(CmpXchg16Exit): - sxth r0, r3 - dmb - bx lr -LEAF_END RhpLockCmpXchg16, _TEXT - -// WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg32AVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address -// r0 = destination address -// r1 = value -// r2 = comparand -LEAF_ENTRY RhpLockCmpXchg32, _TEXT - dmb -ALTERNATE_ENTRY RhpLockCmpXchg32AVLocation -LOCAL_LABEL(CmpXchg32Retry): - ldrex r3, [r0] - cmp r2, r3 - bne LOCAL_LABEL(CmpXchg32Exit) - strex r12, r1, [r0] - cmp r12, #0 - bne LOCAL_LABEL(CmpXchg32Retry) -LOCAL_LABEL(CmpXchg32Exit): - mov r0, r3 - dmb - bx lr -LEAF_END RhpLockCmpXchg32, _TEXT - // WARNING: Code in EHHelpers.cpp makes assumptions about this helper, in particular: // - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpLockCmpXchg64AVLocation // - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and LR contains the return address diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs index b4fd68cf7c7034..72751f677cedba 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Threading/Interlocked.cs @@ -14,7 +14,7 @@ public static partial class Interlocked [Intrinsic] public static byte CompareExchange(ref byte location1, byte value, byte comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); #else return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand); @@ -24,7 +24,7 @@ public static byte CompareExchange(ref byte location1, byte value, byte comparan [Intrinsic] public static short CompareExchange(ref short location1, short value, short comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); #else return 
RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand); @@ -34,7 +34,7 @@ public static short CompareExchange(ref short location1, short value, short comp [Intrinsic] public static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); #else return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand); @@ -73,7 +73,7 @@ public static T CompareExchange(ref T location1, T value, T comparand) where [Intrinsic] public static byte Exchange(ref byte location1, byte value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return Exchange(ref location1, value); #else byte oldValue; @@ -90,7 +90,7 @@ public static byte Exchange(ref byte location1, byte value) [Intrinsic] public static short Exchange(ref short location1, short value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return Exchange(ref location1, value); #else short oldValue; @@ -107,7 +107,7 @@ public static short Exchange(ref short location1, short value) [Intrinsic] public static int Exchange(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return Exchange(ref location1, value); #else int oldValue; diff --git a/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs b/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs index 12b3bb500d3a33..b81de24862a41b 100644 --- a/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs +++ b/src/coreclr/nativeaot/Test.CoreLib/src/System/Threading/Interlocked.cs @@ -21,7 +21,7 @@ public static IntPtr CompareExchange(ref IntPtr location1, IntPtr value, IntPtr [Intrinsic] public static byte CompareExchange(ref byte location1, byte value, byte comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); #else return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand); @@ -31,7 +31,7 @@ public static byte CompareExchange(ref byte location1, byte value, byte comparan [Intrinsic] public static short CompareExchange(ref short location1, short value, short comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); #else return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand); @@ -41,7 +41,7 @@ public static short CompareExchange(ref short location1, short value, short comp [Intrinsic] public static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); #else return RuntimeImports.InterlockedCompareExchange(ref location1, value, comparand); From 0ee75a82307affacb7aadc714b0470f9464dae16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Wed, 31 Jan 2024 23:14:55 +0100 Subject: [PATCH 02/13] Fix formatting, revert long change --- 
.../src/System/Threading/Interlocked.CoreCLR.cs | 2 +- src/coreclr/jit/codegenarm.cpp | 2 +- src/coreclr/jit/emitarm.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs index bb20adf30d8464..1bedda28b70967 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs @@ -238,7 +238,7 @@ public static int CompareExchange(ref int location1, int value, int comparand) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static long CompareExchange(ref long location1, long value, long comparand) { -#if TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 +#if TARGET_AMD64 || TARGET_ARM64 || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index a18d1ad7e7d5ae..597160c89a7c1b 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -888,7 +888,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) { assert(comparand->AsIntConCommon()->IconValue() <= INT32_MAX); GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, - (target_ssize_t)comparand->AsIntConCommon()->IconValue()); + (target_ssize_t)comparand->AsIntConCommon()->IconValue()); } else { diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index c14800a5fdeb21..5901bf6f83dd67 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -2474,7 +2474,7 @@ void emitter::emitIns_R_R( COMMON_THUMB2_EX: assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); - sf = INS_FLAGS_NOT_SET; + sf = INS_FLAGS_NOT_SET; break; default: @@ -3329,7 +3329,7 @@ void emitter::emitIns_R_R_R(instruction ins, COMMON_THUMB2_EX: assert(size == EA_4BYTE); assert(insDoesNotSetFlags(flags)); - sf = INS_FLAGS_NOT_SET; + sf = INS_FLAGS_NOT_SET; break; default: From 26380416dfecda2699d9c3762dd616e8bc76c46a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= <35800402+MichalPetryka@users.noreply.github.com> Date: Thu, 1 Feb 2024 01:50:19 +0100 Subject: [PATCH 03/13] Use correct register --- src/coreclr/jit/codegenarm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 597160c89a7c1b..2c028e68b549f9 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -899,7 +899,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) // The following instruction includes a release half barrier GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); - GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, 0); + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, exResultReg, 0); GetEmitter()->emitIns_J(INS_bne, labelRetry); genDefineTempLabel(labelCompareFail); From 1d24a02fd647e394b9c2d60d7faa7c4752387d60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Thu, 1 Feb 2024 23:03:35 +0100 Subject: [PATCH 04/13] Add missing barrier, fix codegen bugs, optimize codegen --- src/coreclr/jit/codegenarm.cpp | 39 ++++++++++++++++++++------------ src/coreclr/jit/instrsarm.h | 2 +- src/coreclr/jit/lower.cpp | 4 +++- src/coreclr/jit/lowerarmarch.cpp | 23 +++++++++++++------ 
src/coreclr/jit/lsraarm.cpp | 4 ++++ 5 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 2c028e68b549f9..26458a2a80b602 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -703,8 +703,8 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) emitAttr dataSize = emitActualTypeSize(data); regNumber tempReg = treeNode->ExtractTempReg(RBM_ALLINT); - regNumber loadReg = (targetReg != REG_NA) ? targetReg : treeNode->ExtractTempReg(RBM_ALLINT); - regNumber storeReg = dataReg; + regNumber storeReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT); + regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeReg; // Check allocator assumptions // // The register allocator should have extended the lifetimes of all input and internal registers so that // none interfere with the target. @@ -743,9 +743,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) // bne retry // dmb ish - BasicBlock* labelRetry = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - instruction insLd = INS_ldrex; instruction insSt = INS_strex; if (varTypeIsByte(treeNode->TypeGet())) @@ -759,12 +756,16 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode) insSt = INS_strexh; } + instGen_MemoryBarrier(); + + BasicBlock* labelRetry = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + // The following instruction includes an acquire half barrier GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg); if (treeNode->OperGet() == GT_XADD) { - storeReg = loadReg; if (data->isContainedIntOrIImmed()) { genInstrWithConstant(INS_add, dataSize, storeReg, loadReg, data->AsIntConCommon()->IconValue(), @@ -864,10 +865,6 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) // compareFail: // dmb ish - BasicBlock* labelRetry = genCreateTempLabel(); - BasicBlock* labelCompareFail = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - instruction insLd = INS_ldrex; instruction insSt = INS_strex; if (varTypeIsByte(treeNode->TypeGet())) @@ -881,20 +878,34 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) insSt = INS_strexh; } + instGen_MemoryBarrier(); + + BasicBlock* labelRetry = genCreateTempLabel(); + BasicBlock* labelCompareFail = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + // The following instruction includes an acquire half barrier GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); if (comparand->isContainedIntOrIImmed()) { - assert(comparand->AsIntConCommon()->IconValue() <= INT32_MAX); - GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, - (target_ssize_t)comparand->AsIntConCommon()->IconValue()); + if (comparand->IsIntegralConst(0)) + { + GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelCompareFail, targetReg); + } + else + { + assert(comparand->AsIntConCommon()->IconValue() <= INT32_MAX); + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, + (target_ssize_t)comparand->AsIntConCommon()->IconValue()); + GetEmitter()->emitIns_J(INS_bne, labelCompareFail); + } } else { GetEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, targetReg, comparandReg); + GetEmitter()->emitIns_J(INS_bne, labelCompareFail); } - GetEmitter()->emitIns_J(INS_bne, labelCompareFail); // The following instruction includes a release half barrier GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); diff --git a/src/coreclr/jit/instrsarm.h b/src/coreclr/jit/instrsarm.h index 6d76209a68af0b..58a9c6f41b2e3e 100644 --- a/src/coreclr/jit/instrsarm.h +++ b/src/coreclr/jit/instrsarm.h @@ -439,7 +439,7 @@ INST1(strd, "strd", 0,ST, IF_T2_G0, 0xE8400000) //
Rt,RT,[Rn],+-i8{!}T2_G0 1110100PU1W0nnnn ttttTTTTiiiiiiii E840 0000 INST1(strex, "strex", 0,ST, IF_T2_O3, 0xE8400000) // Rd,Rt,[Rn+i8] T2_O3 111010000100nnnn ttttddddiiiiiiii E840 0000 imm(0-255) -INST1(strexb, "strexb", 0,ST, IF_T2_O2, 0xE8C00F50) +INST1(strexb, "strexb", 0,ST, IF_T2_O2, 0xE8C00F40) // Rd,Rt,[Rn] T2_O2 111010001100nnnn tttt11110100dddd E8C0 0F40 INST1(strexd, "strexd", 0,ST, IF_T2_O1, 0xE8C00070) // Rd,Rt,RT,[Rn] T2_O1 111010001100nnnn ttttTTTT0111dddd E8C0 0070 INST1(strexh, "strexh", 0,ST, IF_T2_O2, 0xE8C00F50) // Rd,Rt,[Rn] T2_O2 111010001100nnnn tttt11110101dddd E8C0 0F50 INST1(subw, "subw", 0, 0, IF_T2_M0, 0xF2A00000) // Rd,Rn,+i12 T2_M0 11110i101010nnnn 0iiiddddiiiiiiii F2A0 0000 imm(0-4095) INST1(tbb, "tbb", 0, 0, IF_T2_C9, 0xE8D0F000) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c859bbe8761351..c4dd75f7e78c0b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -629,13 +629,15 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->Comparand()); break; +#ifndef TARGET_ARM case GT_XORR: case GT_XAND: +#endif // TARGET_ARM case GT_XADD: CheckImmedAndMakeContained(node, node->AsOp()->gtOp2); break; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 5c0acbbdc40115..e04057a13ae1ba 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -82,20 +82,29 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const { case GT_ADD: case GT_SUB: -#ifdef TARGET_ARM64 +#ifdef TARGET_ARM + case GT_XADD: + return emitter::emitIns_valid_imm_for_add(immVal, flags); +#else return emitter::emitIns_valid_imm_for_add(immVal, size); - case GT_CMPXCHG: + + case GT_XADD: case GT_LOCKADD: case GT_XORR: case GT_XAND: - case GT_XADD: return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : emitter::emitIns_valid_imm_for_add(immVal, size); -#elif defined(TARGET_ARM) - return emitter::emitIns_valid_imm_for_add(immVal, flags); -#endif - break; +#endif // TARGET_ARM + + case GT_CMPXCHG: +#ifdef TARGET_ARM + return emitter::emitIns_valid_imm_for_cmp(immVal, flags); +#else + return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) + ? false + : emitter::emitIns_valid_imm_for_cmp(immVal, size); +#endif // TARGET_ARM #ifdef TARGET_ARM64 case GT_EQ: diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 0d65889ae349de..0ca29a37fdec4d 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -609,6 +609,10 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = tree->gtGetOp2()->isContained() ?
1 : 2; buildInternalIntRegisterDefForNode(tree); + if (tree->OperGet() != GT_XCHG) + { + buildInternalIntRegisterDefForNode(tree); + } assert(!tree->gtGetOp1()->isContained()); RefPosition* op1Use = BuildUse(tree->gtGetOp1()); From 62311c46a7ca1b64b5b3f2911ff16b79b802698e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Fri, 2 Feb 2024 00:07:02 +0100 Subject: [PATCH 05/13] Fix cmpxchg sign extension --- src/coreclr/jit/codegenarm.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 26458a2a80b602..566564b2cc55e6 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -887,6 +887,12 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) // The following instruction includes an acquire half barrier GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + if (comparand->isContainedIntOrIImmed()) { if (comparand->IsIntegralConst(0)) @@ -919,12 +925,6 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - genProduceReg(treeNode); } From 9910a94bfd504a0197bc9caa75b8d53049cd0587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Fri, 2 Feb 2024 01:21:04 +0100 Subject: [PATCH 06/13] Fix assertion --- src/coreclr/jit/codegenarm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 566564b2cc55e6..e4e93311955dec 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -895,7 +895,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) if (comparand->isContainedIntOrIImmed()) { - if (comparand->IsIntegralConst(0)) + if (comparand->IsIntegralConst(0) && emitter::isLowRegister(targetReg)) { GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelCompareFail, targetReg); } From 62f6a2ce2535d54c0e3083493f25237884436b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Tue, 20 Feb 2024 20:37:33 +0100 Subject: [PATCH 07/13] Simplify defines, optimize codegen --- .../src/System/Threading/Interlocked.CoreCLR.cs | 12 ++++++------ src/coreclr/jit/codegenarm.cpp | 12 ++++++------ src/coreclr/jit/importercalls.cpp | 12 ++++++------ src/coreclr/jit/lower.cpp | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs index 1bedda28b70967..76aabd24e32827 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs @@ -51,7 +51,7 @@ public static long Decrement(ref long location) => [MethodImpl(MethodImplOptions.AggressiveInlining)] public static byte Exchange(ref byte location1, byte value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#if
TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -72,7 +72,7 @@ public static byte Exchange(ref byte location1, byte value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static short Exchange(ref short location1, short value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -93,7 +93,7 @@ public static short Exchange(ref short location1, short value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Exchange(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH || TARGET_RISCV64 return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -172,7 +172,7 @@ public static T Exchange([NotNullIfNotNull(nameof(value))] ref T location1, T [MethodImpl(MethodImplOptions.AggressiveInlining)] public static byte CompareExchange(ref byte location1, byte value, byte comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -194,7 +194,7 @@ public static byte CompareExchange(ref byte location1, byte value, byte comparan [MethodImpl(MethodImplOptions.AggressiveInlining)] public static short CompareExchange(ref short location1, short value, short comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -216,7 +216,7 @@ public static short CompareExchange(ref short location1, short value, short comp [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index e4e93311955dec..1bbde47defbd0c 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -887,12 +887,6 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) // The following instruction includes an acquire half barrier GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - if (comparand->isContainedIntOrIImmed()) { if (comparand->IsIntegralConst(0) && emitter::isLowRegister(targetReg)) @@ -925,6 +919,12 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ?
INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + genProduceReg(treeNode); } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 42bad979e8e2ae..5b6b2ca1f304c6 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3438,7 +3438,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, } #endif // defined(TARGET_ARM64) || defined(TARGET_RISCV64) -#if defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_RISCV64) +#if defined(TARGET_XARCH) || defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) case NI_System_Threading_Interlocked_CompareExchange: { var_types retType = JITtype2varType(sig->retType); @@ -3447,12 +3447,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { break; } -#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) +#if !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) else if (genTypeSize(retType) < 4) { break; } -#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) if ((retType == TYP_REF) && (impStackTop(1).val->IsIntegralConst(0) || impStackTop(1).val->IsIconHandle(GTF_ICON_OBJ_HDL))) @@ -3493,12 +3493,12 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, { break; } -#if !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) +#if !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) else if (genTypeSize(retType) < 4) { break; } -#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) && !defined(TARGET_ARM) +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARMARCH) if ((retType == TYP_REF) && (impStackTop().val->IsIntegralConst(0) || impStackTop().val->IsIconHandle(GTF_ICON_OBJ_HDL))) @@ -3524,7 +3524,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, callType, op1, op2); break; } -#endif // defined(TARGET_XARCH) || defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_RISCV64) +#endif // defined(TARGET_XARCH) || defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) case NI_System_Threading_Interlocked_MemoryBarrier: case NI_System_Threading_Interlocked_ReadMemoryBarrier: diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 72ed87e1eb3e83..96b44bff030bf1 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -629,7 +629,7 @@ GenTree* Lowering::LowerNode(GenTree* node) LowerStoreLocCommon(node->AsLclVarCommon()); break; -#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) case GT_CMPXCHG: CheckImmedAndMakeContained(node, node->AsCmpXchg()->Comparand()); break; From 745f9f71bf8877cc3aa5bfee3565665a1343e698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Tue, 20 Feb 2024 20:43:37 +0100 Subject: [PATCH 08/13] Revert invalid change --- .../src/System/Threading/Interlocked.CoreCLR.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs index 76aabd24e32827..1bedda28b70967 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs @@ -51,7 +51,7 @@ 
public static long Decrement(ref long location) => [MethodImpl(MethodImplOptions.AggressiveInlining)] public static byte Exchange(ref byte location1, byte value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -72,7 +72,7 @@ public static byte Exchange(ref byte location1, byte value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static short Exchange(ref short location1, short value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -93,7 +93,7 @@ public static short Exchange(ref short location1, short value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Exchange(ref int location1, int value) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return Exchange(ref location1, value); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -172,7 +172,7 @@ public static T Exchange([NotNullIfNotNull(nameof(value))] ref T location1, T [MethodImpl(MethodImplOptions.AggressiveInlining)] public static byte CompareExchange(ref byte location1, byte value, byte comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -194,7 +194,7 @@ public static byte CompareExchange(ref byte location1, byte value, byte comparan [MethodImpl(MethodImplOptions.AggressiveInlining)] public static short CompareExchange(ref short location1, short value, short comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) @@ -216,7 +216,7 @@ public static short CompareExchange(ref short location1, short value, short comp [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int CompareExchange(ref int location1, int value, int comparand) { -#if TARGET_X86 || TARGET_AMD64 || TARGET_ARMARCH || TARGET_RISCV64 +#if TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM || TARGET_RISCV64 return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else if (Unsafe.IsNullRef(ref location1)) From 1b037612e55b982cc94bfd5ab3bbfb4abba73de5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= <35800402+MichalPetryka@users.noreply.github.com> Date: Fri, 22 Mar 2024 03:24:50 +0100 Subject: [PATCH 09/13] Update Interlocked.CoreCLR.cs --- .../src/System/Threading/Interlocked.CoreCLR.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs index 59fe6c834d7d02..a1c1386392e0c0 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Threading/Interlocked.CoreCLR.cs @@ -42,7 +42,6 @@ public static long Decrement(ref long location) => #endregion #region Exchange - /// Sets a 32-bit signed 
integer to a specified value and returns the original value, as an atomic operation. /// The variable to set to the specified value. /// The value to which the parameter is set. From ba6519774e102d65f4c058a096b1402abec23d4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= <35800402+MichalPetryka@users.noreply.github.com> Date: Fri, 22 Mar 2024 03:27:10 +0100 Subject: [PATCH 10/13] Update Interlocked.cs --- .../src/System/Threading/Interlocked.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs b/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs index 45ebce76f7b0ee..b690ea38e5363d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Threading/Interlocked.cs @@ -80,7 +80,7 @@ public static short Exchange(ref short location1, short value) => [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe byte Exchange(ref byte location1, byte value) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) +#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return Exchange(ref location1, value); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object @@ -119,7 +119,7 @@ public static unsafe byte Exchange(ref byte location1, byte value) [CLSCompliant(false)] public static unsafe ushort Exchange(ref ushort location1, ushort value) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) +#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return Exchange(ref location1, value); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object @@ -258,7 +258,7 @@ public static short CompareExchange(ref short location1, short value, short comp [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe byte CompareExchange(ref byte location1, byte value, byte comparand) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) +#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object @@ -301,7 +301,7 @@ public static unsafe byte CompareExchange(ref byte location1, byte value, byte c [CLSCompliant(false)] public static unsafe ushort CompareExchange(ref ushort location1, ushort value, ushort comparand) { -#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64) +#if !MONO && (TARGET_X86 || TARGET_AMD64 || TARGET_ARM64 || TARGET_ARM) return CompareExchange(ref location1, value, comparand); // Must expand intrinsic #else // this relies on GC keeping 4B alignment for refs and on subtracting to such alignment being in the same object From 075d86a136b0f0da5e598b0a915cefa54ad1e304 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Petryka?= Date: Sat, 15 Jun 2024 00:16:42 +0200 Subject: [PATCH 11/13] Unify arm64 and arm32 handling --- src/coreclr/jit/codegenarm.cpp | 248 ----------------- src/coreclr/jit/codegenarm64.cpp | 321 ---------------------- src/coreclr/jit/codegenarmarch.cpp | 419 +++++++++++++++++++++++++++++ src/coreclr/jit/lowerarmarch.cpp | 20 +- src/coreclr/jit/lsra.h | 3 + src/coreclr/jit/lsraarm.cpp | 70 +---- 
src/coreclr/jit/lsraarm64.cpp | 93 +------ src/coreclr/jit/lsraarmarch.cpp | 120 +++++++++ src/coreclr/jit/target.h | 14 +- 9 files changed, 570 insertions(+), 738 deletions(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index a5117870491c1d..ee284721710881 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -658,254 +658,6 @@ void CodeGen::genJumpTable(GenTree* treeNode) genProduceReg(treeNode); } -//------------------------------------------------------------------------ -// genLockedInstructions: Generate code for a GT_XADD or GT_XCHG node. -// -// Arguments: -// treeNode - the GT_XADD/XCHG node -// -void CodeGen::genLockedInstructions(GenTreeOp* treeNode) -{ - GenTree* data = treeNode->AsOp()->gtOp2; - GenTree* addr = treeNode->AsOp()->gtOp1; - regNumber targetReg = treeNode->GetRegNum(); - regNumber dataReg = data->GetRegNum(); - regNumber addrReg = addr->GetRegNum(); - - genConsumeAddress(addr); - genConsumeRegs(data); - - assert(!treeNode->OperIs(GT_XORR, GT_XAND)); - assert(treeNode->OperIs(GT_XCHG) || !varTypeIsSmall(treeNode->TypeGet())); - - emitAttr dataSize = emitActualTypeSize(data); - - regNumber tempReg = treeNode->ExtractTempReg(RBM_ALLINT); - regNumber storeReg = (treeNode->OperGet() == GT_XCHG) ? dataReg : treeNode->ExtractTempReg(RBM_ALLINT); - regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeReg; - - // Check allocator assumptions - // - // The register allocator should have extended the lifetimes of all input and internal registers so that - // none interfere with the target. - noway_assert(addrReg != targetReg); - - noway_assert(addrReg != loadReg); - noway_assert(dataReg != loadReg); - - noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); - - assert(addr->isUsedFromReg()); - noway_assert(tempReg != REG_NA); - noway_assert(tempReg != targetReg); - noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); - - // Store exclusive unpredictable cases must be avoided - noway_assert(tempReg != addrReg); - - // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input - // registers - // die at the first instruction generated by the node. This is not the case for these atomics as the input - // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until - // we are finished generating the code for this node. 
- - gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); - - // Emit code like this: - // retry: - // ldrex loadReg, [addrReg] - // add storeReg, loadReg, dataReg # Only for GT_XADD - // # GT_XCHG storeReg === dataReg - // strex tempReg, storeReg, [addrReg] - // cmp tempReg, 0 - // bne retry - // dmb ish - - instruction insLd = INS_ldrex; - instruction insSt = INS_strex; - if (varTypeIsByte(treeNode->TypeGet())) - { - insLd = INS_ldrexb; - insSt = INS_strexb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - insLd = INS_ldrexh; - insSt = INS_strexh; - } - - instGen_MemoryBarrier(); - - BasicBlock* labelRetry = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - - // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg); - - if (treeNode->OperGet() == GT_XADD) - { - if (data->isContainedIntOrIImmed()) - { - genInstrWithConstant(INS_add, dataSize, storeReg, loadReg, data->AsIntConCommon()->IconValue(), - INS_FLAGS_DONT_CARE, tempReg); - } - else - { - GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeReg, loadReg, dataReg); - } - } - - // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(insSt, dataSize, tempReg, storeReg, addrReg); - - GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, tempReg, 0); - GetEmitter()->emitIns_J(INS_bne, labelRetry); - - instGen_MemoryBarrier(); - - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); - - if (targetReg != REG_NA) - { - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - - genProduceReg(treeNode); - } -} - -//------------------------------------------------------------------------ -// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. -// -// Arguments: -// tree - the GT_CMPXCHG node -// -void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) -{ - assert(treeNode->OperIs(GT_CMPXCHG)); - - GenTree* addr = treeNode->Addr(); // arg1 - GenTree* data = treeNode->Data(); // arg2 - GenTree* comparand = treeNode->Comparand(); // arg3 - - regNumber targetReg = treeNode->GetRegNum(); - regNumber dataReg = data->GetRegNum(); - regNumber addrReg = addr->GetRegNum(); - regNumber comparandReg = comparand->GetRegNum(); - - genConsumeAddress(addr); - genConsumeRegs(data); - genConsumeRegs(comparand); - - emitAttr dataSize = emitActualTypeSize(data); - - regNumber exResultReg = treeNode->ExtractTempReg(RBM_ALLINT); - - // Check allocator assumptions - // - // The register allocator should have extended the lifetimes of all input and internal registers so that - // none interfere with the target. - noway_assert(addrReg != targetReg); - noway_assert(dataReg != targetReg); - noway_assert(comparandReg != targetReg); - noway_assert(addrReg != dataReg); - noway_assert(targetReg != REG_NA); - noway_assert(exResultReg != REG_NA); - noway_assert(exResultReg != targetReg); - - assert(addr->isUsedFromReg()); - assert(data->isUsedFromReg()); - assert(!comparand->isUsedFromMemory()); - - // Store exclusive unpredictable cases must be avoided - noway_assert(exResultReg != dataReg); - noway_assert(exResultReg != addrReg); - - // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input - // registers - // die at the first instruction generated by the node. 
This is not the case for these atomics as the input - // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until - // we are finished generating the code for this node. - - gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); - - // Emit code like this: - // retry: - // ldrex targetReg, [addrReg] - // cmp targetReg, comparandReg - // bne compareFail - // strex exResult, dataReg, [addrReg] - // cmp exResult, 0 - // bne retry - // compareFail: - // dmb ish - - instruction insLd = INS_ldrex; - instruction insSt = INS_strex; - if (varTypeIsByte(treeNode->TypeGet())) - { - insLd = INS_ldrexb; - insSt = INS_strexb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - insLd = INS_ldrexh; - insSt = INS_strexh; - } - - instGen_MemoryBarrier(); - - BasicBlock* labelRetry = genCreateTempLabel(); - BasicBlock* labelCompareFail = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - - // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); - - if (comparand->isContainedIntOrIImmed()) - { - if (comparand->IsIntegralConst(0) && emitter::isLowRegister(targetReg)) - { - GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelCompareFail, targetReg); - } - else - { - assert(comparand->AsIntConCommon()->IconValue() <= INT32_MAX); - GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, - (target_ssize_t)comparand->AsIntConCommon()->IconValue()); - GetEmitter()->emitIns_J(INS_bne, labelCompareFail); - } - } - else - { - GetEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, targetReg, comparandReg); - GetEmitter()->emitIns_J(INS_bne, labelCompareFail); - } - - // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); - - GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, exResultReg, 0); - GetEmitter()->emitIns_J(INS_bne, labelRetry); - - genDefineTempLabel(labelCompareFail); - - instGen_MemoryBarrier(); - - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); - - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - - genProduceReg(treeNode); -} - //------------------------------------------------------------------------ // genGetInsForOper: Return instruction encoding of the operation tree. // diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 4ecbcc80a1ce75..2316551ca60840 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3820,327 +3820,6 @@ void CodeGen::genJumpTable(GenTree* treeNode) genProduceReg(treeNode); } -//------------------------------------------------------------------------ -// genLockedInstructions: Generate code for a GT_XADD, GT_XAND, GT_XORR or GT_XCHG node. 
-// -// Arguments: -// treeNode - the GT_XADD/XAND/XORR/XCHG node -// -void CodeGen::genLockedInstructions(GenTreeOp* treeNode) -{ - GenTree* data = treeNode->AsOp()->gtOp2; - GenTree* addr = treeNode->AsOp()->gtOp1; - regNumber targetReg = treeNode->GetRegNum(); - regNumber dataReg = data->GetRegNum(); - regNumber addrReg = addr->GetRegNum(); - - genConsumeAddress(addr); - genConsumeRegs(data); - - assert(treeNode->OperIs(GT_XCHG) || !varTypeIsSmall(treeNode->TypeGet())); - - emitAttr dataSize = emitActualTypeSize(data); - - if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - assert(!data->isContainedIntOrIImmed()); - - switch (treeNode->gtOper) - { - case GT_XORR: - GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, - addrReg); - break; - case GT_XAND: - { - // Grab a temp reg to perform `MVN` for dataReg first. - regNumber tempReg = internalRegisters.GetSingle(treeNode); - GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg); - GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, - addrReg); - break; - } - case GT_XCHG: - { - instruction ins = INS_swpal; - if (varTypeIsByte(treeNode->TypeGet())) - { - ins = INS_swpalb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - ins = INS_swpalh; - } - GetEmitter()->emitIns_R_R_R(ins, dataSize, dataReg, targetReg, addrReg); - break; - } - case GT_XADD: - GetEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, - addrReg); - break; - default: - assert(!"Unexpected treeNode->gtOper"); - } - } - else - { - // These are imported normally if Atomics aren't supported. - assert(!treeNode->OperIs(GT_XORR, GT_XAND)); - - regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); - regNumber storeDataReg = - (treeNode->OperGet() == GT_XCHG) ? dataReg : internalRegisters.Extract(treeNode, RBM_ALLINT); - regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; - - // Check allocator assumptions - // - // The register allocator should have extended the lifetimes of all input and internal registers so that - // none interfere with the target. - noway_assert(addrReg != targetReg); - - noway_assert(addrReg != loadReg); - noway_assert(dataReg != loadReg); - - noway_assert(addrReg != storeDataReg); - noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); - - assert(addr->isUsedFromReg()); - noway_assert(exResultReg != REG_NA); - noway_assert(exResultReg != targetReg); - noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); - - // Store exclusive unpredictable cases must be avoided - noway_assert(exResultReg != storeDataReg); - noway_assert(exResultReg != addrReg); - - // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input - // registers - // die at the first instruction generated by the node. This is not the case for these atomics as the input - // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until - // we are finished generating the code for this node. 
- - gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); - - // Emit code like this: - // retry: - // ldxr loadReg, [addrReg] - // add storeDataReg, loadReg, dataReg # Only for GT_XADD - // # GT_XCHG storeDataReg === dataReg - // stxr exResult, storeDataReg, [addrReg] - // cbnz exResult, retry - // dmb ish - - BasicBlock* labelRetry = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - - instruction insLd = INS_ldaxr; - instruction insSt = INS_stlxr; - if (varTypeIsByte(treeNode->TypeGet())) - { - insLd = INS_ldaxrb; - insSt = INS_stlxrb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - insLd = INS_ldaxrh; - insSt = INS_stlxrh; - } - - // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg); - - switch (treeNode->OperGet()) - { - case GT_XADD: - if (data->isContainedIntOrIImmed()) - { - // Even though INS_add is specified here, the encoder will choose either - // an INS_add or an INS_sub and encode the immediate as a positive value - genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), - REG_NA); - } - else - { - GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg); - } - break; - case GT_XCHG: - assert(!data->isContained()); - storeDataReg = dataReg; - break; - default: - unreached(); - } - - // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, storeDataReg, addrReg); - - GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); - - instGen_MemoryBarrier(); - - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); - } - - if (targetReg != REG_NA) - { - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - - genProduceReg(treeNode); - } -} - -//------------------------------------------------------------------------ -// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. 
-// -// Arguments: -// tree - the GT_CMPXCHG node -// -void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) -{ - assert(treeNode->OperIs(GT_CMPXCHG)); - - GenTree* addr = treeNode->Addr(); // arg1 - GenTree* data = treeNode->Data(); // arg2 - GenTree* comparand = treeNode->Comparand(); // arg3 - - regNumber targetReg = treeNode->GetRegNum(); - regNumber dataReg = data->GetRegNum(); - regNumber addrReg = addr->GetRegNum(); - regNumber comparandReg = comparand->GetRegNum(); - - genConsumeAddress(addr); - genConsumeRegs(data); - genConsumeRegs(comparand); - - emitAttr dataSize = emitActualTypeSize(data); - - if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // casal use the comparand as the target reg - GetEmitter()->emitIns_Mov(INS_mov, dataSize, targetReg, comparandReg, /* canSkip */ true); - - // Catch case we destroyed data or address before use - noway_assert((addrReg != targetReg) || (targetReg == comparandReg)); - noway_assert((dataReg != targetReg) || (targetReg == comparandReg)); - - instruction ins = INS_casal; - if (varTypeIsByte(treeNode->TypeGet())) - { - ins = INS_casalb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - ins = INS_casalh; - } - GetEmitter()->emitIns_R_R_R(ins, dataSize, targetReg, dataReg, addrReg); - } - else - { - regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); - - // Check allocator assumptions - // - // The register allocator should have extended the lifetimes of all input and internal registers so that - // none interfere with the target. - noway_assert(addrReg != targetReg); - noway_assert(dataReg != targetReg); - noway_assert(comparandReg != targetReg); - noway_assert(addrReg != dataReg); - noway_assert(targetReg != REG_NA); - noway_assert(exResultReg != REG_NA); - noway_assert(exResultReg != targetReg); - - assert(addr->isUsedFromReg()); - assert(data->isUsedFromReg()); - assert(!comparand->isUsedFromMemory()); - - // Store exclusive unpredictable cases must be avoided - noway_assert(exResultReg != dataReg); - noway_assert(exResultReg != addrReg); - - // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input - // registers - // die at the first instruction generated by the node. This is not the case for these atomics as the input - // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until - // we are finished generating the code for this node. 
- - gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); - - // Emit code like this: - // retry: - // ldxr targetReg, [addrReg] - // cmp targetReg, comparandReg - // bne compareFail - // stxr exResult, dataReg, [addrReg] - // cbnz exResult, retry - // compareFail: - // dmb ish - - BasicBlock* labelRetry = genCreateTempLabel(); - BasicBlock* labelCompareFail = genCreateTempLabel(); - genDefineTempLabel(labelRetry); - - instruction insLd = INS_ldaxr; - instruction insSt = INS_stlxr; - if (varTypeIsByte(treeNode->TypeGet())) - { - insLd = INS_ldaxrb; - insSt = INS_stlxrb; - } - else if (varTypeIsShort(treeNode->TypeGet())) - { - insLd = INS_ldaxrh; - insSt = INS_stlxrh; - } - - // The following instruction includes a acquire half barrier - GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); - - if (comparand->isContainedIntOrIImmed()) - { - if (comparand->IsIntegralConst(0)) - { - GetEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg); - } - else - { - GetEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg, - comparand->AsIntConCommon()->IconValue()); - GetEmitter()->emitIns_J(INS_bne, labelCompareFail); - } - } - else - { - GetEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg); - GetEmitter()->emitIns_J(INS_bne, labelCompareFail); - } - - // The following instruction includes a release half barrier - GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); - - GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); - - genDefineTempLabel(labelCompareFail); - - instGen_MemoryBarrier(); - - gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); - } - - if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) - { - instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; - GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); - } - - genProduceReg(treeNode); -} - instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type) { instruction ins = INS_BREAKPOINT; diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 13cfd1f193eb46..ad0502e7639afc 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -757,6 +757,425 @@ void CodeGen::genIntrinsic(GenTreeIntrinsic* treeNode) genProduceReg(treeNode); } +//------------------------------------------------------------------------ +// genLockedInstructions: Generate code for a GT_XADD, GT_XAND, GT_XORR or GT_XCHG node. +// +// Arguments: +// treeNode - the GT_XADD/XAND/XORR/XCHG node +// +void CodeGen::genLockedInstructions(GenTreeOp* treeNode) +{ + GenTree* data = treeNode->AsOp()->gtOp2; + GenTree* addr = treeNode->AsOp()->gtOp1; + regNumber targetReg = treeNode->GetRegNum(); + regNumber dataReg = data->GetRegNum(); + regNumber addrReg = addr->GetRegNum(); + + genConsumeAddress(addr); + genConsumeRegs(data); + + assert(treeNode->OperIs(GT_XCHG) || !varTypeIsSmall(treeNode->TypeGet())); + + emitAttr dataSize = emitActualTypeSize(data); + +#ifdef TARGET_ARM64 + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + { + assert(!data->isContainedIntOrIImmed()); + + switch (treeNode->gtOper) + { + case GT_XORR: + GetEmitter()->emitIns_R_R_R(INS_ldsetal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + addrReg); + break; + case GT_XAND: + { + // Grab a temp reg to perform `MVN` for dataReg first. 
+ regNumber tempReg = internalRegisters.GetSingle(treeNode); + GetEmitter()->emitIns_R_R(INS_mvn, dataSize, tempReg, dataReg); + GetEmitter()->emitIns_R_R_R(INS_ldclral, dataSize, tempReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + addrReg); + break; + } + case GT_XCHG: + { + instruction ins = INS_swpal; + if (varTypeIsByte(treeNode->TypeGet())) + { + ins = INS_swpalb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + ins = INS_swpalh; + } + GetEmitter()->emitIns_R_R_R(ins, dataSize, dataReg, targetReg, addrReg); + break; + } + case GT_XADD: + GetEmitter()->emitIns_R_R_R(INS_ldaddal, dataSize, dataReg, (targetReg == REG_NA) ? REG_ZR : targetReg, + addrReg); + break; + default: + assert(!"Unexpected treeNode->gtOper"); + } + } + else +#endif // TARGET_ARM64 + { + // These are imported normally if Atomics aren't supported. + assert(!treeNode->OperIs(GT_XORR, GT_XAND)); + + regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); + regNumber storeDataReg = + (treeNode->OperGet() == GT_XCHG) ? dataReg : internalRegisters.Extract(treeNode, RBM_ALLINT); + regNumber loadReg = (targetReg != REG_NA) ? targetReg : storeDataReg; + + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target. + noway_assert(addrReg != targetReg); + + noway_assert(addrReg != loadReg); + noway_assert(dataReg != loadReg); + + noway_assert(addrReg != storeDataReg); + noway_assert((treeNode->OperGet() == GT_XCHG) || (addrReg != dataReg)); + + assert(addr->isUsedFromReg()); + noway_assert(exResultReg != REG_NA); + noway_assert(exResultReg != targetReg); + noway_assert((targetReg != REG_NA) || (treeNode->OperGet() != GT_XCHG)); + + // Store exclusive unpredictable cases must be avoided + noway_assert(exResultReg != storeDataReg); + noway_assert(exResultReg != addrReg); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input + // registers + // die at the first instruction generated by the node. This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. 
+ + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // Emit code like this on arm64: + // retry: + // ldxr loadReg, [addrReg] + // add storeDataReg, loadReg, dataReg # Only for GT_XADD + // # GT_XCHG storeDataReg === dataReg + // stxr exResult, storeDataReg, [addrReg] + // cbnz exResult, retry + // dmb ish + + // Emit code like this on arm32: + // dmb ish + // retry: + // ldrex loadReg, [addrReg] + // add storeDataReg, loadReg, dataReg # Only for GT_XADD + // # GT_XCHG storeDataReg === dataReg + // strex tempReg, storeDataReg, [addrReg] + // cmp tempReg, 0 + // bne retry + // dmb ish + + +#ifndef TARGET_ARM64 + instGen_MemoryBarrier(); +#endif + + BasicBlock* labelRetry = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + instruction insLd, insSt; +#ifdef TARGET_ARM64 + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldaxrb; + insSt = INS_stlxrb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldaxrh; + insSt = INS_stlxrh; + } + else + { + insLd = INS_ldaxr; + insSt = INS_stlxr; + } +#else + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldrexb; + insSt = INS_strexb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldrexh; + insSt = INS_strexh; + } + else + { + insLd = INS_ldrex; + insSt = INS_strex; + } +#endif + + // The following instruction includes a acquire half barrier + GetEmitter()->emitIns_R_R(insLd, dataSize, loadReg, addrReg); + + switch (treeNode->OperGet()) + { + case GT_XADD: + if (data->isContainedIntOrIImmed()) + { + // Even though INS_add is specified here, the encoder will choose either + // an INS_add or an INS_sub and encode the immediate as a positive value + genInstrWithConstant(INS_add, dataSize, storeDataReg, loadReg, data->AsIntConCommon()->IconValue(), +#ifdef TARGET_ARM + INS_FLAGS_DONT_CARE, +#endif + exResultReg); + } + else + { + GetEmitter()->emitIns_R_R_R(INS_add, dataSize, storeDataReg, loadReg, dataReg); + } + break; + case GT_XCHG: + assert(!data->isContained()); + storeDataReg = dataReg; + break; + default: + unreached(); + } + + // The following instruction includes a release half barrier + GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, storeDataReg, addrReg); + +#ifdef TARGET_ARM64 + GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); +#else + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, exResultReg, 0); + GetEmitter()->emitIns_J(INS_bne, labelRetry); +#endif // TARGET_ARM64 + + instGen_MemoryBarrier(); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + } + + if (targetReg != REG_NA) + { + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + + genProduceReg(treeNode); + } +} + +//------------------------------------------------------------------------ +// genCodeForCmpXchg: Produce code for a GT_CMPXCHG node. 
+// +// Arguments: +// tree - the GT_CMPXCHG node +// +void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode) +{ + assert(treeNode->OperIs(GT_CMPXCHG)); + + GenTree* addr = treeNode->Addr(); // arg1 + GenTree* data = treeNode->Data(); // arg2 + GenTree* comparand = treeNode->Comparand(); // arg3 + + regNumber targetReg = treeNode->GetRegNum(); + regNumber dataReg = data->GetRegNum(); + regNumber addrReg = addr->GetRegNum(); + regNumber comparandReg = comparand->GetRegNum(); + + genConsumeAddress(addr); + genConsumeRegs(data); + genConsumeRegs(comparand); + + emitAttr dataSize = emitActualTypeSize(data); + +#ifdef TARGET_ARM64 + if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) + { + // casal use the comparand as the target reg + GetEmitter()->emitIns_Mov(INS_mov, dataSize, targetReg, comparandReg, /* canSkip */ true); + + // Catch case we destroyed data or address before use + noway_assert((addrReg != targetReg) || (targetReg == comparandReg)); + noway_assert((dataReg != targetReg) || (targetReg == comparandReg)); + + instruction ins = INS_casal; + if (varTypeIsByte(treeNode->TypeGet())) + { + ins = INS_casalb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + ins = INS_casalh; + } + GetEmitter()->emitIns_R_R_R(ins, dataSize, targetReg, dataReg, addrReg); + } + else +#endif // TARGET_ARM64 + { + regNumber exResultReg = internalRegisters.Extract(treeNode, RBM_ALLINT); + + // Check allocator assumptions + // + // The register allocator should have extended the lifetimes of all input and internal registers so that + // none interfere with the target. + noway_assert(addrReg != targetReg); + noway_assert(dataReg != targetReg); + noway_assert(comparandReg != targetReg); + noway_assert(addrReg != dataReg); + noway_assert(targetReg != REG_NA); + noway_assert(exResultReg != REG_NA); + noway_assert(exResultReg != targetReg); + + assert(addr->isUsedFromReg()); + assert(data->isUsedFromReg()); + assert(!comparand->isUsedFromMemory()); + + // Store exclusive unpredictable cases must be avoided + noway_assert(exResultReg != dataReg); + noway_assert(exResultReg != addrReg); + + // NOTE: `genConsumeAddress` marks the consumed register as not a GC pointer, as it assumes that the input + // registers + // die at the first instruction generated by the node. This is not the case for these atomics as the input + // registers are multiply-used. As such, we need to mark the addr register as containing a GC pointer until + // we are finished generating the code for this node. 
+ + gcInfo.gcMarkRegPtrVal(addrReg, addr->TypeGet()); + + // Emit code like this on arm64: + // retry: + // ldxr targetReg, [addrReg] + // cmp targetReg, comparandReg + // bne compareFail + // stxr exResult, dataReg, [addrReg] + // cbnz exResult, retry + // compareFail: + // dmb ish + + // Emit code like this on arm32: + // dmb ish + // retry: + // ldrex targetReg, [addrReg] + // cmp targetReg, comparandReg + // bne compareFail + // strex exResult, dataReg, [addrReg] + // cmp exResult, 0 + // bne retry + // compareFail: + // dmb ish + +#ifndef TARGET_ARM64 + instGen_MemoryBarrier(); +#endif + + BasicBlock* labelRetry = genCreateTempLabel(); + BasicBlock* labelCompareFail = genCreateTempLabel(); + genDefineTempLabel(labelRetry); + + instruction insLd, insSt; +#ifdef TARGET_ARM64 + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldaxrb; + insSt = INS_stlxrb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldaxrh; + insSt = INS_stlxrh; + } + else + { + insLd = INS_ldaxr; + insSt = INS_stlxr; + } +#else + if (varTypeIsByte(treeNode->TypeGet())) + { + insLd = INS_ldrexb; + insSt = INS_strexb; + } + else if (varTypeIsShort(treeNode->TypeGet())) + { + insLd = INS_ldrexh; + insSt = INS_strexh; + } + else + { + insLd = INS_ldrex; + insSt = INS_strex; + } +#endif + + // The following instruction includes a acquire half barrier + GetEmitter()->emitIns_R_R(insLd, dataSize, targetReg, addrReg); + + if (comparand->isContainedIntOrIImmed()) + { + if (comparand->IsIntegralConst(0) +#ifdef TARGET_ARM + && emitter::isLowRegister(targetReg) +#endif + ) + { + GetEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg); + } + else + { + ssize_t value = comparand->AsIntConCommon()->IconValue(); + assert(value <= TARGET_SSIZE_MAX && value >= TARGET_SSIZE_MIN); + GetEmitter()->emitIns_R_I(INS_cmp, emitActualTypeSize(treeNode), targetReg, (target_ssize_t)value); + GetEmitter()->emitIns_J(INS_bne, labelCompareFail); + } + } + else + { + GetEmitter()->emitIns_R_R(INS_cmp, emitActualTypeSize(treeNode), targetReg, comparandReg); + GetEmitter()->emitIns_J(INS_bne, labelCompareFail); + } + + // The following instruction includes a release half barrier + GetEmitter()->emitIns_R_R_R(insSt, dataSize, exResultReg, dataReg, addrReg); + +#ifdef TARGET_ARM64 + GetEmitter()->emitIns_J_R(INS_cbnz, EA_4BYTE, labelRetry, exResultReg); +#else + GetEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, exResultReg, 0); + GetEmitter()->emitIns_J(INS_bne, labelRetry); +#endif // TARGET_ARM64 + + genDefineTempLabel(labelCompareFail); + + instGen_MemoryBarrier(); + + gcInfo.gcMarkRegSetNpt(addr->gtGetRegMask()); + } + + if (varTypeIsSmall(treeNode->TypeGet()) && varTypeIsSigned(treeNode->TypeGet())) + { + instruction mov = varTypeIsShort(treeNode->TypeGet()) ? INS_sxth : INS_sxtb; + GetEmitter()->emitIns_Mov(mov, EA_4BYTE, targetReg, targetReg, /* canSkip */ false); + } + + genProduceReg(treeNode); +} + //--------------------------------------------------------------------- // genPutArgStk - generate code for a GT_PUTARG_STK node // diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index b2f8949b490819..2455586f6328ce 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -72,9 +72,10 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const // TODO-CrossBitness: we wouldn't need the cast below if GenTreeIntCon::gtIconVal had target_ssize_t type. 
target_ssize_t immVal = (target_ssize_t)childNode->AsIntCon()->gtIconVal; - emitAttr attr = emitActualTypeSize(childNode->TypeGet()); - emitAttr size = EA_SIZE(attr); -#ifdef TARGET_ARM +#ifdef TARGET_ARM64 + emitAttr attr = emitActualTypeSize(childNode->TypeGet()); + emitAttr size = EA_SIZE(attr); +#else insFlags flags = parentNode->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE; #endif @@ -83,11 +84,17 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const case GT_ADD: case GT_SUB: #ifdef TARGET_ARM - case GT_XADD: return emitter::emitIns_valid_imm_for_add(immVal, flags); #else return emitter::emitIns_valid_imm_for_add(immVal, size); +#endif +#ifdef TARGET_ARM + case GT_XADD: + return emitter::emitIns_valid_imm_for_add(immVal, flags); + case GT_CMPXCHG: + return emitter::emitIns_valid_imm_for_cmp(immVal, flags); +#else case GT_XADD: case GT_LOCKADD: case GT_XORR: @@ -95,12 +102,7 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : emitter::emitIns_valid_imm_for_add(immVal, size); -#endif // TARGET_ARM - case GT_CMPXCHG: -#ifdef TARGET_ARM - return emitter::emitIns_valid_imm_for_cmp(immVal, flags); -#else return comp->compOpportunisticallyDependsOn(InstructionSet_Atomics) ? false : emitter::emitIns_valid_imm_for_cmp(immVal, size); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5d49673ffc4bee..884dd20eb7d710 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2028,6 +2028,9 @@ class LinearScan : public LinearScanInterface int BuildIndir(GenTreeIndir* indirTree); int BuildGCWriteBarrier(GenTree* tree); int BuildCast(GenTreeCast* cast); +#ifdef TARGET_ARMARCH + int BuildAtomic(GenTree* atomic, int dstCount); +#endif #if defined(TARGET_XARCH) // returns true if the tree can use the read-modify-write memory instruction form diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 63e6951bd07ae0..f3bc57042eb4b5 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -571,76 +571,10 @@ int LinearScan::BuildNode(GenTree* tree) break; case GT_CMPXCHG: - { - GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); - srcCount = cmpXchgNode->Comparand()->isContained() ? 2 : 3; - assert(dstCount == 1); - - // For ARM exclusives requires a single internal register - buildInternalIntRegisterDefForNode(tree); - - // For ARM exclusives the lifetime of the addr and data must be extended because - // it may be used multiple during retries - - RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->Addr()); - setDelayFree(locationUse); - RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->Data()); - setDelayFree(valueUse); - if (!cmpXchgNode->Comparand()->isContained()) - { - RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->Comparand()); - - // For ARM exclusives the lifetime of the comparand must be extended because - // it may be used multiple during retries - setDelayFree(comparandUse); - } - - // Internals may not collide with target - setInternalRegsDelayFree = true; - buildInternalRegisterUses(); - BuildDef(tree); - } - break; - case GT_XADD: case GT_XCHG: - { - assert(dstCount == (tree->TypeIs(TYP_VOID) ? 0 : 1)); - srcCount = tree->gtGetOp2()->isContained() ? 
1 : 2; - - buildInternalIntRegisterDefForNode(tree); - if (tree->OperGet() != GT_XCHG) - { - buildInternalIntRegisterDefForNode(tree); - } - - assert(!tree->gtGetOp1()->isContained()); - RefPosition* op1Use = BuildUse(tree->gtGetOp1()); - RefPosition* op2Use = nullptr; - if (!tree->gtGetOp2()->isContained()) - { - op2Use = BuildUse(tree->gtGetOp2()); - } - - // For ARM exclusives the lifetime of the addr and data must be extended because - // it may be used multiple during retries - // Internals may not collide with target - if (dstCount == 1) - { - setDelayFree(op1Use); - if (op2Use != nullptr) - { - setDelayFree(op2Use); - } - setInternalRegsDelayFree = true; - } - buildInternalRegisterUses(); - if (dstCount == 1) - { - BuildDef(tree); - } - } - break; + srcCount = BuildAtomic(tree, dstCount); + break; case GT_CALL: srcCount = BuildCall(tree->AsCall()); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 34092933aaffe6..dd1a5381d4e574 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -961,101 +961,14 @@ int LinearScan::BuildNode(GenTree* tree) buildInternalRegisterUses(); break; - case GT_CMPXCHG: - { - GenTreeCmpXchg* cmpXchgNode = tree->AsCmpXchg(); - srcCount = cmpXchgNode->Comparand()->isContained() ? 2 : 3; - assert(dstCount == 1); - - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // For ARMv8 exclusives requires a single internal register - buildInternalIntRegisterDefForNode(tree); - } - - // For ARMv8 exclusives the lifetime of the addr and data must be extended because - // it may be used used multiple during retries - - // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent - // them being reused as the target register which must be destroyed early - - RefPosition* locationUse = BuildUse(tree->AsCmpXchg()->Addr()); - setDelayFree(locationUse); - RefPosition* valueUse = BuildUse(tree->AsCmpXchg()->Data()); - setDelayFree(valueUse); - if (!cmpXchgNode->Comparand()->isContained()) - { - RefPosition* comparandUse = BuildUse(tree->AsCmpXchg()->Comparand()); - - // For ARMv8 exclusives the lifetime of the comparand must be extended because - // it may be used used multiple during retries - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - setDelayFree(comparandUse); - } - } - - // Internals may not collide with target - setInternalRegsDelayFree = true; - buildInternalRegisterUses(); - BuildDef(tree); - } - break; - case GT_LOCKADD: case GT_XORR: case GT_XAND: case GT_XADD: case GT_XCHG: - { - assert(dstCount == (tree->TypeIs(TYP_VOID) ? 0 : 1)); - srcCount = tree->gtGetOp2()->isContained() ? 1 : 2; - - if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics)) - { - // GT_XCHG requires a single internal register; the others require two. - buildInternalIntRegisterDefForNode(tree); - if (tree->OperGet() != GT_XCHG) - { - buildInternalIntRegisterDefForNode(tree); - } - } - else if (tree->OperIs(GT_XAND)) - { - // for ldclral we need an internal register. 
-                buildInternalIntRegisterDefForNode(tree);
-            }
-
-            assert(!tree->gtGetOp1()->isContained());
-            RefPosition* op1Use = BuildUse(tree->gtGetOp1());
-            RefPosition* op2Use = nullptr;
-            if (!tree->gtGetOp2()->isContained())
-            {
-                op2Use = BuildUse(tree->gtGetOp2());
-            }
-
-            // For ARMv8 exclusives the lifetime of the addr and data must be extended because
-            // it may be used used multiple during retries
-            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
-            {
-                // Internals may not collide with target
-                if (dstCount == 1)
-                {
-                    setDelayFree(op1Use);
-                    if (op2Use != nullptr)
-                    {
-                        setDelayFree(op2Use);
-                    }
-                    setInternalRegsDelayFree = true;
-                }
-            }
-            buildInternalRegisterUses();
-            if (dstCount == 1)
-            {
-                BuildDef(tree);
-            }
-        }
-        break;
+        case GT_CMPXCHG:
+            srcCount = BuildAtomic(tree, dstCount);
+            break;
 
 #if FEATURE_ARG_SPLIT
         case GT_PUTARG_SPLIT:
diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp
index a99ef06cc3e578..e120e9f103bd64 100644
--- a/src/coreclr/jit/lsraarmarch.cpp
+++ b/src/coreclr/jit/lsraarmarch.cpp
@@ -893,6 +893,126 @@ int LinearScan::BuildCast(GenTreeCast* cast)
     return srcCount;
 }
 
+//------------------------------------------------------------------------
+// BuildAtomic: Set the NodeInfo for an atomic operation node.
+//
+// Arguments:
+//    atomic - The GT_XADD/XAND/XORR/XCHG/CMPXCHG node
+//
+// Return Value:
+//    The number of sources consumed by this node.
+//
+int LinearScan::BuildAtomic(GenTree* atomic, int dstCount)
+{
+    int srcCount;
+    switch (atomic->OperGet())
+    {
+        case GT_CMPXCHG:
+        {
+            GenTreeCmpXchg* cmpXchgNode = atomic->AsCmpXchg();
+            srcCount = cmpXchgNode->Comparand()->isContained() ? 2 : 3;
+            assert(dstCount == 1);
+
+#ifdef TARGET_ARM64
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+#endif
+            {
+                // For legacy exclusives a single internal register is required
+                buildInternalIntRegisterDefForNode(atomic);
+            }
+
+            // For legacy exclusives the lifetime of the addr and data must be extended because
+            // it may be used multiple times during retries
+
+            // For ARMv8.1 atomic cas the lifetime of the addr and data must be extended to prevent
+            // them being reused as the target register which must be destroyed early
+
+            RefPosition* locationUse = BuildUse(atomic->AsCmpXchg()->Addr());
+            setDelayFree(locationUse);
+            RefPosition* valueUse = BuildUse(atomic->AsCmpXchg()->Data());
+            setDelayFree(valueUse);
+            if (!cmpXchgNode->Comparand()->isContained())
+            {
+                RefPosition* comparandUse = BuildUse(atomic->AsCmpXchg()->Comparand());
+
+                // For legacy exclusives the lifetime of the comparand must be extended because
+                // it may be used multiple times during retries
+#ifdef TARGET_ARM64
+                if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+#endif
+                {
+                    setDelayFree(comparandUse);
+                }
+            }
+
+            // Internals may not collide with target
+            setInternalRegsDelayFree = true;
+            buildInternalRegisterUses();
+            BuildDef(atomic);
+        }
+        break;
+
+        case GT_LOCKADD:
+        case GT_XORR:
+        case GT_XAND:
+        case GT_XADD:
+        case GT_XCHG:
+        {
+            assert(dstCount == (atomic->TypeIs(TYP_VOID) ? 0 : 1));
+            srcCount = atomic->gtGetOp2()->isContained() ? 1 : 2;
+
+#ifdef TARGET_ARM64
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+#endif
+            {
+                buildInternalIntRegisterDefForNode(atomic);
+            }
+
+            // GT_XCHG requires a single internal register; the others require two.
+            if (atomic->OperGet() != GT_XCHG)
+            {
+                buildInternalIntRegisterDefForNode(atomic);
+            }
+
+            assert(!atomic->gtGetOp1()->isContained());
+            RefPosition* op1Use = BuildUse(atomic->gtGetOp1());
+            RefPosition* op2Use = nullptr;
+            if (!atomic->gtGetOp2()->isContained())
+            {
+                op2Use = BuildUse(atomic->gtGetOp2());
+            }
+
+            // For legacy exclusives the lifetime of the addr and data must be extended because
+            // it may be used multiple times during retries
+#ifdef TARGET_ARM64
+            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
+#endif
+            {
+                // Internals may not collide with target
+                if (dstCount == 1)
+                {
+                    setDelayFree(op1Use);
+                    if (op2Use != nullptr)
+                    {
+                        setDelayFree(op2Use);
+                    }
+                    setInternalRegsDelayFree = true;
+                }
+            }
+            buildInternalRegisterUses();
+            if (dstCount == 1)
+            {
+                BuildDef(atomic);
+            }
+        }
+        break;
+
+        default:
+            unreached();
+    }
+    return srcCount;
+}
+
 //------------------------------------------------------------------------
 // BuildSelect: Build RefPositions for a GT_SELECT node.
 //
diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h
index d5073aa9ce7f4e..ec466a5a4d9ade 100644
--- a/src/coreclr/jit/target.h
+++ b/src/coreclr/jit/target.h
@@ -1164,13 +1164,23 @@ C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE);
 #ifdef TARGET_64BIT
 typedef uint64_t target_size_t;
 typedef int64_t target_ssize_t;
-#define TARGET_SIGN_BIT (1ULL << 63)
+#define TARGET_SIGN_BIT (1ULL << 63)
+
+#define TARGET_SIZE_MAX UINT64_MAX
+#define TARGET_SSIZE_MAX INT64_MAX
+#define TARGET_SIZE_MIN UINT64_MIN
+#define TARGET_SSIZE_MIN INT64_MIN
 
 #else // !TARGET_64BIT
 typedef unsigned int target_size_t;
 typedef int target_ssize_t;
-#define TARGET_SIGN_BIT (1ULL << 31)
+#define TARGET_SIGN_BIT (1ULL << 31)
+
+#define TARGET_SIZE_MAX UINT32_MAX
+#define TARGET_SSIZE_MAX INT32_MAX
+#define TARGET_SIZE_MIN UINT32_MIN
+#define TARGET_SSIZE_MIN INT32_MIN
 
 #endif // !TARGET_64BIT
 
 C_ASSERT(sizeof(target_size_t) == TARGET_POINTER_SIZE);

From 916c10a3bf92c16fab3a5da5ed0558cf8dda4e7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Petryka?=
Date: Sat, 15 Jun 2024 00:31:01 +0200
Subject: [PATCH 12/13] Fix Atomics LSRA

---
 src/coreclr/jit/lsraarmarch.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp
index e120e9f103bd64..79eed39eac17fb 100644
--- a/src/coreclr/jit/lsraarmarch.cpp
+++ b/src/coreclr/jit/lsraarmarch.cpp
@@ -962,16 +962,24 @@ int LinearScan::BuildAtomic(GenTree* atomic, int dstCount)
             srcCount = atomic->gtGetOp2()->isContained() ? 1 : 2;
 
 #ifdef TARGET_ARM64
-            if (!compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
-#endif
+            if (compiler->compOpportunisticallyDependsOn(InstructionSet_Atomics))
             {
-                buildInternalIntRegisterDefForNode(atomic);
+                if (atomic->OperIs(GT_XAND))
+                {
+                    // for ldclral we need an internal register.
+                    buildInternalIntRegisterDefForNode(atomic);
+                }
             }
-
-            // GT_XCHG requires a single internal register; the others require two.
-            if (atomic->OperGet() != GT_XCHG)
+            else
+#endif
             {
                 buildInternalIntRegisterDefForNode(atomic);
+
+                // GT_XCHG requires a single internal register; the others require two.
+                if (atomic->OperGet() != GT_XCHG)
+                {
+                    buildInternalIntRegisterDefForNode(atomic);
+                }
             }
 
             assert(!atomic->gtGetOp1()->isContained());
             RefPosition* op1Use = BuildUse(atomic->gtGetOp1());

From 0fcd4e9ad5dab06ac4a6c5e13fafb058c4c0ee9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Petryka?=
Date: Sat, 15 Jun 2024 01:18:51 +0200
Subject: [PATCH 13/13] Format

---
 src/coreclr/jit/codegenarmarch.cpp | 3 +--
 src/coreclr/jit/target.h           | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index ad0502e7639afc..ab6ac93f9cff93 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -880,7 +880,6 @@ void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
     //   bne    retry
     //   dmb    ish
 
-
 #ifndef TARGET_ARM64
     instGen_MemoryBarrier();
 #endif
@@ -1132,7 +1131,7 @@ void CodeGen::genCodeForCmpXchg(GenTreeCmpXchg* treeNode)
 #ifdef TARGET_ARM
             && emitter::isLowRegister(targetReg)
 #endif
-            )
+        )
         {
             GetEmitter()->emitIns_J_R(INS_cbnz, emitActualTypeSize(treeNode), labelCompareFail, targetReg);
         }
diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h
index ec466a5a4d9ade..2742c274c1cb61 100644
--- a/src/coreclr/jit/target.h
+++ b/src/coreclr/jit/target.h
@@ -1165,7 +1165,7 @@ C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE);
 typedef uint64_t target_size_t;
 typedef int64_t target_ssize_t;
 
-#define TARGET_SIGN_BIT (1ULL << 63)
+#define TARGET_SIGN_BIT (1ULL << 63)
@@ -1175,7 +1175,7 @@ typedef int64_t target_ssize_t;
 typedef unsigned int target_size_t;
 typedef int target_ssize_t;
 
-#define TARGET_SIGN_BIT (1ULL << 31)
+#define TARGET_SIGN_BIT (1ULL << 31)
 
 #define TARGET_SIZE_MAX UINT32_MAX
 #define TARGET_SSIZE_MAX INT32_MAX
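
[Reviewer note, not part of the patch series: the smoke test below is a minimal sketch for eyeballing the new expansions by hand. The class layout and names are invented for this note, and DOTNET_JitDisasm is assumed to be the usual switch for dumping generated code; only the Interlocked overloads themselves come from the patches above. On a linux-arm build, each call should now compile to an inline ldrex/strex retry loop, with ldrexb/strexb and ldrexh/strexh selected for the small types, instead of a call into the shared fallback.]

    using System;
    using System.Threading;

    // Exercises the Interlocked shapes that this series turns into JIT
    // intrinsics on ARM32: byte/short/int exchange and compare-exchange,
    // plus the int exchange-add used by Interlocked.Add.
    internal static class Arm32AtomicsSmokeTest
    {
        private static byte s_u8 = 1;
        private static short s_i16 = 2;
        private static int s_i32 = 3;

        private static void Main()
        {
            Console.WriteLine(Interlocked.Exchange(ref s_u8, 10));             // ldrexb/strexb loop
            Console.WriteLine(Interlocked.Exchange(ref s_i16, 20));            // ldrexh/strexh loop
            Console.WriteLine(Interlocked.Exchange(ref s_i32, 30));            // ldrex/strex loop
            Console.WriteLine(Interlocked.CompareExchange(ref s_u8, 11, 10));  // ldrexb + cmp + strexb
            Console.WriteLine(Interlocked.CompareExchange(ref s_i16, 21, 20)); // ldrexh + cmp + strexh
            Console.WriteLine(Interlocked.CompareExchange(ref s_i32, 31, 30)); // ldrex + cmp + strex
            Console.WriteLine(Interlocked.Add(ref s_i32, 5));                  // ldrex + add + strex
        }
    }

Running with, for example, DOTNET_JitDisasm=Main set in the environment should show the retry loops emitted inline, bracketed by the dmb ish barriers, and for the small signed types the trailing sxtb/sxth that genLockedInstructions and genCodeForCmpXchg emit to normalize the result.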