From f4367120332a3b19cae889fd474ebc0cb103ec66 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 6 May 2024 11:43:40 -0700 Subject: [PATCH 01/10] Convert regMaskTP for ARM64 to struct with single field --- src/coreclr/jit/compiler.hpp | 65 +++++++++++++- src/coreclr/jit/emit.cpp | 8 +- src/coreclr/jit/gcencode.cpp | 2 +- src/coreclr/jit/lsra.cpp | 4 + src/coreclr/jit/lsra.h | 4 +- src/coreclr/jit/lsraarm64.cpp | 10 +-- src/coreclr/jit/lsrabuild.cpp | 4 + src/coreclr/jit/regset.cpp | 2 +- src/coreclr/jit/target.h | 162 ++++++++++++++++++++++++++++++++-- 9 files changed, 239 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index d8844c2e35afb8..e43b2dac42249a 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -99,6 +99,33 @@ inline bool genExactlyOneBit(T value) return ((value != 0) && genMaxOneBit(value)); } +#ifdef TARGET_ARM64 +inline regMaskTP genFindLowestBit(regMaskTP value) +{ + return regMaskTP(genFindLowestBit(value.getLow())); +} + +/***************************************************************************** + * + * Return true if the given value has exactly zero or one bits set. + */ + +inline bool genMaxOneBit(regMaskTP value) +{ + return PopCount(value) <= 1; +} + +/***************************************************************************** + * + * Return true if the given value has exactly one bit set. + */ + +inline bool genExactlyOneBit(regMaskTP value) +{ + return PopCount(value) == 1; +} +#endif + /***************************************************************************** * * Given a value that has exactly one bit set, return the position of that @@ -147,6 +174,13 @@ inline unsigned genCountBits(uint64_t bits) return BitOperations::PopCount(bits); } +#ifdef TARGET_ARM64 +inline unsigned genCountBits(regMaskTP mask) +{ + return BitOperations::PopCount(mask.getLow()); +} +#endif + /***************************************************************************** * * A rather simple routine that counts the number of bits in a given number. @@ -914,11 +948,18 @@ inline regNumber genRegNumFromMask(regMaskTP mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)genLog2(mask); +#ifdef TARGET_ARM64 + regNumber regNum = (regNumber)genLog2(mask.getLow()); /* Make sure we got it right */ + assert(genRegMask(regNum) == mask.getLow()); +#else + regNumber regNum = (regNumber)genLog2(mask); + + /* Make sure we got it right */ assert(genRegMask(regNum) == mask); +#endif return regNum; } @@ -940,8 +981,8 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); - mask ^= genRegMask(regNum); + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask.getLow()); + mask ^= regNum; return regNum; } @@ -962,7 +1003,7 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask.getLow()); return regNum; } @@ -4463,7 +4504,11 @@ inline void* operator new[](size_t sz, Compiler* compiler, CompMemKind cmk) inline void printRegMask(regMaskTP mask) { +#ifdef TARGET_ARM64 + printf(REG_MASK_ALL_FMT, mask.getLow()); +#else printf(REG_MASK_ALL_FMT, mask); +#endif } inline char* regMaskToString(regMaskTP mask, Compiler* context) @@ -4471,14 +4516,22 @@ inline char* regMaskToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; +#ifdef TARGET_ARM64 + sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask.getLow()); +#else sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask); +#endif return regmask; } inline void printRegMaskInt(regMaskTP mask) { +#ifdef TARGET_ARM64 + printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT).getLow()); +#else printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT)); +#endif } inline char* regMaskIntToString(regMaskTP mask, Compiler* context) @@ -4486,7 +4539,11 @@ inline char* regMaskIntToString(regMaskTP mask, Compiler* context) const size_t cchRegMask = 24; char* regmask = new (context, CMK_Unknown) char[cchRegMask]; +#ifdef TARGET_ARM64 + sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT).getLow()); +#else sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT)); +#endif return regmask; } diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index f8b320c07d44cd..df2b3030beafc4 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -3470,7 +3470,7 @@ void emitter::emitDispRegSet(regMaskTP regs) continue; } - regs -= curReg; + regs ^= curReg; if (sp) { @@ -3828,8 +3828,8 @@ void emitter::emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskT { emitDispGCDeltaTitle(title); regMaskTP sameRegs = prevRegs & curRegs; - regMaskTP removedRegs = prevRegs - sameRegs; - regMaskTP addedRegs = curRegs - sameRegs; + regMaskTP removedRegs = prevRegs ^ sameRegs; + regMaskTP addedRegs = curRegs ^ sameRegs; if (removedRegs != RBM_NONE) { printf(" -"); @@ -8920,7 +8920,7 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr) emitGCregDeadUpd(reg, addr); } - chg -= bit; + chg ^= bit; } while (chg); assert(emitThisXXrefRegs == regs); diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index e21fe864984ef7..2b89fa946c95cf 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4667,7 +4667,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, } // Turn the bit we've just generated off and continue. - regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI + regMask ^= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI } } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 685186550793b7..29add9e3c217bd 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -13609,7 +13609,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current &overallLimitCandidates); assert(limitConsecutiveResult != RBM_NONE); +#ifdef TARGET_ARM64 + unsigned startRegister = BitScanForward(limitConsecutiveResult); +#else unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult); +#endif regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1; candidates |= (registersNeededMask << startRegister); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 09f8b8d63e581a..21972a02416003 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -794,8 +794,8 @@ class LinearScan : public LinearScanInterface static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); - static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 6ea3ad27706cea..7a26eb2d3a30d9 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -180,7 +180,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates) { - if (BitOperations::PopCount(candidates) < registersNeeded) + if (PopCount(candidates) < registersNeeded) { // There is no way the register demanded can be satisfied for this RefPosition // based on the candidates from which it can allocate a register. @@ -205,7 +205,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(currAvailableRegs)); + regAvailableStartIndex = BitScanForward(currAvailableRegs); regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. @@ -223,7 +223,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } else { - regAvailableEndIndex = BitOperations::BitScanForward(static_cast(maskProcessed)); + regAvailableEndIndex = BitScanForward(maskProcessed); } regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; @@ -335,7 +335,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(unprocessedRegs)); + regAvailableStartIndex = BitScanForward(unprocessedRegs); // For the current range, find how many registers are free vs. busy regMaskTP maskForCurRange = RBM_NONE; @@ -370,7 +370,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC // In the given range, there are some free registers available. Calculate how many registers // will need spilling if this range is picked. - int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange); + int curSpillRegs = registersNeeded - PopCount(maskForCurRange); if (curSpillRegs < maxSpillRegs) { consecutiveResultForBusy = 1ULL << regAvailableStartIndex; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 41b375acd43929..cea25b5c5f3e08 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2898,7 +2898,11 @@ void LinearScan::stressSetRandomParameterPreferences() // Select a random register from all possible parameter registers // (of the right type). Preference this parameter to that register. +#ifdef TARGET_ARM64 + unsigned numBits = PopCount(*regs); +#else unsigned numBits = BitOperations::PopCount(*regs); +#endif if (numBits == 0) { continue; diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 12975850a404ba..e706c0c7ca7366 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -960,7 +960,7 @@ regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) regMaskSmall res = 0; for (int i = 0; i < CNT_CALLEE_SAVED; i++) { - if ((calleeSaveMask & ((regMaskTP)1 << i)) != 0) + if ((calleeSaveMask & (1 << i)) != 0) { res |= raRbmCalleeSaveOrder[i]; } diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 06777fa9d5f709..8d24faf37dd864 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -208,8 +208,163 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +typedef _regNumber_enum regNumber; +typedef unsigned char regNumberSmall; + +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; +#elif defined(TARGET_ARM64) +typedef unsigned __int64 regMaskSmall; +struct _regMaskAll +{ + unsigned __int64 low; + + _regMaskAll(unsigned __int64 regMask) + : low(regMask) + { + } + + _regMaskAll() + : low(RBM_NONE) + { + } + + FORCEINLINE operator _regMaskAll() const + { + return _regMaskAll{static_cast(low)}; + } + + FORCEINLINE explicit operator bool() const + { + return low != 0; + } + + FORCEINLINE explicit operator regMaskSmall() const + { + return (regMaskSmall)low; + } + + FORCEINLINE explicit operator unsigned int() const + { + return (unsigned int)low; + } + + FORCEINLINE unsigned __int64 getLow() const + { + return low; + } +}; +typedef _regMaskAll regMaskTP; + +FORCEINLINE static uint32_t PopCount(const _regMaskAll& value) +{ + return BitOperations::PopCount(value.getLow()); +} + +FORCEINLINE static uint32_t BitScanForward(regMaskTP mask) +{ + return BitOperations::BitScanForward(mask.low); +} + +FORCEINLINE regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) +{ + regMaskTP result(first.low ^ second.getLow()); + return result; +} + +FORCEINLINE regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) +{ + regMaskTP result(first.low & second.getLow()); + return result; +} + +FORCEINLINE regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) +{ + regMaskTP result(first.low | second.getLow()); + return result; +} + +FORCEINLINE regMaskTP operator<<(const regMaskTP& first, const int b) +{ + regMaskTP result(first.low << b); + return result; +} + +FORCEINLINE regMaskTP operator>>(const regMaskTP& first, const int b) +{ + regMaskTP result(first.low >> b); + return result; +} + +FORCEINLINE regMaskTP& operator>>=(regMaskTP& first, const int b) +{ + first = first >> b; + return first; +} + +FORCEINLINE regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) +{ + first.low |= second.low; + return first; +} + +FORCEINLINE regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) +{ + first.low ^= second.low; + return first; +} + +FORCEINLINE regMaskSmall operator^=(regMaskSmall& first, const regMaskTP& second) +{ + first ^= second.low; + return first; +} + +FORCEINLINE regMaskSmall operator&=(regMaskSmall& first, const regMaskTP& second) +{ + first &= second.low; + return first; +} + +FORCEINLINE regMaskSmall operator|=(regMaskSmall& first, const regMaskTP& second) +{ + first |= second.low; + return first; +} + +FORCEINLINE regMaskTP& operator<<=(regMaskTP& first, const regMaskTP& second) +{ + first.low <<= second.low; + return first; +} + +FORCEINLINE regMaskTP& operator&=(regMaskTP& first, const regMaskTP& second) +{ + first.low &= second.low; + return first; +} + +FORCEINLINE constexpr bool operator==(const regMaskTP& first, const regMaskTP& second) +{ + return (first.low == second.low); +} + +FORCEINLINE constexpr bool operator!=(const regMaskTP& first, const regMaskTP& second) +{ + return (first.low != second.low); +} + +FORCEINLINE constexpr bool operator>(const regMaskTP& first, const regMaskTP& second) +{ + return (first.low > second.low); +} + +FORCEINLINE regMaskTP operator~(const regMaskTP& first) +{ + regMaskTP result(~first.low); + return result; +} + #else typedef unsigned regMaskTP; #endif @@ -232,9 +387,6 @@ typedef unsigned __int64 regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif -typedef _regNumber_enum regNumber; -typedef unsigned char regNumberSmall; - /*****************************************************************************/ #ifdef DEBUG @@ -558,7 +710,7 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) } else { - // Can be TYP_STRUCT for HFA. It's not clear that's correct; what about + // Can be TYP_STRUCT for HFfirst. It's not clear that's correct; what about // HFA of double? We wouldn't be asserting the right alignment, and // callers like genRegMaskFloat() wouldn't be generating the right mask. From 8ff7508b8ae53210928852d19d3653e29a0b0a8d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 6 May 2024 11:54:57 -0700 Subject: [PATCH 02/10] Fix genFirstRegNumFromMaskAndToggle() and genFirstRegNumFromMask() --- src/coreclr/jit/compiler.hpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index e43b2dac42249a..7f488f4f6aa645 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -981,7 +981,12 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask.getLow()); +#ifdef TARGET_ARM64 + regNumber regNum = (regNumber)BitScanForward(mask); +#else + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif + mask ^= regNum; return regNum; @@ -1003,7 +1008,11 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask.getLow()); +#ifdef TARGET_ARM64 + regNumber regNum = (regNumber)BitScanForward(mask); +#else + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); +#endif return regNum; } From 7ac4137fefeeb3c4bb7bff2c31927c453ab40bde Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 6 May 2024 15:50:13 -0700 Subject: [PATCH 03/10] minor fix --- src/coreclr/jit/compiler.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 7f488f4f6aa645..336ae1fd6a9392 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -987,7 +987,7 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); #endif - mask ^= regNum; + mask ^= genRegMask(regNum); return regNum; } From 0144cb0116159f47019363ef0a794fe14e545605 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 8 May 2024 14:44:32 -0700 Subject: [PATCH 04/10] review feedback --- src/coreclr/jit/compiler.hpp | 6 +++++ src/coreclr/jit/target.h | 51 ++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 29 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 336ae1fd6a9392..10f3a500757dba 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5076,6 +5076,12 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) return BasicBlockVisit::Continue; } +//regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) +//{ +// regMaskTP result(first.low ^ second.getLow()); +// return result; +//} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 8d24faf37dd864..a5efd14e9e61cf 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -215,25 +215,19 @@ typedef unsigned char regNumberSmall; typedef unsigned __int64 regMaskTP; #elif defined(TARGET_ARM64) typedef unsigned __int64 regMaskSmall; -struct _regMaskAll +struct regMaskTP { unsigned __int64 low; - _regMaskAll(unsigned __int64 regMask) + regMaskTP(unsigned __int64 regMask) : low(regMask) { } - _regMaskAll() - : low(RBM_NONE) + regMaskTP() { } - FORCEINLINE operator _regMaskAll() const - { - return _regMaskAll{static_cast(low)}; - } - FORCEINLINE explicit operator bool() const { return low != 0; @@ -254,112 +248,111 @@ struct _regMaskAll return low; } }; -typedef _regMaskAll regMaskTP; -FORCEINLINE static uint32_t PopCount(const _regMaskAll& value) +static uint32_t PopCount(const regMaskTP& value) { return BitOperations::PopCount(value.getLow()); } -FORCEINLINE static uint32_t BitScanForward(regMaskTP mask) +static uint32_t BitScanForward(regMaskTP mask) { return BitOperations::BitScanForward(mask.low); } -FORCEINLINE regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.low ^ second.getLow()); return result; } -FORCEINLINE regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.low & second.getLow()); return result; } -FORCEINLINE regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.low | second.getLow()); return result; } -FORCEINLINE regMaskTP operator<<(const regMaskTP& first, const int b) +static regMaskTP operator<<(const regMaskTP& first, const int b) { regMaskTP result(first.low << b); return result; } -FORCEINLINE regMaskTP operator>>(const regMaskTP& first, const int b) +static regMaskTP operator>>(const regMaskTP& first, const int b) { regMaskTP result(first.low >> b); return result; } -FORCEINLINE regMaskTP& operator>>=(regMaskTP& first, const int b) +static regMaskTP& operator>>=(regMaskTP& first, const int b) { first = first >> b; return first; } -FORCEINLINE regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) { first.low |= second.low; return first; } -FORCEINLINE regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) { first.low ^= second.low; return first; } -FORCEINLINE regMaskSmall operator^=(regMaskSmall& first, const regMaskTP& second) +static regMaskSmall operator^=(regMaskSmall& first, const regMaskTP& second) { first ^= second.low; return first; } -FORCEINLINE regMaskSmall operator&=(regMaskSmall& first, const regMaskTP& second) +static regMaskSmall operator&=(regMaskSmall& first, const regMaskTP& second) { first &= second.low; return first; } -FORCEINLINE regMaskSmall operator|=(regMaskSmall& first, const regMaskTP& second) +static regMaskSmall operator|=(regMaskSmall& first, const regMaskTP& second) { first |= second.low; return first; } -FORCEINLINE regMaskTP& operator<<=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator<<=(regMaskTP& first, const regMaskTP& second) { first.low <<= second.low; return first; } -FORCEINLINE regMaskTP& operator&=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator&=(regMaskTP& first, const regMaskTP& second) { first.low &= second.low; return first; } -FORCEINLINE constexpr bool operator==(const regMaskTP& first, const regMaskTP& second) +static constexpr bool operator==(const regMaskTP& first, const regMaskTP& second) { return (first.low == second.low); } -FORCEINLINE constexpr bool operator!=(const regMaskTP& first, const regMaskTP& second) +static constexpr bool operator!=(const regMaskTP& first, const regMaskTP& second) { return (first.low != second.low); } -FORCEINLINE constexpr bool operator>(const regMaskTP& first, const regMaskTP& second) +static constexpr bool operator>(const regMaskTP& first, const regMaskTP& second) { return (first.low > second.low); } -FORCEINLINE regMaskTP operator~(const regMaskTP& first) +static regMaskTP operator~(const regMaskTP& first) { regMaskTP result(~first.low); return result; From 81b26c6e68aafafd4989753b417391aeb944953b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 8 May 2024 16:10:37 -0700 Subject: [PATCH 05/10] fix the TP regression from 1.5% -> 0.5% --- src/coreclr/jit/compiler.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 10f3a500757dba..c403107fea7beb 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -112,7 +112,7 @@ inline regMaskTP genFindLowestBit(regMaskTP value) inline bool genMaxOneBit(regMaskTP value) { - return PopCount(value) <= 1; + return genMaxOneBit(value.low); } /***************************************************************************** @@ -122,7 +122,7 @@ inline bool genMaxOneBit(regMaskTP value) inline bool genExactlyOneBit(regMaskTP value) { - return PopCount(value) == 1; + return genExactlyOneBit(value.low); } #endif From 261ab5d1697a420d600fed9b3874bc1f9489cd01 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 8 May 2024 16:41:37 -0700 Subject: [PATCH 06/10] Pass by value --- src/coreclr/jit/target.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index a5efd14e9e61cf..20ece4b820714b 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -249,7 +249,7 @@ struct regMaskTP } }; -static uint32_t PopCount(const regMaskTP& value) +static uint32_t PopCount(regMaskTP value) { return BitOperations::PopCount(value.getLow()); } @@ -259,31 +259,31 @@ static uint32_t BitScanForward(regMaskTP mask) return BitOperations::BitScanForward(mask.low); } -static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator^(regMaskTP first, regMaskTP second) { regMaskTP result(first.low ^ second.getLow()); return result; } -static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator&(regMaskTP first, regMaskTP second) { regMaskTP result(first.low & second.getLow()); return result; } -static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator|(regMaskTP first, regMaskTP second) { regMaskTP result(first.low | second.getLow()); return result; } -static regMaskTP operator<<(const regMaskTP& first, const int b) +static regMaskTP operator<<(regMaskTP first, const int b) { regMaskTP result(first.low << b); return result; } -static regMaskTP operator>>(const regMaskTP& first, const int b) +static regMaskTP operator>>(regMaskTP first, const int b) { regMaskTP result(first.low >> b); return result; @@ -295,64 +295,64 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b) return first; } -static regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator|=(regMaskTP& first, regMaskTP second) { first.low |= second.low; return first; } -static regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator^=(regMaskTP& first, regMaskTP second) { first.low ^= second.low; return first; } -static regMaskSmall operator^=(regMaskSmall& first, const regMaskTP& second) +static regMaskSmall operator^=(regMaskSmall& first, regMaskTP second) { first ^= second.low; return first; } -static regMaskSmall operator&=(regMaskSmall& first, const regMaskTP& second) +static regMaskSmall operator&=(regMaskSmall& first, regMaskTP second) { first &= second.low; return first; } -static regMaskSmall operator|=(regMaskSmall& first, const regMaskTP& second) +static regMaskSmall operator|=(regMaskSmall& first, regMaskTP second) { first |= second.low; return first; } -static regMaskTP& operator<<=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator<<=(regMaskTP& first, regMaskTP second) { first.low <<= second.low; return first; } -static regMaskTP& operator&=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) { first.low &= second.low; return first; } -static constexpr bool operator==(const regMaskTP& first, const regMaskTP& second) +static bool operator==(regMaskTP first, regMaskTP second) { return (first.low == second.low); } -static constexpr bool operator!=(const regMaskTP& first, const regMaskTP& second) +static bool operator!=(regMaskTP first, regMaskTP second) { return (first.low != second.low); } -static constexpr bool operator>(const regMaskTP& first, const regMaskTP& second) +static bool operator>(regMaskTP first, regMaskTP second) { return (first.low > second.low); } -static regMaskTP operator~(const regMaskTP& first) +static regMaskTP operator~(regMaskTP first) { regMaskTP result(~first.low); return result; From 639290c75c8b504f8a8c17322e1f603a708b2cbb Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 8 May 2024 17:09:10 -0700 Subject: [PATCH 07/10] jit format --- src/coreclr/jit/compiler.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index c403107fea7beb..6e5d2ac7c645fb 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -5076,11 +5076,11 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) return BasicBlockVisit::Continue; } -//regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) +// regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) //{ -// regMaskTP result(first.low ^ second.getLow()); -// return result; -//} +// regMaskTP result(first.low ^ second.getLow()); +// return result; +// } /*****************************************************************************/ #endif //_COMPILER_HPP_ From 28b457d5e5a5efa4b7aff63654d1647c3874ea5c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 May 2024 07:12:41 -0700 Subject: [PATCH 08/10] review feedback --- src/coreclr/jit/codegencommon.cpp | 9 ++-- src/coreclr/jit/compiler.hpp | 18 +------ src/coreclr/jit/lsra.cpp | 4 -- src/coreclr/jit/lsra.h | 4 +- src/coreclr/jit/lsrabuild.cpp | 4 -- src/coreclr/jit/target.h | 84 ++++++++++++++++--------------- src/coreclr/jit/utils.cpp | 5 -- 7 files changed, 52 insertions(+), 76 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 6dcbba13b48533..c24fd038d133fd 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3829,9 +3829,9 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& // Iterate through float/double registers and initialize them to 0 or // copy from already initialized register of the same type. - regMaskTP regMask = genRegMask(REG_FP_FIRST); - for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1) + for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg)) { + regMaskTP regMask = genRegMask(reg); if (regMask & initFltRegs) { // Do we have a float register already set to 0? @@ -5732,10 +5732,9 @@ void CodeGen::genFnProlog() if (initRegs) { - regMaskTP regMask = 0x1; - - for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1) + for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg)) { + regMaskTP regMask = genRegMask(reg); if (regMask & initRegs) { // Check if we have already zeroed this register diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 6e5d2ac7c645fb..f9c30f43612f7b 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -112,7 +112,7 @@ inline regMaskTP genFindLowestBit(regMaskTP value) inline bool genMaxOneBit(regMaskTP value) { - return genMaxOneBit(value.low); + return genMaxOneBit(value.getLow()); } /***************************************************************************** @@ -122,7 +122,7 @@ inline bool genMaxOneBit(regMaskTP value) inline bool genExactlyOneBit(regMaskTP value) { - return genExactlyOneBit(value.low); + return genExactlyOneBit(value.getLow()); } #endif @@ -981,11 +981,7 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) /* Convert the mask to a register number */ -#ifdef TARGET_ARM64 regNumber regNum = (regNumber)BitScanForward(mask); -#else - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); -#endif mask ^= genRegMask(regNum); @@ -1008,11 +1004,7 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask) /* Convert the mask to a register number */ -#ifdef TARGET_ARM64 regNumber regNum = (regNumber)BitScanForward(mask); -#else - regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); -#endif return regNum; } @@ -5076,12 +5068,6 @@ BasicBlockVisit FlowGraphNaturalLoop::VisitRegularExitBlocks(TFunc func) return BasicBlockVisit::Continue; } -// regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) -//{ -// regMaskTP result(first.low ^ second.getLow()); -// return result; -// } - /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 29add9e3c217bd..73de489b852d18 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -13609,11 +13609,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current &overallLimitCandidates); assert(limitConsecutiveResult != RBM_NONE); -#ifdef TARGET_ARM64 unsigned startRegister = BitScanForward(limitConsecutiveResult); -#else - unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult); -#endif regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1; candidates |= (registersNeededMask << startRegister); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 21972a02416003..9f6db34034800b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -794,8 +794,8 @@ class LinearScan : public LinearScanInterface static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); - const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + static constexpr regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + static constexpr regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index cea25b5c5f3e08..ed7aa1c29aaf85 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2898,11 +2898,7 @@ void LinearScan::stressSetRandomParameterPreferences() // Select a random register from all possible parameter registers // (of the right type). Preference this parameter to that register. -#ifdef TARGET_ARM64 unsigned numBits = PopCount(*regs); -#else - unsigned numBits = BitOperations::PopCount(*regs); -#endif if (numBits == 0) { continue; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 20ece4b820714b..2cf5de94e6f436 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -217,9 +217,11 @@ typedef unsigned __int64 regMaskTP; typedef unsigned __int64 regMaskSmall; struct regMaskTP { - unsigned __int64 low; +private: + uint64_t low; - regMaskTP(unsigned __int64 regMask) +public: + constexpr regMaskTP(uint64_t regMask) : low(regMask) { } @@ -230,7 +232,7 @@ struct regMaskTP FORCEINLINE explicit operator bool() const { - return low != 0; + return low != RBM_NONE; } FORCEINLINE explicit operator regMaskSmall() const @@ -243,49 +245,44 @@ struct regMaskTP return (unsigned int)low; } - FORCEINLINE unsigned __int64 getLow() const + FORCEINLINE uint64_t getLow() const { return low; } -}; -static uint32_t PopCount(regMaskTP value) -{ - return BitOperations::PopCount(value.getLow()); -} - -static uint32_t BitScanForward(regMaskTP mask) -{ - return BitOperations::BitScanForward(mask.low); -} + FORCEINLINE void setLow(uint64_t _low) + { + low = _low; + } +}; static regMaskTP operator^(regMaskTP first, regMaskTP second) { - regMaskTP result(first.low ^ second.getLow()); + regMaskTP result(first.getLow() ^ second.getLow()); return result; } static regMaskTP operator&(regMaskTP first, regMaskTP second) { - regMaskTP result(first.low & second.getLow()); + regMaskTP result(first.getLow() & second.getLow()); return result; } static regMaskTP operator|(regMaskTP first, regMaskTP second) { - regMaskTP result(first.low | second.getLow()); + regMaskTP result(first.getLow() | second.getLow()); return result; } static regMaskTP operator<<(regMaskTP first, const int b) { - regMaskTP result(first.low << b); + regMaskTP result(first.getLow() << b); return result; } static regMaskTP operator>>(regMaskTP first, const int b) { - regMaskTP result(first.low >> b); + regMaskTP result(first.getLow() >> b); return result; } @@ -297,64 +294,53 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b) static regMaskTP& operator|=(regMaskTP& first, regMaskTP second) { - first.low |= second.low; + first.setLow(first.getLow() | second.getLow()); return first; } static regMaskTP& operator^=(regMaskTP& first, regMaskTP second) { - first.low ^= second.low; + first.setLow(first.getLow() ^ second.getLow()); return first; } static regMaskSmall operator^=(regMaskSmall& first, regMaskTP second) { - first ^= second.low; + first ^= second.getLow(); return first; } static regMaskSmall operator&=(regMaskSmall& first, regMaskTP second) { - first &= second.low; + first &= second.getLow(); return first; } static regMaskSmall operator|=(regMaskSmall& first, regMaskTP second) { - first |= second.low; - return first; -} - -static regMaskTP& operator<<=(regMaskTP& first, regMaskTP second) -{ - first.low <<= second.low; + first |= second.getLow(); return first; } static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) { - first.low &= second.low; + first.setLow(first.getLow() & second.getLow()); return first; } static bool operator==(regMaskTP first, regMaskTP second) { - return (first.low == second.low); + return (first.getLow() == second.getLow()); } static bool operator!=(regMaskTP first, regMaskTP second) { - return (first.low != second.low); -} - -static bool operator>(regMaskTP first, regMaskTP second) -{ - return (first.low > second.low); + return (first.getLow() != second.getLow()); } static regMaskTP operator~(regMaskTP first) { - regMaskTP result(~first.low); + regMaskTP result(~first.getLow()); return result; } @@ -362,6 +348,24 @@ static regMaskTP operator~(regMaskTP first) typedef unsigned regMaskTP; #endif +static uint32_t PopCount(regMaskTP value) +{ +#ifdef TARGET_ARM64 + return BitOperations::PopCount(value.getLow()); +#else + return BitOperations::PopCount(value); +#endif +} + +static uint32_t BitScanForward(regMaskTP mask) +{ +#ifdef TARGET_ARM64 + return BitOperations::BitScanForward(mask.getLow()); +#else + return BitOperations::BitScanForward(mask); +#endif +} + #if REGMASK_BITS == 8 typedef unsigned char regMaskSmall; #define REG_MASK_INT_FMT "%02X" @@ -703,7 +707,7 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) } else { - // Can be TYP_STRUCT for HFfirst. It's not clear that's correct; what about + // Can be TYP_STRUCT for HFA. It's not clear that's correct; what about // HFA of double? We wouldn't be asserting the right alignment, and // callers like genRegMaskFloat() wouldn't be generating the right mask. diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 07ca1fb6dc09c9..ed06b863657ef9 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -417,11 +417,6 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN sep = " "; } - if (regBit > regMask) - { - break; - } - regPrev = regNum; } From 0a0e61d59029a633197d07c96129f48e3b2bb277 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 May 2024 08:16:06 -0700 Subject: [PATCH 09/10] Remove FORCEINLINE --- src/coreclr/jit/target.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 2cf5de94e6f436..6e79dcd23cd327 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -230,27 +230,27 @@ struct regMaskTP { } - FORCEINLINE explicit operator bool() const + explicit operator bool() const { return low != RBM_NONE; } - FORCEINLINE explicit operator regMaskSmall() const + explicit operator regMaskSmall() const { return (regMaskSmall)low; } - FORCEINLINE explicit operator unsigned int() const + explicit operator unsigned int() const { return (unsigned int)low; } - FORCEINLINE uint64_t getLow() const + uint64_t getLow() const { return low; } - FORCEINLINE void setLow(uint64_t _low) + void setLow(uint64_t _low) { low = _low; } From 8fdbf3f6cdae2ec284b796938d4b5a2e02497da8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 10 May 2024 08:27:55 -0700 Subject: [PATCH 10/10] Remove setLow() --- src/coreclr/jit/target.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 6e79dcd23cd327..79d64bc08cc9d2 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -249,11 +249,6 @@ struct regMaskTP { return low; } - - void setLow(uint64_t _low) - { - low = _low; - } }; static regMaskTP operator^(regMaskTP first, regMaskTP second) @@ -294,13 +289,13 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b) static regMaskTP& operator|=(regMaskTP& first, regMaskTP second) { - first.setLow(first.getLow() | second.getLow()); + first = first | second; return first; } static regMaskTP& operator^=(regMaskTP& first, regMaskTP second) { - first.setLow(first.getLow() ^ second.getLow()); + first = first ^ second; return first; } @@ -324,7 +319,7 @@ static regMaskSmall operator|=(regMaskSmall& first, regMaskTP second) static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) { - first.setLow(first.getLow() & second.getLow()); + first = first & second; return first; }