Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 113 additions & 5 deletions src/System.Private.CoreLib/shared/System/BitOps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@

namespace System
{
/// <summary>
/// Utility methods for intrinsic bit-twiddling operations.
/// The methods use hardware intrinsics when available on the underlying platform,
/// otherwise they use optimized software fallbacks.
/// Operations are all little-endian.
/// </summary>
internal static class BitOps
{
// C# no-alloc optimization that directly wraps the data section of the dll (similar to string constants)
Expand Down Expand Up @@ -68,7 +74,7 @@ public static int TrailingZeroCount(uint value)
// Using deBruijn sequence, k=2, n=5 (2^5=32) : 0b_0000_0111_0111_1100_1011_0101_0011_0001u
ref MemoryMarshal.GetReference(s_TrailingZeroCountDeBruijn),
// long -> IntPtr cast on 32-bit platforms is expensive - it does overflow checks not needed here
(IntPtr)(int)(((uint)((value & -value) * 0x077CB531u)) >> 27)); // shift over long also expensive on 32-bit
(IntPtr)(((value & (uint)-value) * 0x077CB531u) >> 27)); // shift over long also expensive on 32-bit
}

/// <summary>
Expand Down Expand Up @@ -124,7 +130,7 @@ public static int LeadingZeroCount(uint value)
return 32;
}

return 31 - Log2(value);
return 31 - Log2Fallback(value);
}

/// <summary>
Expand Down Expand Up @@ -179,16 +185,16 @@ public static int Log2(uint value)
}

// Already has contract 0->0, without branching
return Log2SoftwareFallback(value);
return Log2Fallback(value);
}

/// <summary>
/// Returns the integer (floor) log of the specified value, base 2.
/// Note that by convention, input value 0 returns 0 since Log(0) is undefined.
/// Does not incur branching.
/// Does not directly use any hardware intrinsics.
/// </summary>
/// <param name="value">The value.</param>
private static int Log2SoftwareFallback(uint value)
private static int Log2Fallback(uint value)
{
// No AggressiveInlining due to large method size

Expand Down Expand Up @@ -235,5 +241,107 @@ public static int Log2(ulong value)

return 32 + Log2(hi);
}

/// <summary>
/// Rotates <paramref name="value"/> to the left by <paramref name="offset"/> bit positions:
/// bits shifted out of the most significant end re-enter at the least significant end.
/// Comparable to the x86 instruction ROL.
/// </summary>
/// <param name="value">The 32-bit value to rotate.</param>
/// <param name="offset">The rotation distance, in bits.
/// A distance outside the range [0..31] behaves like the same distance modulo 32.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint RotateLeft(uint value, int offset)
{
    // C# uses only the low five bits of the count when shifting a 32-bit operand,
    // so (32 - offset) needs no explicit masking, even when offset is 0.
    return (value << offset) | (value >> (32 - offset));
}

/// <summary>
/// Rotates <paramref name="value"/> to the left by <paramref name="offset"/> bit positions:
/// bits shifted out of the most significant end re-enter at the least significant end.
/// Comparable to the x86 instruction ROL.
/// </summary>
/// <param name="value">The 64-bit value to rotate.</param>
/// <param name="offset">The rotation distance, in bits.
/// A distance outside the range [0..63] behaves like the same distance modulo 64.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong RotateLeft(ulong value, int offset)
{
    // C# uses only the low six bits of the count when shifting a 64-bit operand,
    // so (64 - offset) needs no explicit masking, even when offset is 0.
    return (value << offset) | (value >> (64 - offset));
}

/// <summary>
/// Rotates <paramref name="value"/> to the right by <paramref name="offset"/> bit positions:
/// bits shifted out of the least significant end re-enter at the most significant end.
/// Comparable to the x86 instruction ROR.
/// </summary>
/// <param name="value">The 32-bit value to rotate.</param>
/// <param name="offset">The rotation distance, in bits.
/// A distance outside the range [0..31] behaves like the same distance modulo 32.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static uint RotateRight(uint value, int offset)
{
    // C# uses only the low five bits of the count when shifting a 32-bit operand,
    // so (32 - offset) needs no explicit masking, even when offset is 0.
    return (value >> offset) | (value << (32 - offset));
}

/// <summary>
/// Rotates <paramref name="value"/> to the right by <paramref name="offset"/> bit positions:
/// bits shifted out of the least significant end re-enter at the most significant end.
/// Comparable to the x86 instruction ROR.
/// </summary>
/// <param name="value">The 64-bit value to rotate.</param>
/// <param name="offset">The rotation distance, in bits.
/// A distance outside the range [0..63] behaves like the same distance modulo 64.</param>
/// <returns>The rotated value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static ulong RotateRight(ulong value, int offset)
{
    // C# uses only the low six bits of the count when shifting a 64-bit operand,
    // so (64 - offset) needs no explicit masking, even when offset is 0.
    return (value >> offset) | (value << (64 - offset));
}

/// <summary>
/// Counts the bits that are set in <paramref name="value"/> (its population count).
/// Comparable to the x86 instruction POPCNT, which is used directly when available.
/// </summary>
/// <param name="value">The value whose set bits are counted.</param>
/// <returns>The number of set bits, from 0 to 32.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int PopCount(uint value)
{
    if (Popcnt.IsSupported)
    {
        return (int)Popcnt.PopCount(value);
    }

    // Software fallback: add neighbouring bit fields in parallel, widening the fields each step.
    value -= (value >> 1) & 0x_55555555u;                           // each 2-bit field = count of its two bits
    value = ((value >> 2) & 0x_33333333u) + (value & 0x_33333333u); // each 4-bit field = count of its four bits
    value = ((value >> 4) + value) & 0x_0F0F0F0Fu;                  // each byte = count of its eight bits

    // The multiply sums all four byte counts into the top byte; the shift extracts that byte.
    return (int)((value * 0x_01010101u) >> 24);
}

/// <summary>
/// Counts the bits that are set in <paramref name="value"/> (its population count).
/// Comparable to the x86 instruction POPCNT, which is used directly when available.
/// </summary>
/// <param name="value">The value whose set bits are counted.</param>
/// <returns>The number of set bits, from 0 to 64.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int PopCount(ulong value)
{
    if (Popcnt.IsSupported)
    {
        // Prefer the 64-bit instruction; otherwise count the low and high halves
        // separately with the 32-bit instruction and add the results.
        return Popcnt.X64.IsSupported
            ? (int)Popcnt.X64.PopCount(value)
            : (int)(Popcnt.PopCount((uint)value) + Popcnt.PopCount((uint)(value >> 32)));
    }

    // Software fallback: add neighbouring bit fields in parallel, widening the fields each step.
    value -= (value >> 1) & 0x_55555555_55555555ul;                                   // 2-bit field counts
    value = ((value >> 2) & 0x_33333333_33333333ul) + (value & 0x_33333333_33333333ul); // 4-bit field counts
    value = ((value >> 4) + value) & 0x_0F0F0F0F_0F0F0F0Ful;                          // per-byte counts

    // The multiply sums all eight byte counts into the top byte; the shift extracts that byte.
    return (int)((value * 0x_01010101_01010101ul) >> 56);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,11 @@ public static uint ReverseEndianness(uint value)
// Testing shows that throughput increases if the AND
// is performed before the ROL / ROR.

uint mask_xx_zz = (value & 0x00FF00FFU);
uint mask_ww_yy = (value & 0xFF00FF00U);
return ((mask_xx_zz >> 8) | (mask_xx_zz << 24))
+ ((mask_ww_yy << 8) | (mask_ww_yy >> 24));
uint mask_xx_zz = value & 0x00FF00FFU;
uint mask_ww_yy = value & 0xFF00FF00U;

return BitOps.RotateRight(mask_xx_zz, 8)
+ BitOps.RotateLeft(mask_ww_yy, 8);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1579,7 +1579,7 @@ private void Drain()
{
for (int i = 16; i != 80; i++)
{
this.w[i] = Rol1((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]));
this.w[i] = BitOps.RotateLeft((this.w[i - 3] ^ this.w[i - 8] ^ this.w[i - 14] ^ this.w[i - 16]), 1);
}

unchecked
Expand All @@ -1594,28 +1594,28 @@ private void Drain()
{
const uint k = 0x5A827999;
uint f = (b & c) | ((~b) & d);
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

for (int i = 20; i != 40; i++)
{
uint f = b ^ c ^ d;
const uint k = 0x6ED9EBA1;
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

for (int i = 40; i != 60; i++)
{
uint f = (b & c) | (b & d) | (c & d);
const uint k = 0x8F1BBCDC;
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

for (int i = 60; i != 80; i++)
{
uint f = b ^ c ^ d;
const uint k = 0xCA62C1D6;
uint temp = Rol5(a) + f + e + k + this.w[i]; e = d; d = c; c = Rol30(b); b = a; a = temp;
uint temp = BitOps.RotateLeft(a, 5) + f + e + k + this.w[i]; e = d; d = c; c = BitOps.RotateLeft(b, 30); b = a; a = temp;
}

this.w[80] += a;
Expand All @@ -1628,21 +1628,6 @@ private void Drain()
this.length += 512; // 64 bytes == 512 bits
this.pos = 0;
}

/// <summary>Rotates <paramref name="input"/> left by one bit position.</summary>
private static uint Rol1(uint input) => (input >> 31) | (input << 1);

/// <summary>Rotates <paramref name="input"/> left by five bit positions.</summary>
private static uint Rol5(uint input) => (input >> 27) | (input << 5);

/// <summary>Rotates <paramref name="input"/> left by thirty bit positions.</summary>
private static uint Rol30(uint input) => (input >> 2) | (input << 30);
}

private static Guid GenerateGuidFromName(string name)
Expand Down
10 changes: 3 additions & 7 deletions src/System.Private.CoreLib/shared/System/HashCode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,6 @@ public static int Combine<T1, T2, T3, T4, T5, T6, T7, T8>(T1 value1, T2 value2,
return (int)hash;
}

/// <summary>
/// Rotates <paramref name="value"/> left by <paramref name="count"/> bit positions.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint Rol(uint value, int count)
{
    // Bits shifted out on the left are brought back in on the right.
    return (value << count) | (value >> (32 - count));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4)
{
Expand All @@ -269,7 +265,7 @@ private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v
private static uint Round(uint hash, uint input)
{
hash += input * Prime2;
hash = Rol(hash, 13);
hash = BitOps.RotateLeft(hash, 13);
hash *= Prime1;
return hash;
}
Expand All @@ -278,13 +274,13 @@ private static uint Round(uint hash, uint input)
private static uint QueueRound(uint hash, uint queuedValue)
{
hash += queuedValue * Prime3;
return Rol(hash, 17) * Prime4;
return BitOps.RotateLeft(hash, 17) * Prime4;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint MixState(uint v1, uint v2, uint v3, uint v4)
{
return Rol(v1, 1) + Rol(v2, 7) + Rol(v3, 12) + Rol(v4, 18);
return BitOps.RotateLeft(v1, 1) + BitOps.RotateLeft(v2, 7) + BitOps.RotateLeft(v3, 12) + BitOps.RotateLeft(v4, 18);
}

private static uint MixEmptyState()
Expand Down
15 changes: 4 additions & 11 deletions src/System.Private.CoreLib/shared/System/Marvin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,28 +102,21 @@ private static void Block(ref uint rp0, ref uint rp1)
uint p1 = rp1;

p1 ^= p0;
p0 = _rotl(p0, 20);
p0 = BitOps.RotateLeft(p0, 20);

p0 += p1;
p1 = _rotl(p1, 9);
p1 = BitOps.RotateLeft(p1, 9);

p1 ^= p0;
p0 = _rotl(p0, 27);
p0 = BitOps.RotateLeft(p0, 27);

p0 += p1;
p1 = _rotl(p1, 19);
p1 = BitOps.RotateLeft(p1, 19);

rp0 = p0;
rp1 = p1;
}

/// <summary>
/// Rotates <paramref name="value"/> left by <paramref name="shift"/> bit positions.
/// </summary>
// The shift/or pattern below is expected to be optimized into a single rol
// (or ror with negated shift value) instruction.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static uint _rotl(uint value, int shift)
    => (value << shift) | (value >> (32 - shift));

public static ulong DefaultSeed { get; } = GenerateSeed();

private static unsafe ulong GenerateSeed()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,7 @@ internal static class HashHelpers

public static int Combine(int h1, int h2)
{
// RyuJIT optimizes this to use the ROL instruction
// Related GitHub pull request: dotnet/coreclr#1830
uint rol5 = ((uint)h1 << 5) | ((uint)h1 >> 27);
uint rol5 = BitOps.RotateLeft((uint)h1, 5);
return ((int)rol5 + h1) ^ h2;
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/System.Private.CoreLib/shared/System/String.Comparison.cs
Original file line number Diff line number Diff line change
Expand Up @@ -820,15 +820,15 @@ internal unsafe int GetNonRandomizedHashCode()
{
length -= 4;
// Where length is 4n-1 (e.g. 3,7,11,15,19) this additionally consumes the null terminator
hash1 = (((hash1 << 5) | (hash1 >> 27)) + hash1) ^ ptr[0];
hash2 = (((hash2 << 5) | (hash2 >> 27)) + hash2) ^ ptr[1];
hash1 = (BitOps.RotateLeft(hash1, 5) + hash1) ^ ptr[0];
hash2 = (BitOps.RotateLeft(hash2, 5) + hash2) ^ ptr[1];
ptr += 2;
}

if (length > 0)
{
// Where length is 4n-3 (e.g. 1,5,9,13,17) this additionally consumes the null terminator
hash2 = (((hash2 << 5) | (hash2 >> 27)) + hash2) ^ ptr[0];
hash2 = (BitOps.RotateLeft(hash2, 5) + hash2) ^ ptr[0];
}

return (int)(hash1 + (hash2 * 1566083941));
Expand Down