diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs index fcbd64758fc8ec..9b7117aff0c714 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticMap.cs @@ -110,13 +110,8 @@ private static Vector512 ContainsMask64CharsAvx512(Vector512 charMap Vector512 source0 = Vector512.LoadUnsafe(ref searchSpace0); Vector512 source1 = Vector512.LoadUnsafe(ref searchSpace1); - Vector512 sourceLower = Avx512BW.PackUnsignedSaturate( - (source0 & Vector512.Create((ushort)255)).AsInt16(), - (source1 & Vector512.Create((ushort)255)).AsInt16()); - - Vector512 sourceUpper = Avx512BW.PackUnsignedSaturate( - (source0 >>> 8).AsInt16(), - (source1 >>> 8).AsInt16()); + Vector512 sourceLower = Avx512Vbmi.PermuteVar64x8x2(source0.AsByte(), Vector512.CreateSequence(0, 2), source1.AsByte()); + Vector512 sourceUpper = Avx512Vbmi.PermuteVar64x8x2(source0.AsByte(), Vector512.CreateSequence(1, 2), source1.AsByte()); Vector512 resultLower = IsCharBitNotSetAvx512(charMap, sourceLower); Vector512 resultUpper = IsCharBitNotSetAvx512(charMap, sourceUpper); @@ -128,12 +123,17 @@ private static Vector512 ContainsMask64CharsAvx512(Vector512 charMap [CompExactlyDependsOn(typeof(Avx512Vbmi))] private static Vector512 IsCharBitNotSetAvx512(Vector512 charMap, Vector512 values) { - Vector512 shifted = values >>> VectorizedIndexShift; + // X86 does not have an instruction for right shifting 8-bit values, so it's instead emulated + // by using a 32-bit value shift followed by an AND to mask off the bits that should be zeroed. + // We're using PermuteVar64x8, which only looks at the lower 6 bits, so we can skip the AND. + // Bits 4/5/6 will not affect the result as the bit positions vector is duplicated 8 times. + Vector512 shifted = (values.AsInt32() >>> VectorizedIndexShift).AsByte(); - Vector512 bitPositions = Avx512BW.Shuffle(Vector512.Create(0x8040201008040201).AsByte(), shifted); + Vector512 bitPositions = Avx512Vbmi.PermuteVar64x8(Vector512.Create(0x8040201008040201).AsByte(), shifted); - Vector512 index = values & Vector512.Create((byte)VectorizedIndexMask); - Vector512 bitMask = Avx512Vbmi.PermuteVar64x8(charMap, index); + // We want to select bytes from 'charMap' based on the low 5 bits of 'values' (values & VectorizedIndexMask). + // PermuteVar64x8 will look at the low 6 bits, but the 6th bit will not affect the result as the 'charMap' is duplicated. + Vector512 bitMask = Avx512Vbmi.PermuteVar64x8(charMap, values); return Vector512.Equals(bitMask & bitPositions, Vector512.Zero); } @@ -145,13 +145,8 @@ private static Vector256 ContainsMask32CharsAvx512(Vector256 charMap Vector256 source0 = Vector256.LoadUnsafe(ref searchSpace0); Vector256 source1 = Vector256.LoadUnsafe(ref searchSpace1); - Vector256 sourceLower = Avx2.PackUnsignedSaturate( - (source0 & Vector256.Create((ushort)255)).AsInt16(), - (source1 & Vector256.Create((ushort)255)).AsInt16()); - - Vector256 sourceUpper = Avx2.PackUnsignedSaturate( - (source0 >>> 8).AsInt16(), - (source1 >>> 8).AsInt16()); + Vector256 sourceLower = Avx512Vbmi.VL.PermuteVar32x8x2(source0.AsByte(), Vector256.CreateSequence(0, 2), source1.AsByte()); + Vector256 sourceUpper = Avx512Vbmi.VL.PermuteVar32x8x2(source0.AsByte(), Vector256.CreateSequence(1, 2), source1.AsByte()); Vector256 resultLower = IsCharBitNotSetAvx512(charMap, sourceLower); Vector256 resultUpper = IsCharBitNotSetAvx512(charMap, sourceUpper); @@ -163,12 +158,17 @@ private static Vector256 ContainsMask32CharsAvx512(Vector256 charMap [CompExactlyDependsOn(typeof(Avx512Vbmi.VL))] private static Vector256 IsCharBitNotSetAvx512(Vector256 charMap, Vector256 values) { - Vector256 shifted = values >>> VectorizedIndexShift; + // X86 does not have an instruction for right shifting 8-bit values, so it's instead emulated + // by using a 32-bit value shift followed by an AND to mask off the bits that should be zeroed. + // We're using PermuteVar32x8, which only looks at the lower 5 bits, so we can skip the AND. + // Bits 4/5 will not affect the result as the bit positions vector is duplicated 4 times + Vector256 shifted = (values.AsInt32() >>> VectorizedIndexShift).AsByte(); - Vector256 bitPositions = Avx2.Shuffle(Vector256.Create(0x8040201008040201).AsByte(), shifted); + Vector256 bitPositions = Avx512Vbmi.VL.PermuteVar32x8(Vector256.Create(0x8040201008040201).AsByte(), shifted); - Vector256 index = values & Vector256.Create((byte)VectorizedIndexMask); - Vector256 bitMask = Avx512Vbmi.VL.PermuteVar32x8(charMap, index); + // We want to select bytes from 'charMap' based on the low 5 bits of 'values' (values & VectorizedIndexMask). + // PermuteVar32x8 already looks only at the low 5 bits, so we can skip the redundant AND. + Vector256 bitMask = Avx512Vbmi.VL.PermuteVar32x8(charMap, values); return Vector256.Equals(bitMask & bitPositions, Vector256.Zero); } @@ -436,7 +436,7 @@ private static int IndexOfAnyVectorizedAvx512(ref char searchS if (result != Vector512.Zero) { - if (TryFindMatchAvx512(ref cur, PackedSpanHelpers.FixUpPackedVector512Result(result).ExtractMostSignificantBits(), ref state, out int index)) + if (TryFindMatchAvx512(ref cur, result.ExtractMostSignificantBits(), ref state, out int index)) { return MatchOffset(ref searchSpace, ref cur) + index; } @@ -466,7 +466,7 @@ private static int IndexOfAnyVectorizedAvx512(ref char searchS if (result != Vector512.Zero) { - if (TryFindMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, PackedSpanHelpers.FixUpPackedVector512Result(result).ExtractMostSignificantBits(), ref state, out int index)) + if (TryFindMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, result.ExtractMostSignificantBits(), ref state, out int index)) { return index; } @@ -483,7 +483,7 @@ private static int IndexOfAnyVectorizedAvx512(ref char searchS if (result != Vector256.Zero) { - if (TryFindMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, PackedSpanHelpers.FixUpPackedVector256Result(result).ExtractMostSignificantBits(), ref state, out int index)) + if (TryFindMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, result.ExtractMostSignificantBits(), ref state, out int index)) { return index; } @@ -614,7 +614,7 @@ private static int LastIndexOfAnyVectorizedAvx512(ref char sea if (result != Vector512.Zero) { - if (TryFindLastMatchAvx512(ref cur, PackedSpanHelpers.FixUpPackedVector512Result(result).ExtractMostSignificantBits(), ref state, out int index)) + if (TryFindLastMatchAvx512(ref cur, result.ExtractMostSignificantBits(), ref state, out int index)) { return MatchOffset(ref searchSpace, ref cur) + index; } @@ -643,7 +643,7 @@ private static int LastIndexOfAnyVectorizedAvx512(ref char sea if (result != Vector512.Zero) { - if (TryFindLastMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, PackedSpanHelpers.FixUpPackedVector512Result(result).ExtractMostSignificantBits(), ref state, out int index)) + if (TryFindLastMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, result.ExtractMostSignificantBits(), ref state, out int index)) { return index; } @@ -661,7 +661,7 @@ private static int LastIndexOfAnyVectorizedAvx512(ref char sea if (result != Vector256.Zero) { - if (TryFindLastMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, PackedSpanHelpers.FixUpPackedVector256Result(result).ExtractMostSignificantBits(), ref state, out int index)) + if (TryFindLastMatchOverlappedAvx512(ref searchSpace, searchSpaceLength, result.ExtractMostSignificantBits(), ref state, out int index)) { return index; }