 using System.Runtime.Intrinsics.X86;

 using Internal.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;

 namespace System
 {
@@ -196,7 +197,15 @@ public static int IndexOf(ref char searchSpace, char value, int length)
             int offset = 0;
             int lengthToExamine = length;

-            if (Vector.IsHardwareAccelerated)
+            if (Avx2.IsSupported || Sse2.IsSupported)
+            {
+                // Avx2 branch also operates on Sse2 sizes, so check is combined.
+                if (length >= Vector128<byte>.Count * 2)
+                {
+                    lengthToExamine = UnalignedCountVector128(ref searchSpace);
+                }
+            }
+            else if (Vector.IsHardwareAccelerated)
             {
                 if (length >= Vector<ushort>.Count * 2)
                 {
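A note on the new setup above: when Avx2 or Sse2 is available and at least Vector128<byte>.Count * 2 chars are present, lengthToExamine is reduced to the short prefix returned by UnalignedCountVector128, mirroring what the existing Vector<ushort> path does with UnalignedCountVector, so SequentialScan only handles that prefix before the vector branches take over; shorter inputs stay entirely on the scalar path. A quick check of the threshold arithmetic (a sketch, not part of the change; assumes using System.Runtime.Intrinsics):

// Sketch only: the minimum input size for the vectorized setup above.
// Vector128<byte>.Count is 16, so the check requires length >= 32 chars.
static int MinVectorizedLength() => Vector128<byte>.Count * 2;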
@@ -231,9 +240,96 @@ public static int IndexOf(ref char searchSpace, char value, int length)
                 offset += 1;
             }

-            // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
+            // We get past SequentialScan only if IsHardwareAccelerated or intrinsic.IsSupported is true. However, we still have the redundant check to allow
             // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
-            if (Vector.IsHardwareAccelerated)
+            if (Avx2.IsSupported)
+            {
+                if (offset < length)
+                {
+                    lengthToExamine = GetCharVector256SpanLength(offset, length);
+                    if (lengthToExamine > offset)
+                    {
+                        Vector256<ushort> values = Vector256.Create(value);
+                        do
+                        {
+                            Vector256<ushort> search = LoadVector256(ref searchSpace, offset);
+                            int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte());
+                            // Note that MoveMask has converted the equal vector elements into a set of bit flags,
+                            // so the bit position in 'matches' corresponds to the element offset.
+                            if (matches == 0)
+                            {
+                                // Zero flags set so no matches
+                                offset += Vector256<ushort>.Count;
+                                continue;
+                            }
+
+                            // Find bitflag offset of first match and add to current offset,
+                            // flags are in bytes so divide for chars
+                            return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
+                        } while (lengthToExamine > offset);
+                    }
+
+                    lengthToExamine = GetCharVector128SpanLength(offset, length);
+                    if (lengthToExamine > offset)
+                    {
+                        Vector128<ushort> values = Vector128.Create(value);
+                        Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
+
+                        // Same method as above
+                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
+                        if (matches == 0)
+                        {
+                            // Zero flags set so no matches
+                            offset += Vector128<ushort>.Count;
+                        }
+                        else
+                        {
+                            // Find bitflag offset of first match and add to current offset,
+                            // flags are in bytes so divide for chars
+                            return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
+                        }
+                    }
+
+                    if (offset < length)
+                    {
+                        lengthToExamine = length - offset;
+                        goto SequentialScan;
+                    }
+                }
+            }
+            else if (Sse2.IsSupported)
+            {
+                if (offset < length)
+                {
+                    lengthToExamine = GetCharVector128SpanLength(offset, length);
+
+                    Vector128<ushort> values = Vector128.Create(value);
+                    while (lengthToExamine > offset)
+                    {
+                        Vector128<ushort> search = LoadVector128(ref searchSpace, offset);
+
+                        // Same method as above
+                        int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte());
+                        if (matches == 0)
+                        {
+                            // Zero flags set so no matches
+                            offset += Vector128<ushort>.Count;
+                            continue;
+                        }
+
+                        // Find bitflag offset of first match and add to current offset,
+                        // flags are in bytes so divide for chars
+                        return offset + (BitOps.TrailingZeroCount(matches) / sizeof(char));
+                    }
+
+                    if (offset < length)
+                    {
+                        lengthToExamine = length - offset;
+                        goto SequentialScan;
+                    }
+                }
+            }
+            else if (Vector.IsHardwareAccelerated)
             {
                 if (offset < length)
                 {
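The return expressions in the new branches turn a byte-granularity mask back into a char index: CompareEqual sets both bytes of every matching ushort lane, MoveMask packs one bit per byte into 'matches', and the trailing zero count (a byte position) is divided by sizeof(char). A standalone sketch of that arithmetic, using System.Numerics.BitOperations in place of the runtime's internal BitOps helper and a made-up mask value:

// Sketch only: if the char at element index 2 matched, bytes 4 and 5 of the
// comparison result are 0xFF, so MoveMask yields 0b0011_0000.
static int FirstMatchIndex()
{
    int matches = 0b0011_0000;
    int firstSetBit = System.Numerics.BitOperations.TrailingZeroCount(matches); // 4
    return firstSetBit / sizeof(char);                                          // 2
}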
@@ -842,6 +938,14 @@ private static int LocateLastFoundChar(ulong match)
         private static unsafe Vector<ushort> LoadVector(ref char start, int offset)
             => Unsafe.ReadUnaligned<Vector<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));

+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe Vector128<ushort> LoadVector128(ref char start, int offset)
+            => Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe Vector256<ushort> LoadVector256(ref char start, int offset)
+            => Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static unsafe UIntPtr LoadUIntPtr(ref char start, int offset)
             => Unsafe.ReadUnaligned<UIntPtr>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref start, offset)));
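The new LoadVector128/LoadVector256 helpers follow the existing LoadVector pattern: Unsafe.ReadUnaligned reinterprets the char reference as bytes and issues an unaligned 128- or 256-bit read, so the buffer never has to be pinned. For comparison, a pointer-based equivalent (a sketch only; LoadVector128Pinned is a hypothetical name and assumes the caller has already fixed the buffer):

// Sketch only: explicit-pointer version of the 128-bit load; the ref-based
// helper above avoids the fixed statement this would require.
private static unsafe Vector128<ushort> LoadVector128Pinned(char* start, int offset)
    => Sse2.LoadVector128((ushort*)(start + offset));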
@@ -850,6 +954,14 @@ private static unsafe UIntPtr LoadUIntPtr(ref char start, int offset)
         private static unsafe int GetCharVectorSpanLength(int offset, int length)
             => ((length - offset) & ~(Vector<ushort>.Count - 1));

+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe int GetCharVector128SpanLength(int offset, int length)
+            => ((length - offset) & ~(Vector128<ushort>.Count - 1));
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe int GetCharVector256SpanLength(int offset, int length)
+            => ((length - offset) & ~(Vector256<ushort>.Count - 1));
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static unsafe int UnalignedCountVector(ref char searchSpace)
         {
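GetCharVector128SpanLength and GetCharVector256SpanLength round the remaining element count down to a multiple of the vector width; the mask trick works because Vector128<ushort>.Count (8) and Vector256<ushort>.Count (16) are powers of two. A worked example with hypothetical numbers:

// Sketch only: 37 chars remaining, Vector128<ushort>.Count == 8.
int remaining = 37;
int vectorizable = remaining & ~(Vector128<ushort>.Count - 1); // 37 & ~7 == 32
// Four full 128-bit iterations cover 32 chars; the last 5 fall to the scalar paths.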
@@ -862,6 +974,15 @@ private static unsafe int UnalignedCountVector(ref char searchSpace)
             return ((Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1));
         }

+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static unsafe int UnalignedCountVector128(ref char searchSpace)
+        {
+            const int elementsPerByte = sizeof(ushort) / sizeof(byte);
+
+            int unaligned = ((int)Unsafe.AsPointer(ref searchSpace) & (Unsafe.SizeOf<Vector128<ushort>>() - 1)) / elementsPerByte;
+            return ((Vector128<ushort>.Count - unaligned) & (Vector128<ushort>.Count - 1));
+        }
+
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private static unsafe int UnalignedCountVectorFromEnd(ref char searchSpace, int length)
         {
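UnalignedCountVector128 reports how many leading chars the scalar scan should consume so that the first 128-bit load lands on a 16-byte boundary; since the loads use ReadUnaligned, this is a throughput tweak (avoiding split loads) rather than a correctness requirement. A worked example with a hypothetical starting address:

// Sketch only: suppose searchSpace begins 6 bytes past a 16-byte boundary.
int unalignedBytes = 0x6;                                   // address & 15
int unalignedChars = unalignedBytes / sizeof(char);         // 3
int scalarPrefix = (Vector128<ushort>.Count - unalignedChars)
                    & (Vector128<ushort>.Count - 1);        // (8 - 3) & 7 == 5
// After 5 scalar chars (10 bytes), the next load address is 16-byte aligned.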