22// The .NET Foundation licenses this file to you under the MIT license.
33
44using System . Diagnostics ;
5- using System . Text . Unicode ;
5+ using System . Numerics ;
66using System . Runtime . CompilerServices ;
77using System . Runtime . InteropServices ;
88using System . Runtime . Intrinsics ;
9+ using System . Runtime . Intrinsics . X86 ;
10+ using System . Text . Unicode ;
911
1012namespace System . Globalization
1113{
@@ -295,7 +297,6 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
295297 // A non-linguistic search compares chars directly against one another, so large
296298 // target strings can never be found inside small search spaces. This check also
297299 // handles empty 'source' spans.
298-
299300 return - 1 ;
300301 }
301302
@@ -309,25 +310,39 @@ internal static int IndexOfOrdinalIgnoreCase(ReadOnlySpan<char> source, ReadOnly
309310 return CompareInfo . NlsIndexOfOrdinalCore ( source , value , ignoreCase : true , fromBeginning : true ) ;
310311 }
311312
312- // If value starts with an ASCII char, we can use a vectorized path
313+ // If value doesn't start with ASCII, fall back to a non-vectorized non-ASCII friendly version.
313314 ref char valueRef = ref MemoryMarshal . GetReference ( value ) ;
314315 char valueChar = valueRef ;
315-
316316 if ( ! char . IsAscii ( valueChar ) )
317317 {
318- // Fallback to a more non-ASCII friendly version
319318 return OrdinalCasing . IndexOf ( source , value ) ;
320319 }
321320
322321 // Hoist some expressions from the loop
323322 int valueTailLength = value . Length - 1 ;
324323 int searchSpaceLength = source . Length - valueTailLength ;
324+ int searchSpaceMinusValueTailLength = source . Length - valueTailLength ;
325325 ref char searchSpace = ref MemoryMarshal . GetReference ( source ) ;
326326 char valueCharU = default ;
327327 char valueCharL = default ;
328328 nint offset = 0 ;
329329 bool isLetter = false ;
330330
331+ // If the input is long enough and the value ends with ASCII, we can take a special vectorized
332+ // path that compares both the beginning and the end at the same time.
333+ if ( Vector128 . IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128 < ushort > . Count )
334+ {
335+ valueCharU = Unsafe . Add ( ref valueRef , valueTailLength ) ;
336+ if ( char . IsAscii ( valueCharU ) )
337+ {
338+ goto SearchTwoChars ;
339+ }
340+ }
341+
342+ // We're searching for the first character and it's known to be ASCII. If it's not a letter,
343+ // then IgnoreCase doesn't impact what it matches and we just need to do a normal search
344+ // for that single character. If it is a letter, then we need to search for both its upper
345+ // and lower-case variants.
331346 if ( char . IsAsciiLetter ( valueChar ) )
332347 {
333348 valueCharU = ( char ) ( valueChar & ~ 0x20 ) ;
@@ -370,6 +385,179 @@ ref Unsafe.Add(ref valueRef, 1), valueTailLength))
370385 while ( searchSpaceLength > 0 ) ;
371386
372387 return - 1 ;
388+
389+ // Based on SpanHelpers.IndexOf(ref char, int, ref char, int), which was in turn based on
390+ // http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd. This version has additional
391+ // modifications to support case-insensitive searches.
392+ SearchTwoChars :
393+ // Both the first character in value (valueChar) and the last character in value (valueCharU) are ASCII. Get their lowercase variants.
394+ valueChar = ( char ) ( valueChar | 0x20 ) ;
395+ valueCharU = ( char ) ( valueCharU | 0x20 ) ;
396+
397+ // The search is more efficient if the two characters being searched for are different. As long as they are equal, walk backwards
398+ // from the last character in the search value until we find a character that's different. Since we're dealing with IgnoreCase,
399+ // we compare the lowercase variants, as that's what we'll be comparing against in the main loop.
400+ nint ch1ch2Distance = valueTailLength ;
401+ while ( valueCharU == valueChar && ch1ch2Distance > 1 )
402+ {
403+ char tmp = Unsafe . Add ( ref valueRef , ch1ch2Distance - 1 ) ;
404+ if ( ! char . IsAscii ( tmp ) )
405+ {
406+ break ;
407+ }
408+ -- ch1ch2Distance ;
409+ valueCharU = ( char ) ( tmp | 0x20 ) ;
410+ }
411+
412+ // Use Vector256 if the input is long enough.
413+ if ( Vector256 . IsHardwareAccelerated && searchSpaceMinusValueTailLength - Vector256 < ushort > . Count >= 0 )
414+ {
415+ // Create a vector for each of the lowercase ASCII characters we're searching for.
416+ Vector256 < ushort > ch1 = Vector256 . Create ( ( ushort ) valueChar ) ;
417+ Vector256 < ushort > ch2 = Vector256 . Create ( ( ushort ) valueCharU ) ;
418+
419+ nint searchSpaceMinusValueTailLengthAndVector = searchSpaceMinusValueTailLength - ( nint ) Vector256 < ushort > . Count ;
420+ do
421+ {
422+ // Make sure we don't go out of bounds.
423+ Debug . Assert ( offset + ch1ch2Distance + Vector256 < ushort > . Count <= searchSpaceLength ) ;
424+
425+ // Load a vector from the current search space offset and another from the offset plus the distance between the two characters.
426+ // For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there
427+ // was no match. If it wasn't, we have to do more work to check for a match.
428+ Vector256 < ushort > cmpCh2 = Vector256 . Equals ( ch2 , Vector256 . BitwiseOr ( Vector256 . LoadUnsafe ( ref searchSpace , ( nuint ) ( offset + ch1ch2Distance ) ) , Vector256 . Create ( ( ushort ) 0x20 ) ) ) ;
429+ Vector256 < ushort > cmpCh1 = Vector256 . Equals ( ch1 , Vector256 . BitwiseOr ( Vector256 . LoadUnsafe ( ref searchSpace , ( nuint ) offset ) , Vector256 . Create ( ( ushort ) 0x20 ) ) ) ;
430+ Vector256 < byte > cmpAnd = ( cmpCh1 & cmpCh2 ) . AsByte ( ) ;
431+ if ( cmpAnd != Vector256 < byte > . Zero )
432+ {
433+ goto CandidateFound ;
434+ }
435+
436+ LoopFooter :
437+ // No match. Advance to the next vector.
438+ offset += Vector256 < ushort > . Count ;
439+
440+ // If we've reached the end of the search space, bail.
441+ if ( offset == searchSpaceMinusValueTailLength )
442+ {
443+ return - 1 ;
444+ }
445+
446+ // If we're within a vector's length of the end of the search space, adjust the offset
447+ // to point to the last vector so that our next iteration will process it.
448+ if ( offset > searchSpaceMinusValueTailLengthAndVector )
449+ {
450+ offset = searchSpaceMinusValueTailLengthAndVector ;
451+ }
452+
453+ continue ;
454+
455+ CandidateFound :
456+ // Possible matches at the current location. Extract the bits for each element.
457+ // For each set bit, we'll check if it's a match at that location.
458+ uint mask = cmpAnd . ExtractMostSignificantBits ( ) ;
459+ do
460+ {
461+ // Do a full IgnoreCase equality comparison. SpanHelpers.IndexOf skips comparing the two characters in some cases,
462+ // but we don't actually know that the two characters are equal, since we compared with | 0x20. So we just compare
463+ // the full string always.
464+ int bitPos = BitOperations . TrailingZeroCount ( mask ) ;
465+ nint charPos = ( nint ) ( ( uint ) bitPos / 2 ) ; // div by 2 (shr) because we work with 2-byte chars
466+ if ( EqualsIgnoreCase ( ref Unsafe . Add ( ref searchSpace , offset + charPos ) , ref valueRef , value . Length ) )
467+ {
468+ // Match! Return the index.
469+ return ( int ) ( offset + charPos ) ;
470+ }
471+
472+ // Clear the two lowest set bits in the mask. If there are no more set bits, we're done.
473+ // If any remain, we loop around to do the next comparison.
474+ if ( Bmi1 . IsSupported )
475+ {
476+ mask = Bmi1 . ResetLowestSetBit ( Bmi1 . ResetLowestSetBit ( mask ) ) ;
477+ }
478+ else
479+ {
480+ mask &= ~ ( uint ) ( 0b11 << bitPos ) ;
481+ }
482+ } while ( mask != 0 ) ;
483+ goto LoopFooter ;
484+
485+ } while ( true ) ;
486+ }
487+ else // 128bit vector path (SSE2 or AdvSimd)
488+ {
489+ // Create a vector for each of the lowercase ASCII characters we're searching for.
490+ Vector128 < ushort > ch1 = Vector128 . Create ( ( ushort ) valueChar ) ;
491+ Vector128 < ushort > ch2 = Vector128 . Create ( ( ushort ) valueCharU ) ;
492+
493+ nint searchSpaceMinusValueTailLengthAndVector = searchSpaceMinusValueTailLength - ( nint ) Vector128 < ushort > . Count ;
494+ do
495+ {
496+ // Make sure we don't go out of bounds.
497+ Debug . Assert ( offset + ch1ch2Distance + Vector128 < ushort > . Count <= searchSpaceLength ) ;
498+
499+ // Load a vector from the current search space offset and another from the offset plus the distance between the two characters.
500+ // For each, | with 0x20 so that letters are lowercased, then & those together to get a mask. If the mask is all zeros, there
501+ // was no match. If it wasn't, we have to do more work to check for a match.
502+ Vector128 < ushort > cmpCh2 = Vector128 . Equals ( ch2 , Vector128 . BitwiseOr ( Vector128 . LoadUnsafe ( ref searchSpace , ( nuint ) ( offset + ch1ch2Distance ) ) , Vector128 . Create ( ( ushort ) 0x20 ) ) ) ;
503+ Vector128 < ushort > cmpCh1 = Vector128 . Equals ( ch1 , Vector128 . BitwiseOr ( Vector128 . LoadUnsafe ( ref searchSpace , ( nuint ) offset ) , Vector128 . Create ( ( ushort ) 0x20 ) ) ) ;
504+ Vector128 < byte > cmpAnd = ( cmpCh1 & cmpCh2 ) . AsByte ( ) ;
505+ if ( cmpAnd != Vector128 < byte > . Zero )
506+ {
507+ goto CandidateFound ;
508+ }
509+
510+ LoopFooter :
511+ // No match. Advance to the next vector.
512+ offset += Vector128 < ushort > . Count ;
513+
514+ // If we've reached the end of the search space, bail.
515+ if ( offset == searchSpaceMinusValueTailLength )
516+ {
517+ return - 1 ;
518+ }
519+
520+ // If we're within a vector's length of the end of the search space, adjust the offset
521+ // to point to the last vector so that our next iteration will process it.
522+ if ( offset > searchSpaceMinusValueTailLengthAndVector )
523+ {
524+ offset = searchSpaceMinusValueTailLengthAndVector ;
525+ }
526+
527+ continue ;
528+
529+ CandidateFound :
530+ // Possible matches at the current location. Extract the bits for each element.
531+ // For each set bit, we'll check if it's a match at that location.
532+ uint mask = cmpAnd . ExtractMostSignificantBits ( ) ;
533+ do
534+ {
535+ // Do a full IgnoreCase equality comparison. SpanHelpers.IndexOf skips comparing the two characters in some cases,
536+ // but we don't actually know that the two characters are equal, since we compared with | 0x20. So we just compare
537+ // the full string always.
538+ int bitPos = BitOperations . TrailingZeroCount ( mask ) ;
539+ int charPos = ( int ) ( ( uint ) bitPos / 2 ) ; // div by 2 (shr) because we work with 2-byte chars
540+ if ( EqualsIgnoreCase ( ref Unsafe . Add ( ref searchSpace , offset + charPos ) , ref valueRef , value . Length ) )
541+ {
542+ // Match! Return the index.
543+ return ( int ) ( offset + charPos ) ;
544+ }
545+
546+ // Clear the two lowest set bits in the mask. If there are no more set bits, we're done.
547+ // If any remain, we loop around to do the next comparison.
548+ if ( Bmi1 . IsSupported )
549+ {
550+ mask = Bmi1 . ResetLowestSetBit ( Bmi1 . ResetLowestSetBit ( mask ) ) ;
551+ }
552+ else
553+ {
554+ mask &= ~ ( uint ) ( 0b11 << bitPos ) ;
555+ }
556+ } while ( mask != 0 ) ;
557+ goto LoopFooter ;
558+
559+ } while ( true ) ;
560+ }
373561 }
374562
375563 internal static int LastIndexOf ( string source , string value , int startIndex , int count )
0 commit comments