@@ -703,7 +703,15 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
703703 int offset = 0 ;
704704 int lengthToExamine = length ;
705705
706- if ( Vector . IsHardwareAccelerated )
706+ if ( Avx2 . IsSupported || Sse2 . IsSupported )
707+ {
708+ // Avx2 branch also operates on Sse2 sizes, so check is combined.
709+ if ( length >= Vector128 < byte > . Count * 2 )
710+ {
711+ lengthToExamine = UnalignedCountVector128 ( ref searchSpace ) ;
712+ }
713+ }
714+ else if ( Vector . IsHardwareAccelerated )
707715 {
708716 if ( length >= Vector < ushort > . Count * 2 )
709717 {
@@ -744,9 +752,109 @@ public static int IndexOfAny(ref char searchSpace, char value0, char value1, cha
744752 offset += 1 ;
745753 }
746754
747- // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
755+ // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow
748756 // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
749- if ( Vector . IsHardwareAccelerated )
757+ if ( Avx2 . IsSupported )
758+ {
759+ if ( offset < length )
760+ {
761+ lengthToExamine = GetCharVector256SpanLength ( offset , length ) ;
762+ if ( lengthToExamine > offset )
763+ {
764+ Vector256 < ushort > values0 = Vector256 . Create ( value0 ) ;
765+ Vector256 < ushort > values1 = Vector256 . Create ( value1 ) ;
766+ Vector256 < ushort > values2 = Vector256 . Create ( value2 ) ;
767+ do
768+ {
769+ Vector256 < ushort > search = LoadVector256 ( ref searchSpace , offset ) ;
770+ // Note that MoveMask has converted the equal vector elements into a set of bit flags, one flag per byte,
771+ // so each bit position in 'matches' corresponds to a byte offset in the vector (two flags per char element).
772+ int matches = Avx2 . MoveMask ( Avx2 . CompareEqual ( values0 , search ) . AsByte ( ) ) ;
773+ // Bitwise Or to combine the flagged matches for the second and third values to our match flags
774+ matches |= Avx2 . MoveMask ( Avx2 . CompareEqual ( values1 , search ) . AsByte ( ) ) ;
775+ matches |= Avx2 . MoveMask ( Avx2 . CompareEqual ( values2 , search ) . AsByte ( ) ) ;
776+ if ( matches == 0 )
777+ {
778+ // Zero flags set so no matches
779+ offset += Vector256 < ushort > . Count ;
780+ continue ;
781+ }
782+
783+ // Find bitflag offset of first match and add to current offset,
784+ // flags are in bytes so divide for chars
785+ return offset + ( BitOps . TrailingZeroCount ( matches ) / sizeof ( char ) ) ;
786+ } while ( lengthToExamine > offset ) ;
787+ }
788+
789+ lengthToExamine = GetCharVector128SpanLength ( offset , length ) ;
790+ if ( lengthToExamine > offset )
791+ {
792+ Vector128 < ushort > values0 = Vector128 . Create ( value0 ) ;
793+ Vector128 < ushort > values1 = Vector128 . Create ( value1 ) ;
794+ Vector128 < ushort > values2 = Vector128 . Create ( value2 ) ;
795+ Vector128 < ushort > search = LoadVector128 ( ref searchSpace , offset ) ;
796+
797+ // Same method as above
798+ int matches = Sse2 . MoveMask ( Sse2 . CompareEqual ( values0 , search ) . AsByte ( ) ) ;
799+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values1 , search ) . AsByte ( ) ) ;
800+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values2 , search ) . AsByte ( ) ) ;
801+ if ( matches == 0 )
802+ {
803+ // Zero flags set so no matches
804+ offset += Vector128 < ushort > . Count ;
805+ }
806+ else
807+ {
808+ // Find bitflag offset of first match and add to current offset,
809+ // flags are in bytes so divide for chars
810+ return offset + ( BitOps . TrailingZeroCount ( matches ) / sizeof ( char ) ) ;
811+ }
812+ }
813+
814+ if ( offset < length )
815+ {
816+ lengthToExamine = length - offset ;
817+ goto SequentialScan ;
818+ }
819+ }
820+ }
821+ else if ( Sse2 . IsSupported )
822+ {
823+ if ( offset < length )
824+ {
825+ lengthToExamine = GetCharVector128SpanLength ( offset , length ) ;
826+
827+ Vector128 < ushort > values0 = Vector128 . Create ( value0 ) ;
828+ Vector128 < ushort > values1 = Vector128 . Create ( value1 ) ;
829+ Vector128 < ushort > values2 = Vector128 . Create ( value2 ) ;
830+ while ( lengthToExamine > offset )
831+ {
832+ Vector128 < ushort > search = LoadVector128 ( ref searchSpace , offset ) ;
833+
834+ // Same method as above
835+ int matches = Sse2 . MoveMask ( Sse2 . CompareEqual ( values0 , search ) . AsByte ( ) ) ;
836+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values1 , search ) . AsByte ( ) ) ;
837+ matches |= Sse2 . MoveMask ( Sse2 . CompareEqual ( values2 , search ) . AsByte ( ) ) ;
838+ if ( matches == 0 )
839+ {
840+ // Zero flags set so no matches
841+ offset += Vector128 < ushort > . Count ;
842+ continue ;
843+ }
844+
845+ // Find bitflag offset of first match and add to current offset,
846+ // flags are in bytes so divide for chars
847+ return offset + ( BitOps . TrailingZeroCount ( matches ) / sizeof ( char ) ) ;
848+ }
849+
850+ if ( offset < length )
851+ {
852+ lengthToExamine = length - offset ;
853+ goto SequentialScan ;
854+ }
855+ }
856+ }
857+ else if ( Vector . IsHardwareAccelerated )
750858 {
751859 if ( offset < length )
752860 {
0 commit comments