@@ -81,8 +81,8 @@ cfg_select! {
8181 // use `loadu`, which supports unaligned loading.
8282 let chunk = unsafe { _mm_loadu_si128( chunk. as_ptr( ) as * const __m128i) } ;
8383
84- // For character in the chunk, see if its byte value is < 0, which
85- // indicates that it's part of a UTF-8 char.
84+ // For each character in the chunk, see if its byte value is < 0,
85+ // which indicates that it's part of a UTF-8 char.
8686 let multibyte_test = _mm_cmplt_epi8( chunk, _mm_set1_epi8( 0 ) ) ;
8787 // Create a bit mask from the comparison results.
8888 let multibyte_mask = _mm_movemask_epi8( multibyte_test) ;
@@ -132,8 +132,111 @@ cfg_select! {
132132 }
133133 }
134134 }
135+ target_arch = "loongarch64" => {
136+ fn analyze_source_file_dispatch(
137+ src: & str ,
138+ lines: & mut Vec <RelativeBytePos >,
139+ multi_byte_chars: & mut Vec <MultiByteChar >,
140+ ) {
141+ use std:: arch:: is_loongarch_feature_detected;
142+
143+ if is_loongarch_feature_detected!( "lsx" ) {
144+ unsafe {
145+ analyze_source_file_lsx( src, lines, multi_byte_chars) ;
146+ }
147+ } else {
148+ analyze_source_file_generic(
149+ src,
150+ src. len( ) ,
151+ RelativeBytePos :: from_u32( 0 ) ,
152+ lines,
153+ multi_byte_chars,
154+ ) ;
155+ }
156+ }
157+
158+ /// Checks 16 byte chunks of text at a time. If the chunk contains
159+ /// something other than printable ASCII characters and newlines, the
160+ /// function falls back to the generic implementation. Otherwise it uses
161+ /// LSX intrinsics to quickly find all newlines.
162+ #[ target_feature( enable = "lsx" ) ]
163+ unsafe fn analyze_source_file_lsx(
164+ src: & str ,
165+ lines: & mut Vec <RelativeBytePos >,
166+ multi_byte_chars: & mut Vec <MultiByteChar >,
167+ ) {
168+ use std:: arch:: loongarch64:: * ;
169+
170+ const CHUNK_SIZE : usize = 16 ;
171+
172+ let ( chunks, tail) = src. as_bytes( ) . as_chunks:: <CHUNK_SIZE >( ) ;
173+
174+ // This variable keeps track of where we should start decoding a
175+ // chunk. If a multi-byte character spans across chunk boundaries,
176+ // we need to skip that part in the next chunk because we already
177+ // handled it.
178+ let mut intra_chunk_offset = 0 ;
179+
180+ for ( chunk_index, chunk) in chunks. iter( ) . enumerate( ) {
181+ // All LSX memory instructions support unaligned access, so using
182+ // vld is fine.
183+ let chunk = unsafe { lsx_vld:: <0 >( chunk. as_ptr( ) as * const i8 ) } ;
184+
185+ // For each character in the chunk, see if its byte value is < 0,
186+ // which indicates that it's part of a UTF-8 char.
187+ let multibyte_mask = lsx_vmskltz_b( chunk) ;
188+ // Create a bit mask from the comparison results.
189+ let multibyte_mask = lsx_vpickve2gr_w:: <0 >( multibyte_mask) ;
190+
191+ // If the bit mask is all zero, we only have ASCII chars here:
192+ if multibyte_mask == 0 {
193+ assert!( intra_chunk_offset == 0 ) ;
194+
195+ // Check for newlines in the chunk
196+ let newlines_test = lsx_vseqi_b:: <{ b'\n' as i32 } >( chunk) ;
197+ let newlines_mask = lsx_vmskltz_b( newlines_test) ;
198+ let mut newlines_mask = lsx_vpickve2gr_w:: <0 >( newlines_mask) ;
199+
200+ let output_offset = RelativeBytePos :: from_usize( chunk_index * CHUNK_SIZE + 1 ) ;
201+
202+ while newlines_mask != 0 {
203+ let index = newlines_mask. trailing_zeros( ) ;
204+
205+ lines. push( RelativeBytePos ( index) + output_offset) ;
206+
207+ // Clear the bit, so we can find the next one.
208+ newlines_mask &= newlines_mask - 1 ;
209+ }
210+ } else {
211+ // The slow path.
212+ // There are multibyte chars in here, fallback to generic decoding.
213+ let scan_start = chunk_index * CHUNK_SIZE + intra_chunk_offset;
214+ intra_chunk_offset = analyze_source_file_generic(
215+ & src[ scan_start..] ,
216+ CHUNK_SIZE - intra_chunk_offset,
217+ RelativeBytePos :: from_usize( scan_start) ,
218+ lines,
219+ multi_byte_chars,
220+ ) ;
221+ }
222+ }
223+
224+ // There might still be a tail left to analyze
225+ let tail_start = src. len( ) - tail. len( ) + intra_chunk_offset;
226+ if tail_start < src. len( ) {
227+ analyze_source_file_generic(
228+ & src[ tail_start..] ,
229+ src. len( ) - tail_start,
230+ RelativeBytePos :: from_usize( tail_start) ,
231+ lines,
232+ multi_byte_chars,
233+ ) ;
234+ }
235+ }
236+ }
135237 _ => {
136- // The target (or compiler version) does not support SSE2 ...
238+ // The target (or compiler version) does not support vector instructions
239+ // our specialized implementations need (x86 SSE2, loongarch64 LSX)...
137240 fn analyze_source_file_dispatch(
138241 src: & str ,
139242 lines: & mut Vec <RelativeBytePos >,
0 commit comments