@@ -52,10 +52,8 @@ pub enum EscapeError {
5252
5353 /// Unicode escape code in byte literal.
5454 UnicodeEscapeInByte ,
55- /// Non-ascii character in byte literal.
55+ /// Non-ASCII character in a byte literal, byte string literal, or raw byte string literal.
5656 NonAsciiCharInByte ,
57- /// Non-ascii character in byte string literal.
58- NonAsciiCharInByteString ,
5957
6058 /// After a line ending with '\', the next line contains whitespace
6159 /// characters that are not skipped.
@@ -78,54 +76,37 @@ impl EscapeError {
7876/// Takes the contents of a literal (without quotes) and produces a
7977/// sequence of escaped characters or errors.
8078/// Values are returned by invoking the provided callback.
///
/// Each callback invocation receives a `Range<usize>` into the input text
/// together with either the unescaped `char` or an `EscapeError`.
/// For `Mode::Char` and `Mode::Byte` the callback is invoked exactly once;
/// the string and raw-string modes delegate to their dedicated helpers.
81- pub fn unescape_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
79+ pub fn unescape_literal < F > ( src : & str , mode : Mode , callback : & mut F )
8280where
8381 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
8482{
8583 match mode {
8684 Mode :: Char | Mode :: Byte => {
87- let mut chars = literal_text. chars ( ) ;
88- let result = unescape_char_or_byte ( & mut chars, mode) ;
89- // The Chars iterator moved forward.
90- callback ( 0 ..( literal_text. len ( ) - chars. as_str ( ) . len ( ) ) , result) ;
// `chars.as_str()` is the not-yet-consumed remainder of the input, so
// "total length minus remaining length" is how far the iterator advanced;
// that prefix is the range reported to the callback.
85+ let mut chars = src. chars ( ) ;
86+ let res = unescape_char_or_byte ( & mut chars, mode == Mode :: Byte ) ;
87+ callback ( 0 ..( src. len ( ) - chars. as_str ( ) . len ( ) ) , res) ;
9188 }
92- Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( literal_text, mode, callback) ,
93- // NOTE: Raw strings do not perform any explicit character escaping, here we
94- // only translate CRLF to LF and produce errors on bare CR.
// In the added lines the helpers take a plain "is this the byte variant?"
// flag, computed here by comparing `mode` against the byte-flavoured
// variant handled by each arm.
89+ Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( src, mode == Mode :: ByteStr , callback) ,
9590 Mode :: RawStr | Mode :: RawByteStr => {
96- unescape_raw_str_or_raw_byte_str ( literal_text , mode, callback)
91+ unescape_raw_str_or_raw_byte_str ( src , mode == Mode :: RawByteStr , callback)
9792 }
9893 }
9994}
10095
101- /// Takes the contents of a byte, byte string or raw byte string (without quotes)
102- /// and produces a sequence of bytes or errors.
103- /// Values are returned by invoking the provided callback.
104- pub fn unescape_byte_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
105- where
106- F : FnMut ( Range < usize > , Result < u8 , EscapeError > ) ,
107- {
// Only the byte-flavoured modes are expected here (checked in debug builds).
108- debug_assert ! ( mode. is_bytes( ) ) ;
// Reuse the `char`-producing entry point and convert each successfully
// unescaped `char` to a `u8` via `byte_from_char`; errors pass through.
109- unescape_literal ( literal_text, mode, & mut |range, result| {
110- callback ( range, result. map ( byte_from_char) ) ;
111- } )
112- }
113-
11496/// Takes the contents of a char literal (without quotes), and returns an
11597/// unescaped char or an error.
///
/// On failure the returned tuple holds the number of bytes of the input that
/// had been consumed when the error was produced (input length minus the
/// length of the unconsumed remainder), followed by the error itself.
116- pub fn unescape_char ( literal_text : & str ) -> Result < char , ( usize , EscapeError ) > {
117- let mut chars = literal_text. chars ( ) ;
118- unescape_char_or_byte ( & mut chars, Mode :: Char )
119- . map_err ( |err| ( literal_text. len ( ) - chars. as_str ( ) . len ( ) , err) )
98+ pub fn unescape_char ( src : & str ) -> Result < char , ( usize , EscapeError ) > {
99+ let mut chars = src. chars ( ) ;
// `false`: this is the non-byte (`char`) flavour of the shared helper.
100+ unescape_char_or_byte ( & mut chars, false ) . map_err ( |err| ( src. len ( ) - chars. as_str ( ) . len ( ) , err) )
120101}
121102
122103/// Takes the contents of a byte literal (without quotes), and returns an
123104/// unescaped byte or an error.
///
/// On failure the returned tuple holds the number of bytes of the input that
/// had been consumed when the error was produced (input length minus the
/// length of the unconsumed remainder), followed by the error itself.
124- pub fn unescape_byte ( literal_text : & str ) -> Result < u8 , ( usize , EscapeError ) > {
125- let mut chars = literal_text . chars ( ) ;
126- unescape_char_or_byte ( & mut chars, Mode :: Byte )
105+ pub fn unescape_byte ( src : & str ) -> Result < u8 , ( usize , EscapeError ) > {
106+ let mut chars = src . chars ( ) ;
// `true`: this is the byte flavour of the shared helper.
107+ unescape_char_or_byte ( & mut chars, true )
// The helper yields a `char`; convert the successful value to a `u8`.
127108 . map ( byte_from_char)
128- . map_err ( |err| ( literal_text . len ( ) - chars. as_str ( ) . len ( ) , err) )
109+ . map_err ( |err| ( src . len ( ) - chars. as_str ( ) . len ( ) , err) )
129110}
130111
131112/// What kind of literal do we parse.
@@ -147,20 +128,17 @@ impl Mode {
147128 }
148129 }
149130
/// Returns `true` for the byte-flavoured modes (`Byte`, `ByteStr`,
/// `RawByteStr`) and `false` for `Char`, `Str` and `RawStr`.
150- pub fn is_bytes ( self ) -> bool {
131+ pub fn is_byte ( self ) -> bool {
151132 match self {
152133 Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr => true ,
153134 Mode :: Char | Mode :: Str | Mode :: RawStr => false ,
154135 }
155136 }
156137}
157138
158- fn scan_escape ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
139+ fn scan_escape ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
159140 // Previous character was '\\', unescape what follows.
160-
161- let second_char = chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ?;
162-
163- let res = match second_char {
141+ let res = match chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ? {
164142 '"' => '"' ,
165143 'n' => '\n' ,
166144 'r' => '\r' ,
@@ -181,7 +159,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
181159 let value = hi * 16 + lo;
182160
183161 // For a non-byte literal verify that it is within ASCII range.
184- if !mode . is_bytes ( ) && !is_ascii ( value) {
162+ if !is_byte && !is_ascii ( value) {
185163 return Err ( EscapeError :: OutOfRangeHexEscape ) ;
186164 }
187165 let value = value as u8 ;
@@ -217,7 +195,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
217195
218196 // Incorrect syntax has higher priority for error reporting
219197 // than unallowed value for a literal.
220- if mode . is_bytes ( ) {
198+ if is_byte {
221199 return Err ( EscapeError :: UnicodeEscapeInByte ) ;
222200 }
223201
@@ -249,23 +227,22 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
249227}
250228
/// Passes the character through unchanged, unless the literal is a
/// byte-flavoured one and the character is not ASCII, in which case
/// `EscapeError::NonAsciiCharInByte` is returned.
251229#[ inline]
252- fn ascii_check ( first_char : char , mode : Mode ) -> Result < char , EscapeError > {
253- if mode . is_bytes ( ) && !first_char . is_ascii ( ) {
230+ fn ascii_check ( c : char , is_byte : bool ) -> Result < char , EscapeError > {
231+ if is_byte && !c . is_ascii ( ) {
254232 // A byte literal can't contain a non-ASCII character.
255233 Err ( EscapeError :: NonAsciiCharInByte )
256234 } else {
257- Ok ( first_char )
235+ Ok ( c )
258236 }
259237}
260238
261- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
262- debug_assert ! ( mode == Mode :: Char || mode == Mode :: Byte ) ;
263- let first_char = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
264- let res = match first_char {
265- '\\' => scan_escape ( chars, mode) ,
239+ fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
240+ let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
241+ let res = match c {
242+ '\\' => scan_escape ( chars, is_byte) ,
266243 '\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
267244 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
268- _ => ascii_check ( first_char , mode ) ,
245+ _ => ascii_check ( c , is_byte ) ,
269246 } ?;
270247 if chars. next ( ) . is_some ( ) {
271248 return Err ( EscapeError :: MoreThanOneChar ) ;
@@ -275,20 +252,20 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca
275252
276253/// Takes the contents of a string literal (without quotes) and produces a
277254/// sequence of escaped characters or errors.
278- fn unescape_str_or_byte_str < F > ( src : & str , mode : Mode , callback : & mut F )
255+ fn unescape_str_or_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
279256where
280257 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
281258{
282- debug_assert ! ( mode == Mode :: Str || mode == Mode :: ByteStr ) ;
283- let initial_len = src. len ( ) ;
284259 let mut chars = src. chars ( ) ;
285- while let Some ( first_char) = chars. next ( ) {
286- let start = initial_len - chars. as_str ( ) . len ( ) - first_char. len_utf8 ( ) ;
287260
288- let unescaped_char = match first_char {
261+ // The `start` and `end` computation here is complicated because
262+ // `skip_ascii_whitespace` makes us skip over chars without counting
263+ // them in the range computation.
264+ while let Some ( c) = chars. next ( ) {
265+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
266+ let res = match c {
289267 '\\' => {
290- let second_char = chars. clone ( ) . next ( ) ;
291- match second_char {
268+ match chars. clone ( ) . next ( ) {
292269 Some ( '\n' ) => {
293270 // Rust language specification requires us to skip whitespaces
294271 // if unescaped '\' character is followed by '\n'.
@@ -297,17 +274,17 @@ where
297274 skip_ascii_whitespace ( & mut chars, start, callback) ;
298275 continue ;
299276 }
300- _ => scan_escape ( & mut chars, mode ) ,
277+ _ => scan_escape ( & mut chars, is_byte ) ,
301278 }
302279 }
303280 '\n' => Ok ( '\n' ) ,
304281 '\t' => Ok ( '\t' ) ,
305282 '"' => Err ( EscapeError :: EscapeOnlyChar ) ,
306283 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
307- _ => ascii_check ( first_char , mode ) ,
284+ _ => ascii_check ( c , is_byte ) ,
308285 } ;
309- let end = initial_len - chars. as_str ( ) . len ( ) ;
310- callback ( start..end, unescaped_char ) ;
286+ let end = src . len ( ) - chars. as_str ( ) . len ( ) ;
287+ callback ( start..end, res ) ;
311288 }
312289
313290 fn skip_ascii_whitespace < F > ( chars : & mut Chars < ' _ > , start : usize , callback : & mut F )
@@ -340,30 +317,29 @@ where
340317/// Takes the contents of a raw string literal (without quotes) and produces a
341318/// sequence of characters or errors.
342319/// NOTE: Raw strings do not perform any explicit character escaping; here we
343- /// only translate CRLF to LF and produce errors on bare CR.
344- fn unescape_raw_str_or_raw_byte_str < F > ( literal_text : & str , mode : Mode , callback : & mut F )
320+ /// only produce errors on bare CR and, for the byte variant, on non-ASCII chars.
321+ fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
345322where
346323 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
347324{
348- debug_assert ! ( mode == Mode :: RawStr || mode == Mode :: RawByteStr ) ;
349- let initial_len = literal_text. len ( ) ;
350-
351- let mut chars = literal_text. chars ( ) ;
352- while let Some ( curr) = chars. next ( ) {
353- let start = initial_len - chars. as_str ( ) . len ( ) - curr. len_utf8 ( ) ;
325+ let mut chars = src. chars ( ) ;
354326
355- let result = match curr {
327+ // The `start` and `end` computation here matches the one in
328+ // `unescape_str_or_byte_str` for consistency, even though this function
329+ // doesn't have to worry about skipping any chars.
330+ while let Some ( c) = chars. next ( ) {
// `chars` has already advanced past `c`, so step back by its UTF-8
// length to get the byte offset at which `c` starts.
331+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
332+ let res = match c {
356333 '\r' => Err ( EscapeError :: BareCarriageReturnInRawString ) ,
357- c if mode. is_bytes ( ) && !c. is_ascii ( ) => Err ( EscapeError :: NonAsciiCharInByteString ) ,
358- c => Ok ( c) ,
// Every other char is passed to `ascii_check`, which rejects
// non-ASCII chars when `is_byte` is set.
334+ _ => ascii_check ( c, is_byte) ,
359335 } ;
360- let end = initial_len - chars. as_str ( ) . len ( ) ;
361-
362- callback ( start..end, result) ;
336+ let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
337+ callback ( start..end, res) ;
363338 }
364339}
365340
366- fn byte_from_char ( c : char ) -> u8 {
341+ #[ inline]
342+ pub fn byte_from_char ( c : char ) -> u8 {
367343 let res = c as u32 ;
368344 debug_assert ! ( res <= u8 :: MAX as u32 , "guaranteed because of Mode::ByteStr" ) ;
369345 res as u8
0 commit comments