@@ -52,10 +52,8 @@ pub enum EscapeError {
5252
5353 /// Unicode escape code in byte literal.
5454 UnicodeEscapeInByte ,
55- /// Non-ascii character in byte literal.
55+ /// Non-ascii character in byte literal, byte string literal, or raw byte string literal .
5656 NonAsciiCharInByte ,
57- /// Non-ascii character in byte string literal.
58- NonAsciiCharInByteString ,
5957
6058 /// After a line ending with '\', the next line contains whitespace
6159 /// characters that are not skipped.
@@ -78,54 +76,33 @@ impl EscapeError {
7876/// Takes the contents of a literal (without quotes) and produces a
7977/// sequence of escaped characters or errors.
8078/// Values are returned by invoking the provided callback.
81- pub fn unescape_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
79+ pub fn unescape_literal < F > ( src : & str , mode : Mode , callback : & mut F )
8280where
8381 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
8482{
8583 match mode {
8684 Mode :: Char | Mode :: Byte => {
87- let mut chars = literal_text. chars ( ) ;
88- let result = unescape_char_or_byte ( & mut chars, mode) ;
89- // The Chars iterator moved forward.
90- callback ( 0 ..( literal_text. len ( ) - chars. as_str ( ) . len ( ) ) , result) ;
85+ let mut chars = src. chars ( ) ;
86+ let res = unescape_char_or_byte ( & mut chars, mode == Mode :: Byte ) ;
87+ callback ( 0 ..( src. len ( ) - chars. as_str ( ) . len ( ) ) , res) ;
9188 }
92- Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( literal_text, mode, callback) ,
93- // NOTE: Raw strings do not perform any explicit character escaping, here we
94- // only translate CRLF to LF and produce errors on bare CR.
89+ Mode :: Str | Mode :: ByteStr => unescape_str_or_byte_str ( src, mode == Mode :: ByteStr , callback) ,
9590 Mode :: RawStr | Mode :: RawByteStr => {
96- unescape_raw_str_or_raw_byte_str ( literal_text , mode, callback)
91+ unescape_raw_str_or_raw_byte_str ( src , mode == Mode :: RawByteStr , callback)
9792 }
9893 }
9994}
10095
101- /// Takes a contents of a byte, byte string or raw byte string (without quotes)
102- /// and produces a sequence of bytes or errors.
103- /// Values are returned through invoking of the provided callback.
104- pub fn unescape_byte_literal < F > ( literal_text : & str , mode : Mode , callback : & mut F )
105- where
106- F : FnMut ( Range < usize > , Result < u8 , EscapeError > ) ,
107- {
108- debug_assert ! ( mode. is_bytes( ) ) ;
109- unescape_literal ( literal_text, mode, & mut |range, result| {
110- callback ( range, result. map ( byte_from_char) ) ;
111- } )
112- }
113-
11496/// Takes the contents of a char literal (without quotes), and returns an
115- /// unescaped char or an error
116- pub fn unescape_char ( literal_text : & str ) -> Result < char , ( usize , EscapeError ) > {
117- let mut chars = literal_text. chars ( ) ;
118- unescape_char_or_byte ( & mut chars, Mode :: Char )
119- . map_err ( |err| ( literal_text. len ( ) - chars. as_str ( ) . len ( ) , err) )
97+ /// unescaped char or an error.
98+ pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
99+ unescape_char_or_byte ( & mut src. chars ( ) , false )
120100}
121101
122102/// Takes the contents of a byte literal (without quotes), and returns an
123103/// unescaped byte or an error.
124- pub fn unescape_byte ( literal_text : & str ) -> Result < u8 , ( usize , EscapeError ) > {
125- let mut chars = literal_text. chars ( ) ;
126- unescape_char_or_byte ( & mut chars, Mode :: Byte )
127- . map ( byte_from_char)
128- . map_err ( |err| ( literal_text. len ( ) - chars. as_str ( ) . len ( ) , err) )
104+ pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
105+ unescape_char_or_byte ( & mut src. chars ( ) , true ) . map ( byte_from_char)
129106}
130107
131108/// What kind of literal do we parse.
@@ -147,20 +124,17 @@ impl Mode {
147124 }
148125 }
149126
150- pub fn is_bytes ( self ) -> bool {
127+ pub fn is_byte ( self ) -> bool {
151128 match self {
152129 Mode :: Byte | Mode :: ByteStr | Mode :: RawByteStr => true ,
153130 Mode :: Char | Mode :: Str | Mode :: RawStr => false ,
154131 }
155132 }
156133}
157134
158- fn scan_escape ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
135+ fn scan_escape ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
159136 // Previous character was '\\', unescape what follows.
160-
161- let second_char = chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ?;
162-
163- let res = match second_char {
137+ let res = match chars. next ( ) . ok_or ( EscapeError :: LoneSlash ) ? {
164138 '"' => '"' ,
165139 'n' => '\n' ,
166140 'r' => '\r' ,
@@ -181,7 +155,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
181155 let value = hi * 16 + lo;
182156
183157 // For a non-byte literal verify that it is within ASCII range.
184- if !mode . is_bytes ( ) && !is_ascii ( value) {
158+ if !is_byte && !is_ascii ( value) {
185159 return Err ( EscapeError :: OutOfRangeHexEscape ) ;
186160 }
187161 let value = value as u8 ;
@@ -217,7 +191,7 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
217191
218192 // Incorrect syntax has higher priority for error reporting
219193 // than a disallowed value for a literal.
220- if mode . is_bytes ( ) {
194+ if is_byte {
221195 return Err ( EscapeError :: UnicodeEscapeInByte ) ;
222196 }
223197
@@ -249,23 +223,22 @@ fn scan_escape(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
249223}
250224
251225#[ inline]
252- fn ascii_check ( first_char : char , mode : Mode ) -> Result < char , EscapeError > {
253- if mode . is_bytes ( ) && !first_char . is_ascii ( ) {
226+ fn ascii_check ( c : char , is_byte : bool ) -> Result < char , EscapeError > {
227+ if is_byte && !c . is_ascii ( ) {
254228 // Byte literal can't be a non-ascii character.
255229 Err ( EscapeError :: NonAsciiCharInByte )
256230 } else {
257- Ok ( first_char )
231+ Ok ( c )
258232 }
259233}
260234
261- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
262- debug_assert ! ( mode == Mode :: Char || mode == Mode :: Byte ) ;
263- let first_char = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
264- let res = match first_char {
265- '\\' => scan_escape ( chars, mode) ,
235+ fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
236+ let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
237+ let res = match c {
238+ '\\' => scan_escape ( chars, is_byte) ,
266239 '\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
267240 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
268- _ => ascii_check ( first_char , mode ) ,
241+ _ => ascii_check ( c , is_byte ) ,
269242 } ?;
270243 if chars. next ( ) . is_some ( ) {
271244 return Err ( EscapeError :: MoreThanOneChar ) ;
@@ -275,20 +248,20 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca
275248
276249/// Takes the contents of a string literal (without quotes) and produces a
277250/// sequence of escaped characters or errors.
278- fn unescape_str_or_byte_str < F > ( src : & str , mode : Mode , callback : & mut F )
251+ fn unescape_str_or_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
279252where
280253 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
281254{
282- debug_assert ! ( mode == Mode :: Str || mode == Mode :: ByteStr ) ;
283- let initial_len = src. len ( ) ;
284255 let mut chars = src. chars ( ) ;
285- while let Some ( first_char) = chars. next ( ) {
286- let start = initial_len - chars. as_str ( ) . len ( ) - first_char. len_utf8 ( ) ;
287256
288- let unescaped_char = match first_char {
257+ // The `start` and `end` computation here is complicated because
258+ // `skip_ascii_whitespace` makes us skip over chars without counting
259+ // them in the range computation.
260+ while let Some ( c) = chars. next ( ) {
261+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
262+ let res = match c {
289263 '\\' => {
290- let second_char = chars. clone ( ) . next ( ) ;
291- match second_char {
264+ match chars. clone ( ) . next ( ) {
292265 Some ( '\n' ) => {
293266 // The Rust language specification requires us to skip whitespace
294267 // if an unescaped '\' character is followed by '\n'.
@@ -297,17 +270,17 @@ where
297270 skip_ascii_whitespace ( & mut chars, start, callback) ;
298271 continue ;
299272 }
300- _ => scan_escape ( & mut chars, mode ) ,
273+ _ => scan_escape ( & mut chars, is_byte ) ,
301274 }
302275 }
303276 '\n' => Ok ( '\n' ) ,
304277 '\t' => Ok ( '\t' ) ,
305278 '"' => Err ( EscapeError :: EscapeOnlyChar ) ,
306279 '\r' => Err ( EscapeError :: BareCarriageReturn ) ,
307- _ => ascii_check ( first_char , mode ) ,
280+ _ => ascii_check ( c , is_byte ) ,
308281 } ;
309- let end = initial_len - chars. as_str ( ) . len ( ) ;
310- callback ( start..end, unescaped_char ) ;
282+ let end = src . len ( ) - chars. as_str ( ) . len ( ) ;
283+ callback ( start..end, res ) ;
311284 }
312285
313286 fn skip_ascii_whitespace < F > ( chars : & mut Chars < ' _ > , start : usize , callback : & mut F )
@@ -340,30 +313,29 @@ where
340313/// Takes the contents of a string literal (without quotes) and produces a
341314/// sequence of characters or errors.
342315/// NOTE: Raw strings do not perform any explicit character escaping, here we
343- /// only translate CRLF to LF and produce errors on bare CR.
344- fn unescape_raw_str_or_raw_byte_str < F > ( literal_text : & str , mode : Mode , callback : & mut F )
316+ /// only produce errors on bare CR.
317+ fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
345318where
346319 F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
347320{
348- debug_assert ! ( mode == Mode :: RawStr || mode == Mode :: RawByteStr ) ;
349- let initial_len = literal_text. len ( ) ;
350-
351- let mut chars = literal_text. chars ( ) ;
352- while let Some ( curr) = chars. next ( ) {
353- let start = initial_len - chars. as_str ( ) . len ( ) - curr. len_utf8 ( ) ;
321+ let mut chars = src. chars ( ) ;
354322
355- let result = match curr {
323+ // The `start` and `end` computation here matches the one in
324+ // `unescape_str_or_byte_str` for consistency, even though this function
325+ // doesn't have to worry about skipping any chars.
326+ while let Some ( c) = chars. next ( ) {
327+ let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
328+ let res = match c {
356329 '\r' => Err ( EscapeError :: BareCarriageReturnInRawString ) ,
357- c if mode. is_bytes ( ) && !c. is_ascii ( ) => Err ( EscapeError :: NonAsciiCharInByteString ) ,
358- c => Ok ( c) ,
330+ _ => ascii_check ( c, is_byte) ,
359331 } ;
360- let end = initial_len - chars. as_str ( ) . len ( ) ;
361-
362- callback ( start..end, result) ;
332+ let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
333+ callback ( start..end, res) ;
363334 }
364335}
365336
366- fn byte_from_char ( c : char ) -> u8 {
337+ #[ inline]
338+ pub fn byte_from_char ( c : char ) -> u8 {
367339 let res = c as u32 ;
368340 debug_assert ! ( res <= u8 :: MAX as u32 , "guaranteed because of Mode::ByteStr" ) ;
369341 res as u8
0 commit comments