33use crate :: ast:: { self , LitKind , MetaItemLit , StrStyle } ;
44use crate :: token:: { self , Token } ;
55use rustc_lexer:: unescape:: {
6- byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit ,
7- Mode ,
6+ unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit , Mode ,
87} ;
98use rustc_span:: symbol:: { kw, sym, Symbol } ;
109use rustc_span:: Span ;
@@ -48,6 +47,10 @@ impl LitKind {
4847 return Err ( LitError :: InvalidSuffix ) ;
4948 }
5049
50+ // For byte/char/string literals, chars and escapes have already been
51+ // checked in the lexer (in `cook_lexer_literal`). So we can assume all
52+ // chars and escapes are valid here, and ignore `Rfc3349` return
53+ // values.
5154 Ok ( match kind {
5255 token:: Bool => {
5356 assert ! ( symbol. is_bool_lit( ) ) ;
@@ -56,12 +59,12 @@ impl LitKind {
5659 token:: Byte => {
5760 return unescape_byte ( symbol. as_str ( ) )
5861 . map ( LitKind :: Byte )
59- . map_err ( |_| LitError :: LexerError ) ;
62+ . map_err ( |_| panic ! ( "failed to unescape byte literal" ) ) ;
6063 }
6164 token:: Char => {
6265 return unescape_char ( symbol. as_str ( ) )
6366 . map ( LitKind :: Char )
64- . map_err ( |_| LitError :: LexerError ) ;
67+ . map_err ( |_| panic ! ( "failed to unescape char literal" ) ) ;
6568 }
6669
6770 // There are some valid suffixes for integer and float literals,
@@ -77,113 +80,72 @@ impl LitKind {
7780 let s = symbol. as_str ( ) ;
7881 // Vanilla strings are so common we optimize for the common case where no chars
7982 // requiring special behaviour are present.
80- let symbol = if s. contains ( [ '\\' , '\r' ] ) {
83+ let symbol = if s. contains ( '\\' ) {
8184 let mut buf = String :: with_capacity ( s. len ( ) ) ;
82- let mut error = Ok ( ( ) ) ;
8385 // Force-inlining here is aggressive but the closure is
84- // called on every char in the string, so it can be
85- // hot in programs with many long strings.
86- unescape_literal (
86+ // called on every char in the string, so it can be hot in
87+ // programs with many long strings containing escapes .
88+ _ = unescape_unicode (
8789 s,
8890 Mode :: Str ,
8991 & mut #[ inline ( always) ]
90- |_, unescaped_char | match unescaped_char {
92+ |_, c | match c {
9193 Ok ( c) => buf. push ( c) ,
9294 Err ( err) => {
93- if err. is_fatal ( ) {
94- error = Err ( LitError :: LexerError ) ;
95- }
95+ assert ! ( !err. is_fatal( ) , "failed to unescape string literal" )
9696 }
9797 } ,
9898 ) ;
99- error?;
10099 Symbol :: intern ( & buf)
101100 } else {
102101 symbol
103102 } ;
104103 LitKind :: Str ( symbol, ast:: StrStyle :: Cooked )
105104 }
106105 token:: StrRaw ( n) => {
107- // Raw strings have no escapes, so we only need to check for invalid chars, and we
108- // can reuse the symbol on success.
109- let mut error = Ok ( ( ) ) ;
110- unescape_literal ( symbol. as_str ( ) , Mode :: RawStr , & mut |_, unescaped_char| {
111- match unescaped_char {
112- Ok ( _) => { }
113- Err ( err) => {
114- if err. is_fatal ( ) {
115- error = Err ( LitError :: LexerError ) ;
116- }
117- }
118- }
119- } ) ;
120- error?;
106+ // Raw strings have no escapes so no work is needed here.
121107 LitKind :: Str ( symbol, ast:: StrStyle :: Raw ( n) )
122108 }
123109 token:: ByteStr => {
124110 let s = symbol. as_str ( ) ;
125111 let mut buf = Vec :: with_capacity ( s. len ( ) ) ;
126- let mut error = Ok ( ( ) ) ;
127- unescape_literal ( s, Mode :: ByteStr , & mut |_, c| match c {
128- Ok ( c) => buf. push ( byte_from_char ( c) ) ,
112+ _ = unescape_mixed ( s, Mode :: ByteStr , & mut |_, c| match c {
113+ Ok ( MixedUnit :: Char ( c) ) => {
114+ buf. extend_from_slice ( c. encode_utf8 ( & mut [ 0 ; 4 ] ) . as_bytes ( ) )
115+ }
116+ Ok ( MixedUnit :: HighByte ( b) ) => buf. push ( b) ,
129117 Err ( err) => {
130- if err. is_fatal ( ) {
131- error = Err ( LitError :: LexerError ) ;
132- }
118+ assert ! ( !err. is_fatal( ) , "failed to unescape string literal" )
133119 }
134120 } ) ;
135- error?;
136121 LitKind :: ByteStr ( buf. into ( ) , StrStyle :: Cooked )
137122 }
138123 token:: ByteStrRaw ( n) => {
139- // Raw strings have no escapes, so we only need to check for invalid chars, and we
140- // can convert the symbol directly to a `Lrc<u8>` on success.
141- let s = symbol. as_str ( ) ;
142- let mut error = Ok ( ( ) ) ;
143- unescape_literal ( s, Mode :: RawByteStr , & mut |_, c| match c {
144- Ok ( _) => { }
145- Err ( err) => {
146- if err. is_fatal ( ) {
147- error = Err ( LitError :: LexerError ) ;
148- }
149- }
150- } ) ;
151- LitKind :: ByteStr ( s. to_owned ( ) . into_bytes ( ) . into ( ) , StrStyle :: Raw ( n) )
124+ // Raw strings have no escapes so we can convert the symbol
125+ // directly to a `Lrc<[u8]>`.
126+ let buf = symbol. as_str ( ) . to_owned ( ) . into_bytes ( ) ;
127+ LitKind :: ByteStr ( buf. into ( ) , StrStyle :: Raw ( n) )
152128 }
153129 token:: CStr => {
154130 let s = symbol. as_str ( ) ;
155131 let mut buf = Vec :: with_capacity ( s. len ( ) ) ;
156- let mut error = Ok ( ( ) ) ;
157- unescape_c_string ( s, Mode :: CStr , & mut |_span, c| match c {
158- Ok ( CStrUnit :: Byte ( b) ) => buf. push ( b) ,
159- Ok ( CStrUnit :: Char ( c) ) => {
132+ _ = unescape_mixed ( s, Mode :: CStr , & mut |_span, c| match c {
133+ Ok ( MixedUnit :: Char ( c) ) => {
160134 buf. extend_from_slice ( c. encode_utf8 ( & mut [ 0 ; 4 ] ) . as_bytes ( ) )
161135 }
136+ Ok ( MixedUnit :: HighByte ( b) ) => buf. push ( b) ,
162137 Err ( err) => {
163- if err. is_fatal ( ) {
164- error = Err ( LitError :: LexerError ) ;
165- }
138+ assert ! ( !err. is_fatal( ) , "failed to unescape C string literal" )
166139 }
167140 } ) ;
168- error?;
169141 buf. push ( 0 ) ;
170142 LitKind :: CStr ( buf. into ( ) , StrStyle :: Cooked )
171143 }
172144 token:: CStrRaw ( n) => {
173- // Raw strings have no escapes, so we only need to check for invalid chars, and we
174- // can convert the symbol directly to a `Lrc<u8>` on success.
175- let s = symbol. as_str ( ) ;
176- let mut error = Ok ( ( ) ) ;
177- unescape_c_string ( s, Mode :: RawCStr , & mut |_, c| match c {
178- Ok ( _) => { }
179- Err ( err) => {
180- if err. is_fatal ( ) {
181- error = Err ( LitError :: LexerError ) ;
182- }
183- }
184- } ) ;
185- error?;
186- let mut buf = s. to_owned ( ) . into_bytes ( ) ;
145+ // Raw strings have no escapes so we can convert the symbol
146+ // directly to a `Lrc<[u8]>` after appending the terminating NUL
147+ // char.
148+ let mut buf = symbol. as_str ( ) . to_owned ( ) . into_bytes ( ) ;
187149 buf. push ( 0 ) ;
188150 LitKind :: CStr ( buf. into ( ) , StrStyle :: Raw ( n) )
189151 }
0 commit comments