@@ -10,12 +10,14 @@ use rustc_ast as ast;
1010use rustc_ast:: token:: { self , DelimToken , Nonterminal , Token , TokenKind } ;
1111use rustc_ast:: tokenstream:: { self , LazyTokenStream , TokenStream , TokenTree } ;
1212use rustc_ast_pretty:: pprust;
13+ use rustc_data_structures:: fx:: FxHashSet ;
1314use rustc_data_structures:: sync:: Lrc ;
1415use rustc_errors:: { Diagnostic , FatalError , Level , PResult } ;
1516use rustc_session:: parse:: ParseSess ;
1617use rustc_span:: { symbol:: kw, FileName , SourceFile , Span , DUMMY_SP } ;
1718
1819use smallvec:: SmallVec ;
20+ use std:: cell:: RefCell ;
1921use std:: mem;
2022use std:: path:: Path ;
2123use std:: str;
@@ -282,14 +284,33 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
282284 }
283285 } ;
284286
287+ // Caches the stringification of 'good' `TokenStreams` which passed
288+ // `tokenstream_probably_equal_for_proc_macro`. This allows us to avoid
289+ // repeatedly stringifying and comparing the same `TokenStream` for deeply
290+ // nested nonterminals.
291+ //
292+ // We cache by the stringification instead of the `TokenStream` to avoid
293+ // needing to implement `Hash` for `TokenStream`. Note that it's possible to
294+ // have two distinct `TokenStream`s that stringify to the same result
295+ // (e.g. if they differ only in hygiene information). However, any
296+ // information lost during the stringification process is also intentionally
297+ // ignored by `tokenstream_probably_equal_for_proc_macro`, so it's fine
298+ // that a single cache entry may 'map' to multiple distinct `TokenStream`s.
299+ //
300+ // This is a temporary hack to prevent compilation blowup on certain inputs.
301+ // The entire pretty-print/retokenize process will be removed soon.
302+ thread_local ! {
303+ static GOOD_TOKEN_CACHE : RefCell <FxHashSet <String >> = Default :: default ( ) ;
304+ }
305+
285306 // FIXME(#43081): Avoid this pretty-print + reparse hack
286307 // Pretty-print the AST struct without inserting any parenthesis
287308 // beyond those explicitly written by the user (e.g. `ExpnKind::Paren`).
288309 // The resulting stream may have incorrect precedence, but it's only
289310 // ever used for a comparison against the capture tokenstream.
290311 let source = pprust:: nonterminal_to_string_no_extra_parens ( nt) ;
291312 let filename = FileName :: macro_expansion_source_code ( & source) ;
292- let reparsed_tokens = parse_stream_from_source_str ( filename, source, sess, Some ( span) ) ;
313+ let reparsed_tokens = parse_stream_from_source_str ( filename, source. clone ( ) , sess, Some ( span) ) ;
293314
294315 // During early phases of the compiler the AST could get modified
295316 // directly (e.g., attributes added or removed) and the internal cache
@@ -315,8 +336,13 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
315336 // modifications, including adding/removing typically non-semantic
316337 // tokens such as extra braces and commas, don't happen.
317338 if let Some ( tokens) = tokens {
339+ if GOOD_TOKEN_CACHE . with ( |cache| cache. borrow ( ) . contains ( & source) ) {
340+ return tokens;
341+ }
342+
318343 // Compare with a non-relaxed delim match to start.
319344 if tokenstream_probably_equal_for_proc_macro ( & tokens, & reparsed_tokens, sess, false ) {
345+ GOOD_TOKEN_CACHE . with ( |cache| cache. borrow_mut ( ) . insert ( source. clone ( ) ) ) ;
320346 return tokens;
321347 }
322348
@@ -325,6 +351,11 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
325351 // token stream to match up with inserted parenthesis in the reparsed stream.
326352 let source_with_parens = pprust:: nonterminal_to_string ( nt) ;
327353 let filename_with_parens = FileName :: macro_expansion_source_code ( & source_with_parens) ;
354+
355+ if GOOD_TOKEN_CACHE . with ( |cache| cache. borrow ( ) . contains ( & source_with_parens) ) {
356+ return tokens;
357+ }
358+
328359 let reparsed_tokens_with_parens = parse_stream_from_source_str (
329360 filename_with_parens,
330361 source_with_parens,
@@ -340,6 +371,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
340371 sess,
341372 true ,
342373 ) {
374+ GOOD_TOKEN_CACHE . with ( |cache| cache. borrow_mut ( ) . insert ( source. clone ( ) ) ) ;
343375 return tokens;
344376 }
345377
@@ -419,9 +451,9 @@ pub fn tokenstream_probably_equal_for_proc_macro(
419451 // to iterate breaking tokens multiple times. For example:
420452 // '[BinOpEq(Shr)] => [Gt, Ge] -> [Gt, Gt, Eq]'
421453 let mut token_trees: SmallVec < [ _ ; 2 ] > ;
422- if let TokenTree :: Token ( token) = & tree {
454+ if let TokenTree :: Token ( token) = tree {
423455 let mut out = SmallVec :: < [ _ ; 2 ] > :: new ( ) ;
424- out. push ( token. clone ( ) ) ;
456+ out. push ( token) ;
425457 // Iterate to fixpoint:
426458 // * We start off with 'out' containing our initial token, and `temp` empty
427459 // * If we are able to break any tokens in `out`, then `out` will have
0 commit comments