@@ -25,7 +25,8 @@ use rustc_serialize::{Decodable, Encodable};
 use rustc_span::{sym, Span, SpanDecoder, SpanEncoder, Symbol, DUMMY_SP};
 
 use std::borrow::Cow;
-use std::{cmp, fmt, iter};
+use std::ops::Range;
+use std::{cmp, fmt, iter, mem};
 
 /// Part of a `TokenStream`.
 #[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
@@ -156,12 +157,195 @@ impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
     }
 }
 
-/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
-/// information about the tokens for attribute targets. This is used
-/// during expansion to perform early cfg-expansion, and to process attributes
-/// during proc-macro invocations.
-#[derive(Clone, Debug, Default, Encodable, Decodable)]
-pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+/// Indicates a range of tokens that should be replaced by the tokens in the
+/// provided `AttrsTarget`. This is used in two places during token collection:
+///
+/// 1. During the parsing of an AST node that may have a `#[derive]` attribute,
+///    we parse a nested AST node that has `#[cfg]` or `#[cfg_attr]`. In this
+///    case, we use a `ReplaceRange` to replace the entire inner AST node with
+///    `FlatToken::AttrsTarget`, allowing us to perform eager cfg-expansion on
+///    an `AttrTokenStream`.
+///
+/// 2. When we parse an inner attribute while collecting tokens. We remove
+///    inner attributes from the token stream entirely, and instead track them
+///    through the `attrs` field on the AST node. This allows us to easily
+///    manipulate them (for example, removing the first macro inner attribute
+///    to invoke a proc-macro). When we create a `TokenStream`, the inner
+///    attributes get inserted into the proper place in the token stream.
+pub type ReplaceRange = (Range<u32>, Option<AttrsTarget>);
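As a rough illustration (toy types only; `ToyTarget` is a hypothetical stand-in for the real `AttrsTarget`), a replace range is just an index range paired with an optional replacement:

```rust
use std::ops::Range;

// Hypothetical stand-in for `AttrsTarget`, for illustration only.
#[derive(Debug)]
struct ToyTarget(&'static str);

type ToyReplaceRange = (Range<u32>, Option<ToyTarget>);

fn main() {
    // Case 1: tokens 2..7 should be replaced by a captured attribute target.
    let replace: ToyReplaceRange = (2..7, Some(ToyTarget("struct Foo { .. }")));
    // Case 2: tokens 0..3 (say, an inner attribute) should be dropped outright.
    let delete: ToyReplaceRange = (0..3, None);
    println!("{replace:?} {delete:?}");
}
```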
+
+// Produces a `TokenStream` on-demand. Using `cursor_snapshot` and `num_calls`,
+// we can reconstruct the `TokenStream` seen by the callback. This allows us to
+// avoid producing a `TokenStream` if it is never needed - for example, a
+// captured `macro_rules!` argument that is never passed to a proc macro. In
+// practice token stream creation happens rarely compared to calls to
+// `collect_tokens` (see some statistics in #78736), so we are doing as little
+// up-front work as possible.
+//
+// This also makes `Parser` very cheap to clone, since there is no intermediate
+// collection buffer to clone.
+pub struct LazyAttrTokenStreamImpl {
+    pub start_token: (Token, Spacing),
+    pub cursor_snapshot: TokenCursor,
+    pub num_calls: u32,
+    pub break_last_token: bool,
+    pub replace_ranges: Box<[ReplaceRange]>,
+}
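The lazy scheme can be sketched in isolation: keep a cheap snapshot plus a call count, and only walk the snapshot when someone actually asks for the tokens. A minimal stand-alone analogue (toy types and names, not the real parser API):

```rust
use std::rc::Rc;

// Toy analogue of `LazyAttrTokenStreamImpl`: cloning is cheap because the
// snapshot is shared, and no token sequence exists until `materialize`.
#[derive(Clone)]
struct LazyTokens {
    start_token: u32,    // plays the role of `start_token`
    snapshot: Rc<[u32]>, // plays the role of `cursor_snapshot`
    num_calls: usize,    // how many tokens the callback consumed
}

impl LazyTokens {
    fn materialize(&self) -> Vec<u32> {
        // Chain the initial token onto the snapshot and `take` exactly
        // `num_calls` tokens, mirroring `to_attr_token_stream`.
        std::iter::once(self.start_token)
            .chain(self.snapshot.iter().copied())
            .take(self.num_calls)
            .collect()
    }
}

fn main() {
    let lazy = LazyTokens { start_token: 1, snapshot: Rc::from(vec![2, 3, 4, 5]), num_calls: 3 };
    let cheap_clone = lazy.clone(); // no buffer is copied here
    assert_eq!(cheap_clone.materialize(), vec![1, 2, 3]);
}
```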
+
+impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
+    fn to_attr_token_stream(&self) -> AttrTokenStream {
+        // The token produced by the final call to `{,inlined_}next` was not
+        // actually consumed by the callback. The combination of chaining the
+        // initial token and using `take` produces the desired result - we
+        // produce an empty `TokenStream` if no calls were made, and omit the
+        // final token otherwise.
+        let mut cursor_snapshot = self.cursor_snapshot.clone();
+        let tokens = iter::once(FlatToken::Token(self.start_token.clone()))
+            .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
+            .take(self.num_calls as usize);
+
+        if self.replace_ranges.is_empty() {
+            make_attr_token_stream(tokens, self.break_last_token)
+        } else {
+            let mut tokens: Vec<_> = tokens.collect();
+            let mut replace_ranges = self.replace_ranges.to_vec();
+            replace_ranges.sort_by_key(|(range, _)| range.start);
+
+            #[cfg(debug_assertions)]
+            {
+                for [(range, tokens), (next_range, next_tokens)] in replace_ranges.array_windows() {
+                    assert!(
+                        range.end <= next_range.start || range.end >= next_range.end,
+                        "Replace ranges should either be disjoint or nested: \
+                        ({:?}, {:?}) ({:?}, {:?})",
+                        range,
+                        tokens,
+                        next_range,
+                        next_tokens,
+                    );
+                }
+            }
+
+            // Process the replace ranges, starting from the highest start
+            // position and working our way back. If we have tokens like:
+            //
+            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+            //
+            // then we will generate replace ranges for both the `#[cfg(FALSE)]
+            // field: bool` and the entire `#[cfg(FALSE)] struct Foo {
+            // #[cfg(FALSE)] field: bool }`.
+            //
+            // By starting processing from the replace range with the greatest
+            // start position, we ensure that any replace range which encloses
+            // another replace range will capture the *replaced* tokens for the
+            // inner range, not the original tokens.
+            for (range, target) in replace_ranges.into_iter().rev() {
+                assert!(!range.is_empty(), "Cannot replace an empty range: {range:?}");
+
+                // Replace the tokens in range with zero or one
+                // `FlatToken::AttrsTarget`s, plus enough `FlatToken::Empty`s
+                // to fill up the rest of the range. This keeps the total
+                // length of `tokens` constant throughout the replacement
+                // process, allowing us to use all of the `ReplaceRanges`
+                // entries without adjusting indices.
+                let target_len = target.is_some() as usize;
+                tokens.splice(
+                    (range.start as usize)..(range.end as usize),
+                    target
+                        .into_iter()
+                        .map(|target| FlatToken::AttrsTarget(target))
+                        .chain(iter::repeat(FlatToken::Empty).take(range.len() - target_len)),
+                );
+            }
+            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
+        }
+    }
+}
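The constant-length splice trick is worth seeing on its own: each range is replaced by at most one target plus `Empty` padding, so the indices stored in every other replace range remain valid. A self-contained sketch (toy `Tok` enum and hypothetical `splice_range` helper, not the compiler's types):

```rust
use std::iter;
use std::ops::Range;

#[derive(Debug, Clone, PartialEq)]
enum Tok {
    T(char), // an ordinary token
    Target,  // stands in for `FlatToken::AttrsTarget`
    Empty,   // stands in for `FlatToken::Empty`
}

// Replace `range` with zero or one `Target`s plus enough `Empty` padding to
// keep the vector's total length constant, as in `to_attr_token_stream`.
fn splice_range(tokens: &mut Vec<Tok>, range: Range<usize>, target: Option<Tok>) {
    let target_len = target.is_some() as usize;
    let pad = range.len() - target_len;
    tokens.splice(range, target.into_iter().chain(iter::repeat(Tok::Empty).take(pad)));
}

fn main() {
    let mut toks: Vec<Tok> = "abcdef".chars().map(Tok::T).collect();
    splice_range(&mut toks, 1..4, Some(Tok::Target));
    assert_eq!(toks.len(), 6); // later ranges' indices are still valid
    assert_eq!(toks[1..4], [Tok::Target, Tok::Empty, Tok::Empty]);
}
```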
+
+/// A helper enum used when building an `AttrTokenStream` from a
+/// `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens are stored
+/// as `FlatToken::Token`. A vector of `FlatToken`s is then 'parsed' to build
+/// up an `AttrTokenStream` with nested `AttrTokenTree::Delimited` tokens.
+#[derive(Debug, Clone)]
+enum FlatToken {
+    /// A token. This holds both delimiter (e.g. '{' and '}') and non-delimiter
+    /// tokens.
+    Token((Token, Spacing)),
+    /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
+    /// directly into the constructed `AttrTokenStream` as an
+    /// `AttrTokenTree::AttrsTarget`.
+    AttrsTarget(AttrsTarget),
+    /// A special 'empty' token that is ignored during the conversion to an
+    /// `AttrTokenStream`. This is used to simplify the handling of replace
+    /// ranges.
+    Empty,
+}
+
+/// Converts a flattened iterator of tokens (including open and close delimiter
+/// tokens) into an `AttrTokenStream`, creating an `AttrTokenTree::Delimited`
+/// for each matching pair of open and close delims.
+fn make_attr_token_stream(
+    iter: impl Iterator<Item = FlatToken>,
+    break_last_token: bool,
+) -> AttrTokenStream {
+    #[derive(Debug)]
+    struct FrameData {
+        // This is `None` for the first frame, `Some` for all others.
+        open_delim_sp: Option<(Delimiter, Span, Spacing)>,
+        inner: Vec<AttrTokenTree>,
+    }
+    // The stack always has at least one element. Storing it separately makes for shorter code.
+    let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
+    let mut stack_rest = vec![];
+    for flat_token in iter {
+        match flat_token {
+            FlatToken::Token((Token { kind: TokenKind::OpenDelim(delim), span }, spacing)) => {
+                stack_rest.push(mem::replace(
+                    &mut stack_top,
+                    FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
+                ));
+            }
+            FlatToken::Token((Token { kind: TokenKind::CloseDelim(delim), span }, spacing)) => {
+                let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
+                let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
+                assert_eq!(
+                    open_delim, delim,
+                    "Mismatched open/close delims: open={open_delim:?} close={span:?}"
+                );
+                let dspan = DelimSpan::from_pair(open_sp, span);
+                let dspacing = DelimSpacing::new(open_spacing, spacing);
+                let stream = AttrTokenStream::new(frame_data.inner);
+                let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
+                stack_top.inner.push(delimited);
+            }
+            FlatToken::Token((token, spacing)) => {
+                stack_top.inner.push(AttrTokenTree::Token(token, spacing))
+            }
+            FlatToken::AttrsTarget(target) => {
+                stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
+            }
+            FlatToken::Empty => {}
+        }
+    }
+
+    if break_last_token {
+        let last_token = stack_top.inner.pop().unwrap();
+        if let AttrTokenTree::Token(last_token, spacing) = last_token {
+            let unglued_first = last_token.kind.break_two_token_op().unwrap().0;
+
+            // An 'unglued' token is always two ASCII characters.
+            let mut first_span = last_token.span.shrink_to_lo();
+            first_span = first_span.with_hi(first_span.lo() + rustc_span::BytePos(1));
+
+            stack_top
+                .inner
+                .push(AttrTokenTree::Token(Token::new(unglued_first, first_span), spacing));
+        } else {
+            panic!("Unexpected last token {last_token:?}")
+        }
+    }
+    AttrTokenStream::new(stack_top.inner)
+}
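The stack discipline in `make_attr_token_stream` can be miniaturized: an open delimiter pushes a fresh frame, a close delimiter pops the frame and wraps its contents. A sketch over plain parentheses (hypothetical `Tree`/`build` names, not the compiler API):

```rust
use std::mem;

#[derive(Debug, PartialEq)]
enum Tree {
    Leaf(char),
    Delimited(Vec<Tree>),
}

// Open pushes the current frame; close pops it and folds the finished
// children into a `Delimited` node, mirroring `FrameData` above.
fn build(tokens: &str) -> Vec<Tree> {
    let mut stack_top: Vec<Tree> = Vec::new();
    let mut stack_rest: Vec<Vec<Tree>> = Vec::new();
    for c in tokens.chars() {
        match c {
            '(' => stack_rest.push(mem::take(&mut stack_top)),
            ')' => {
                let inner = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
                stack_top.push(Tree::Delimited(inner));
            }
            c => stack_top.push(Tree::Leaf(c)),
        }
    }
    assert!(stack_rest.is_empty(), "mismatched delimiters");
    stack_top
}

fn main() {
    let trees = build("a(bc)d");
    assert_eq!(trees.len(), 3); // `a`, the delimited group, and `d`
    assert_eq!(trees[1], Tree::Delimited(vec![Tree::Leaf('b'), Tree::Leaf('c')]));
}
```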
 
 /// Like `TokenTree`, but for `AttrTokenStream`.
 #[derive(Clone, Debug, Encodable, Decodable)]
@@ -174,6 +358,13 @@ pub enum AttrTokenTree {
     AttrsTarget(AttrsTarget),
 }
 
+/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
+/// information about the tokens for attribute targets. This is used
+/// during expansion to perform early cfg-expansion, and to process attributes
+/// during proc-macro invocations.
+#[derive(Clone, Debug, Default, Encodable, Decodable)]
+pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
+
 impl AttrTokenStream {
     pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
         AttrTokenStream(Lrc::new(tokens))
@@ -720,6 +911,75 @@ impl TokenTreeCursor {
     }
 }
 
+/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
+/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
+/// use this type to emit them as a linear sequence. But a linear sequence is
+/// what the parser expects, for the most part.
+#[derive(Clone, Debug)]
+pub struct TokenCursor {
+    // Cursor for the current (innermost) token stream. The delimiters for this
+    // token stream are found in `self.stack.last()`; when that is `None` then
+    // we are in the outermost token stream which never has delimiters.
+    pub tree_cursor: TokenTreeCursor,
+
+    // Token streams surrounding the current one. The delimiters for stack[n]'s
+    // tokens are in `stack[n-1]`. `stack[0]` (when present) has no delimiters
+    // because it's the outermost token stream which never has delimiters.
+    pub stack: Vec<(TokenTreeCursor, DelimSpan, DelimSpacing, Delimiter)>,
+}
+
+impl TokenCursor {
+    pub fn next(&mut self) -> (Token, Spacing) {
+        self.inlined_next()
+    }
+
+    /// This always-inlined version should only be used on hot code paths.
+    #[inline(always)]
+    pub fn inlined_next(&mut self) -> (Token, Spacing) {
+        loop {
+            // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
+            // #67062 we will need to, whereupon the `delim != Delimiter::Invisible` conditions
+            // below can be removed.
+            if let Some(tree) = self.tree_cursor.next_ref() {
+                match tree {
+                    &TokenTree::Token(ref token, spacing) => {
+                        debug_assert!(!matches!(
+                            token.kind,
+                            token::OpenDelim(_) | token::CloseDelim(_)
+                        ));
+                        return (token.clone(), spacing);
+                    }
+                    &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
+                        let trees = tts.clone().into_trees();
+                        self.stack.push((
+                            mem::replace(&mut self.tree_cursor, trees),
+                            sp,
+                            spacing,
+                            delim,
+                        ));
+                        if delim != Delimiter::Invisible {
+                            return (Token::new(token::OpenDelim(delim), sp.open), spacing.open);
+                        }
+                        // No open delimiter to return; continue on to the next iteration.
+                    }
+                };
+            } else if let Some((tree_cursor, span, spacing, delim)) = self.stack.pop() {
+                // We have exhausted this token stream. Move back to its parent token stream.
+                self.tree_cursor = tree_cursor;
+                if delim != Delimiter::Invisible {
+                    return (Token::new(token::CloseDelim(delim), span.close), spacing.close);
+                }
+                // No close delimiter to return; continue on to the next iteration.
+            } else {
+                // We have exhausted the outermost token stream. The use of
+                // `Spacing::Alone` is arbitrary and immaterial, because the
+                // `Eof` token's spacing is never used.
+                return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
+            }
+        }
+    }
+}
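The cursor's behavior is essentially the inverse of the stack-based builder earlier in the file: it walks a tree and re-emits synthetic open/close tokens at each descent and ascent. A toy analogue (hypothetical names; the real cursor additionally loops to skip `Delimiter::Invisible` delimiters):

```rust
use std::mem;

#[derive(Clone, Debug)]
enum Tree {
    Leaf(char),
    Delimited(Vec<Tree>),
}

// Toy analogue of `TokenCursor`: an index into the current level, plus a
// stack of suspended parent levels to resume when a group is exhausted.
struct Cursor {
    current: (Vec<Tree>, usize),
    stack: Vec<(Vec<Tree>, usize)>,
}

impl Cursor {
    fn new(trees: Vec<Tree>) -> Cursor {
        Cursor { current: (trees, 0), stack: Vec::new() }
    }

    fn next(&mut self) -> Option<char> {
        let (trees, idx) = &mut self.current;
        if let Some(tree) = trees.get(*idx).cloned() {
            *idx += 1;
            match tree {
                Tree::Leaf(c) => Some(c),
                Tree::Delimited(inner) => {
                    // Descend: suspend the parent level and emit the open delim.
                    let parent = mem::replace(&mut self.current, (inner, 0));
                    self.stack.push(parent);
                    Some('(')
                }
            }
        } else if let Some(parent) = self.stack.pop() {
            // Ascend: resume the parent level and emit the close delim.
            self.current = parent;
            Some(')')
        } else {
            None // analogue of the `Eof` token
        }
    }
}

fn main() {
    let tree = vec![Tree::Leaf('a'), Tree::Delimited(vec![Tree::Leaf('b')]), Tree::Leaf('c')];
    let mut cursor = Cursor::new(tree);
    let mut out = String::new();
    while let Some(c) = cursor.next() {
        out.push(c);
    }
    assert_eq!(out, "a(b)c");
}
```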
+
 #[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
 pub struct DelimSpan {
     pub open: Span,
@@ -765,6 +1025,7 @@ mod size_asserts {
     static_assert_size!(AttrTokenStream, 8);
     static_assert_size!(AttrTokenTree, 32);
     static_assert_size!(LazyAttrTokenStream, 8);
+    static_assert_size!(LazyAttrTokenStreamImpl, 96);
     static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
     static_assert_size!(TokenStream, 8);
     static_assert_size!(TokenTree, 32);