author     Mazdak Farrokhzad <twingoow@gmail.com>  2019-10-09 02:34:22 +0200
committer  Mazdak Farrokhzad <twingoow@gmail.com>  2019-10-13 20:13:18 +0200
commit     42f32f06d63b9947db63f0c739483e0294f588b2 (patch)
tree       01fce347eecc2b7d0e4f5e0cea380866b277f638
parent     aa2ae564d391a3da10bca2a79ab529a9925fbe58 (diff)
download   rust-42f32f06d63b9947db63f0c739483e0294f588b2.tar.gz
token: extract Nonterminal::to_tokenstream to parser.
-rw-r--r--  src/librustc/hir/lowering.rs              6
-rw-r--r--  src/libsyntax/ext/proc_macro_server.rs    2
-rw-r--r--  src/libsyntax/parse/mod.rs              138
-rw-r--r--  src/libsyntax/parse/token.rs            137
4 files changed, 143 insertions(+), 140 deletions(-)
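In short, the commit turns an inherent method on `Nonterminal` into a free function in the parser module, making the conversion's dependence on parsing machinery explicit (see the NOTE and FIXME comments in the hunks below). A condensed before/after view of the signatures, copied from the diff; the elided bodies mean this is a sketch rather than compilable code:

```rust
// Before: an inherent method in libsyntax/parse/token.rs.
impl Nonterminal {
    pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
        // ...
    }
}

// After: a free function in libsyntax/parse/mod.rs; the nonterminal is an
// ordinary by-reference argument and callers use `parse::nt_to_tokenstream`.
pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
    // ...
}
```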
diff --git a/src/librustc/hir/lowering.rs b/src/librustc/hir/lowering.rs
index 72fd054ee8a..513512b9f6b 100644
--- a/src/librustc/hir/lowering.rs
+++ b/src/librustc/hir/lowering.rs
@@ -70,6 +70,7 @@ use syntax::print::pprust;
use syntax::source_map::{respan, ExpnData, ExpnKind, DesugaringKind, Spanned};
use syntax::symbol::{kw, sym, Symbol};
use syntax::tokenstream::{TokenStream, TokenTree};
+use syntax::parse;
use syntax::parse::token::{self, Token};
use syntax::visit::{self, Visitor};
use syntax_pos::Span;
@@ -1022,7 +1023,10 @@ impl<'a> LoweringContext<'a> {
fn lower_token(&mut self, token: Token) -> TokenStream {
match token.kind {
token::Interpolated(nt) => {
- let tts = nt.to_tokenstream(&self.sess.parse_sess, token.span);
+ // FIXME(Centril): Consider indirection `(parse_sess.nt_to_tokenstream)(...)`
+ // to hack around the current hack that requires `nt_to_tokenstream` to live
+ // in the parser.
+ let tts = parse::nt_to_tokenstream(&nt, &self.sess.parse_sess, token.span);
self.lower_token_stream(tts)
}
_ => TokenTree::Token(token).into(),
diff --git a/src/libsyntax/ext/proc_macro_server.rs b/src/libsyntax/ext/proc_macro_server.rs
index 021ec46d987..0647c2fa313 100644
--- a/src/libsyntax/ext/proc_macro_server.rs
+++ b/src/libsyntax/ext/proc_macro_server.rs
@@ -174,7 +174,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
}
Interpolated(nt) => {
- let stream = nt.to_tokenstream(sess, span);
+ let stream = parse::nt_to_tokenstream(&nt, sess, span);
TokenTree::Group(Group {
delimiter: Delimiter::None,
stream,
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 1518da23b09..0df695d37b9 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -4,10 +4,9 @@ use crate::ast::{self, CrateConfig, NodeId};
use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
use crate::source_map::{SourceMap, FilePathMapping};
use crate::feature_gate::UnstableFeatures;
-use crate::parse::parser::Parser;
-use crate::parse::parser::emit_unclosed_delims;
-use crate::parse::token::TokenKind;
-use crate::tokenstream::{TokenStream, TokenTree};
+use crate::parse::parser::{Parser, emit_unclosed_delims};
+use crate::parse::token::{Nonterminal, TokenKind};
+use crate::tokenstream::{self, TokenStream, TokenTree};
use crate::print::pprust;
use crate::symbol::Symbol;
@@ -24,6 +23,8 @@ use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::str;
+use log::info;
+
#[cfg(test)]
mod tests;
@@ -407,3 +408,132 @@ impl SeqSep {
}
}
}
+
+// NOTE(Centril): The following probably shouldn't be here but it acknowledges the
+// fact that architecturally, we are using parsing (read on below to understand why).
+
+pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
+ // A `Nonterminal` is often a parsed AST item. At this point we
+ // need to convert the parsed AST back into an actual token stream,
+ // i.e., un-parse it.
+ //
+ // Unfortunately there's not really a great way to do that in a
+ // guaranteed lossless fashion right now. The fallback here is to just
+ // stringify the AST node and reparse it, but this loses all span
+ // information.
+ //
+ // As a result, some AST nodes are annotated with the token stream they
+ // came from. Here we attempt to extract these lossless token streams
+ // before we fall back to the stringification.
+ let tokens = match *nt {
+ Nonterminal::NtItem(ref item) => {
+ prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+ }
+ Nonterminal::NtTraitItem(ref item) => {
+ prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+ }
+ Nonterminal::NtImplItem(ref item) => {
+ prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
+ }
+ Nonterminal::NtIdent(ident, is_raw) => {
+ Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
+ }
+ Nonterminal::NtLifetime(ident) => {
+ Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
+ }
+ Nonterminal::NtTT(ref tt) => {
+ Some(tt.clone().into())
+ }
+ _ => None,
+ };
+
+ // FIXME(#43081): Avoid this pretty-print + reparse hack
+ let source = pprust::nonterminal_to_string(nt);
+ let filename = FileName::macro_expansion_source_code(&source);
+ let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
+
+ // During early phases of the compiler the AST could get modified
+ // directly (e.g., attributes added or removed) and the internal cache
+ // of tokens may not be invalidated or updated. Consequently, if the
+ // "lossless" token stream disagrees with our actual stringification
+ // (which has historically been much more battle-tested) then we go
+ // with the lossy stream anyway (losing span information).
+ //
+ // Note that the comparison isn't `==` here to avoid comparing spans,
+ // but it *also* is a "probable" equality which is a pretty weird
+ // definition. We mostly want to catch actual changes to the AST
+ // like a `#[cfg]` being processed or some weird `macro_rules!`
+ // expansion.
+ //
+ // What we *don't* want to catch is the fact that a user-defined
+ // literal like `0xf` is stringified as `15`, causing the cached token
+ // stream to not be literal `==` token-wise (ignoring spans) to the
+ // token stream we got from stringification.
+ //
+ // Instead the "probably equal" check here is "does each token
+ // recursively have the same discriminant?" We basically don't look at
+ // the token values here and assume that such fine grained token stream
+ // modifications, including adding/removing typically non-semantic
+ // tokens such as extra braces and commas, don't happen.
+ if let Some(tokens) = tokens {
+ if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
+ return tokens
+ }
+ info!("cached tokens found, but they're not \"probably equal\", \
+ going with stringified version");
+ }
+ return tokens_for_real
+}
+
+fn prepend_attrs(
+ sess: &ParseSess,
+ attrs: &[ast::Attribute],
+ tokens: Option<&tokenstream::TokenStream>,
+ span: syntax_pos::Span
+) -> Option<tokenstream::TokenStream> {
+ let tokens = tokens?;
+ if attrs.len() == 0 {
+ return Some(tokens.clone())
+ }
+ let mut builder = tokenstream::TokenStreamBuilder::new();
+ for attr in attrs {
+ assert_eq!(attr.style, ast::AttrStyle::Outer,
+ "inner attributes should prevent cached tokens from existing");
+
+ let source = pprust::attribute_to_string(attr);
+ let macro_filename = FileName::macro_expansion_source_code(&source);
+ if attr.is_sugared_doc {
+ let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+ builder.push(stream);
+ continue
+ }
+
+ // Synthesize `# [ $path $tokens ]` manually here.
+ let mut brackets = tokenstream::TokenStreamBuilder::new();
+
+ // For simple paths, push the identifier directly
+ if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
+ let ident = attr.path.segments[0].ident;
+ let token = token::Ident(ident.name, ident.as_str().starts_with("r#"));
+ brackets.push(tokenstream::TokenTree::token(token, ident.span));
+
+ // ... and for more complicated paths, fall back to a reparse hack that
+ // should eventually be removed.
+ } else {
+ let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
+ brackets.push(stream);
+ }
+
+ brackets.push(attr.tokens.clone());
+
+ // The spans we list here for `#` and for `[ ... ]` are both wrong in
+ // that they encompass more than each token, but hopefully they are
+ // "good enough" for now at least.
+ builder.push(tokenstream::TokenTree::token(token::Pound, attr.span));
+ let delim_span = tokenstream::DelimSpan::from_single(attr.span);
+ builder.push(tokenstream::TokenTree::Delimited(
+ delim_span, token::DelimToken::Bracket, brackets.build().into()));
+ }
+ builder.push(tokens.clone());
+ Some(builder.build())
+}
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index fd78a2bd534..eb74ab2b919 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -4,16 +4,13 @@ pub use DelimToken::*;
pub use LitKind::*;
pub use TokenKind::*;
-use crate::ast::{self};
-use crate::parse::{parse_stream_from_source_str, ParseSess};
-use crate::print::pprust;
+use crate::ast;
use crate::ptr::P;
use crate::symbol::kw;
-use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
+use crate::tokenstream::TokenTree;
use syntax_pos::symbol::Symbol;
-use syntax_pos::{self, Span, FileName, DUMMY_SP};
-use log::info;
+use syntax_pos::{self, Span, DUMMY_SP};
use std::fmt;
use std::mem;
@@ -737,131 +734,3 @@ impl fmt::Debug for Nonterminal {
}
}
}
-
-impl Nonterminal {
- pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
- // A `Nonterminal` is often a parsed AST item. At this point we now
- // need to convert the parsed AST to an actual token stream, e.g.
- // un-parse it basically.
- //
- // Unfortunately there's not really a great way to do that in a
- // guaranteed lossless fashion right now. The fallback here is to just
- // stringify the AST node and reparse it, but this loses all span
- // information.
- //
- // As a result, some AST nodes are annotated with the token stream they
- // came from. Here we attempt to extract these lossless token streams
- // before we fall back to the stringification.
- let tokens = match *self {
- Nonterminal::NtItem(ref item) => {
- prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
- }
- Nonterminal::NtTraitItem(ref item) => {
- prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
- }
- Nonterminal::NtImplItem(ref item) => {
- prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
- }
- Nonterminal::NtIdent(ident, is_raw) => {
- Some(TokenTree::token(Ident(ident.name, is_raw), ident.span).into())
- }
- Nonterminal::NtLifetime(ident) => {
- Some(TokenTree::token(Lifetime(ident.name), ident.span).into())
- }
- Nonterminal::NtTT(ref tt) => {
- Some(tt.clone().into())
- }
- _ => None,
- };
-
- // FIXME(#43081): Avoid this pretty-print + reparse hack
- let source = pprust::nonterminal_to_string(self);
- let filename = FileName::macro_expansion_source_code(&source);
- let tokens_for_real = parse_stream_from_source_str(filename, source, sess, Some(span));
-
- // During early phases of the compiler the AST could get modified
- // directly (e.g., attributes added or removed) and the internal cache
- // of tokens my not be invalidated or updated. Consequently if the
- // "lossless" token stream disagrees with our actual stringification
- // (which has historically been much more battle-tested) then we go
- // with the lossy stream anyway (losing span information).
- //
- // Note that the comparison isn't `==` here to avoid comparing spans,
- // but it *also* is a "probable" equality which is a pretty weird
- // definition. We mostly want to catch actual changes to the AST
- // like a `#[cfg]` being processed or some weird `macro_rules!`
- // expansion.
- //
- // What we *don't* want to catch is the fact that a user-defined
- // literal like `0xf` is stringified as `15`, causing the cached token
- // stream to not be literal `==` token-wise (ignoring spans) to the
- // token stream we got from stringification.
- //
- // Instead the "probably equal" check here is "does each token
- // recursively have the same discriminant?" We basically don't look at
- // the token values here and assume that such fine grained token stream
- // modifications, including adding/removing typically non-semantic
- // tokens such as extra braces and commas, don't happen.
- if let Some(tokens) = tokens {
- if tokens.probably_equal_for_proc_macro(&tokens_for_real) {
- return tokens
- }
- info!("cached tokens found, but they're not \"probably equal\", \
- going with stringified version");
- }
- return tokens_for_real
- }
-}
-
-fn prepend_attrs(sess: &ParseSess,
- attrs: &[ast::Attribute],
- tokens: Option<&tokenstream::TokenStream>,
- span: syntax_pos::Span)
- -> Option<tokenstream::TokenStream>
-{
- let tokens = tokens?;
- if attrs.len() == 0 {
- return Some(tokens.clone())
- }
- let mut builder = tokenstream::TokenStreamBuilder::new();
- for attr in attrs {
- assert_eq!(attr.style, ast::AttrStyle::Outer,
- "inner attributes should prevent cached tokens from existing");
-
- let source = pprust::attribute_to_string(attr);
- let macro_filename = FileName::macro_expansion_source_code(&source);
- if attr.is_sugared_doc {
- let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
- builder.push(stream);
- continue
- }
-
- // synthesize # [ $path $tokens ] manually here
- let mut brackets = tokenstream::TokenStreamBuilder::new();
-
- // For simple paths, push the identifier directly
- if attr.path.segments.len() == 1 && attr.path.segments[0].args.is_none() {
- let ident = attr.path.segments[0].ident;
- let token = Ident(ident.name, ident.as_str().starts_with("r#"));
- brackets.push(tokenstream::TokenTree::token(token, ident.span));
-
- // ... and for more complicated paths, fall back to a reparse hack that
- // should eventually be removed.
- } else {
- let stream = parse_stream_from_source_str(macro_filename, source, sess, Some(span));
- brackets.push(stream);
- }
-
- brackets.push(attr.tokens.clone());
-
- // The span we list here for `#` and for `[ ... ]` are both wrong in
- // that it encompasses more than each token, but it hopefully is "good
- // enough" for now at least.
- builder.push(tokenstream::TokenTree::token(Pound, attr.span));
- let delim_span = DelimSpan::from_single(attr.span);
- builder.push(tokenstream::TokenTree::Delimited(
- delim_span, DelimToken::Bracket, brackets.build().into()));
- }
- builder.push(tokens.clone());
- Some(builder.build())
-}
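
For reference, the `# [ $path $tokens ]` shape that the relocated `prepend_attrs` synthesizes can be reproduced outside the compiler. The sketch below uses the public proc-macro2 crate purely for illustration (an assumption; the commit itself uses libsyntax's internal `TokenStreamBuilder` and token types), building the token trees for an outer attribute like `#[derive(Debug)]`:

```rust
// Cargo.toml (assumed): proc-macro2 = "1"
use proc_macro2::{Delimiter, Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree};

fn main() {
    let call = Span::call_site();

    // `derive (Debug)`: the attribute path as a bare identifier followed by
    // the attribute's own delimited tokens (`attr.tokens` in the diff).
    let brackets: TokenStream = vec![
        TokenTree::Ident(Ident::new("derive", call)),
        TokenTree::Group(Group::new(
            Delimiter::Parenthesis,
            TokenTree::Ident(Ident::new("Debug", call)).into(),
        )),
    ]
    .into_iter()
    .collect();

    // `# [ derive (Debug) ]`: a `#` punct followed by a bracket group, the
    // same shape `prepend_attrs` pushes into its builder.
    let attr: TokenStream = vec![
        TokenTree::Punct(Punct::new('#', Spacing::Alone)),
        TokenTree::Group(Group::new(Delimiter::Bracket, brackets)),
    ]
    .into_iter()
    .collect();

    println!("{}", attr); // prints something like: # [derive (Debug)]
}
```

As in `prepend_attrs`, the spans here are coarse: everything shares `Span::call_site()`, mirroring the "good enough" spans noted in the comments above.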