diff options
author | Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> | 2023-01-05 20:17:30 +0100 |
---|---|---|
committer | Nilstrieb <48135649+Nilstrieb@users.noreply.github.com> | 2023-03-14 13:16:52 +0000 |
commit | 729185338f1726f60ea2dc282fe6bbf0eb84cb17 (patch) | |
tree | e84828d0610ea806f017a1ba76014199c180e8f0 /compiler/rustc_parse_format | |
parent | 0058748944abb3282aba0e0a74823c6411703565 (diff) | |
download | rust-729185338f1726f60ea2dc282fe6bbf0eb84cb17.tar.gz |
Properly allow macro-expanded `format_args` invocations to use captures
Originally, this was kinda half-allowed. There were some primitive
checks in place that looked at the span to see whether the input was
likely a literal. These "source literal" checks are needed because the
spans created during `format_args` parsing only make sense when it is
indeed a literal that was written in the source code directly.
This is orthogonal to the restriction that the first argument must be a
"direct literal", not being expanded from macros. This restriction was
imposed by [RFC 2795] on the basis of being too confusing. But this was
only concerned with the argument of the invocation being a literal, not
whether it was a source literal (maybe in spirit it meant it being a
source literal, this is not clear to me).
Since the original check only really cared about source literals (which
is good enough to deny the `format_args!(concat!())` example), macros
expanding to `format_args` invocations were able to use implicit
captures if they spanned the string in a way that led back to a source
string.
The "source literal" checks were not strict enough and caused ICEs in
certain cases (see # 106191 (the space is intended to avoid spammy
backreferences)). So I tightened it up in # 106195 to really only work
if it's a direct source literal.
This caused the `indoc` crate to break. `indoc` transformed the source
literal by removing whitespace, which made it not a "source literal"
anymore (which is required to fix the ICE). But since `indoc` spanned
the literal in ways that made the old check think that it's a literal,
it was able to use implicit captures (which is useful and nice for the
users of `indoc`).
This commit properly separates the previously introduced concepts of
"source literal" and "direct literal" and therefore allows `indoc`
invocations, which don't create "source literals", to use implicit
captures again.
[RFC 2795]: https://rust-lang.github.io/rfcs/2795-format-args-implicit-identifiers.html#macro-hygiene
Diffstat (limited to 'compiler/rustc_parse_format')
-rw-r--r-- | compiler/rustc_parse_format/src/lib.rs | 18 |
1 files changed, 10 insertions, 8 deletions
diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 8a3cedfee79..d67239efa80 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -234,8 +234,10 @@ pub struct Parser<'a> { last_opening_brace: Option<InnerSpan>, /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` append_newline: bool, - /// Whether this formatting string is a literal or it comes from a macro. - pub is_literal: bool, + /// Whether this formatting string was written directly in the source. This controls whether we + /// can use spans to refer into it and give better error messages. + /// N.B: This does _not_ control whether implicit argument captures can be used. + pub is_source_literal: bool, /// Start position of the current line. cur_line_start: usize, /// Start and end byte offset of every line of the format string. Excludes @@ -262,7 +264,7 @@ impl<'a> Iterator for Parser<'a> { } else { let arg = self.argument(lbrace_end); if let Some(rbrace_pos) = self.must_consume('}') { - if self.is_literal { + if self.is_source_literal { let lbrace_byte_pos = self.to_span_index(pos); let rbrace_byte_pos = self.to_span_index(rbrace_pos); @@ -302,7 +304,7 @@ impl<'a> Iterator for Parser<'a> { _ => Some(String(self.string(pos))), } } else { - if self.is_literal { + if self.is_source_literal { let span = self.span(self.cur_line_start, self.input.len()); if self.line_spans.last() != Some(&span) { self.line_spans.push(span); @@ -323,7 +325,7 @@ impl<'a> Parser<'a> { mode: ParseMode, ) -> Parser<'a> { let input_string_kind = find_width_map_from_snippet(snippet, style); - let (width_map, is_literal) = match input_string_kind { + let (width_map, is_source_literal) = match input_string_kind { InputStringKind::Literal { width_mappings } => (width_mappings, true), InputStringKind::NotALiteral => (Vec::new(), false), }; @@ -339,7 +341,7 @@ impl<'a> Parser<'a> { width_map, last_opening_brace: 
None, append_newline, - is_literal, + is_source_literal, cur_line_start: 0, line_spans: vec![], } @@ -532,13 +534,13 @@ impl<'a> Parser<'a> { '{' | '}' => { return &self.input[start..pos]; } - '\n' if self.is_literal => { + '\n' if self.is_source_literal => { self.line_spans.push(self.span(self.cur_line_start, pos)); self.cur_line_start = pos + 1; self.cur.next(); } _ => { - if self.is_literal && pos == self.cur_line_start && c.is_whitespace() { + if self.is_source_literal && pos == self.cur_line_start && c.is_whitespace() { self.cur_line_start = pos + c.len_utf8(); } self.cur.next(); |