diff options
Diffstat (limited to 'compiler/rustc_lexer/src/lib.rs')
-rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 118 |
1 files changed, 73 insertions, 45 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index b3f4b5cd5e5..d511d2b1280 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -186,12 +186,16 @@ pub enum LiteralKind { Str { terminated: bool }, /// "b"abc"", "b"abc" ByteStr { terminated: bool }, + /// `c"abc"`, `c"abc` + CStr { terminated: bool }, /// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates /// an invalid literal. RawStr { n_hashes: Option<u8> }, /// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None` /// indicates an invalid literal. RawByteStr { n_hashes: Option<u8> }, + /// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal. + RawCStr { n_hashes: Option<u8> }, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -357,39 +361,18 @@ impl Cursor<'_> { }, // Byte literal, byte string literal, raw byte string literal or identifier. - 'b' => match (self.first(), self.second()) { - ('\'', _) => { - self.bump(); - let terminated = self.single_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = Byte { terminated }; - Literal { kind, suffix_start } - } - ('"', _) => { - self.bump(); - let terminated = self.double_quoted_string(); - let suffix_start = self.pos_within_token(); - if terminated { - self.eat_literal_suffix(); - } - let kind = ByteStr { terminated }; - Literal { kind, suffix_start } - } - ('r', '"') | ('r', '#') => { - self.bump(); - let res = self.raw_double_quoted_string(2); - let suffix_start = self.pos_within_token(); - if res.is_ok() { - self.eat_literal_suffix(); - } - let kind = RawByteStr { n_hashes: res.ok() }; - Literal { kind, suffix_start } - } - _ => self.ident_or_unknown_prefix(), - }, + 'b' => self.c_or_byte_string( + |terminated| ByteStr { terminated }, + |n_hashes| RawByteStr { n_hashes }, + Some(|terminated| Byte { terminated }), + ), + + // c-string literal, raw c-string literal or identifier. + 'c' => self.c_or_byte_string( + |terminated| CStr { terminated }, + |n_hashes| RawCStr { n_hashes }, + None, + ), // Identifier (this should be checked after other variant that can // start as identifier). @@ -553,39 +536,84 @@ impl Cursor<'_> { } } + fn c_or_byte_string( + &mut self, + mk_kind: impl FnOnce(bool) -> LiteralKind, + mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind, + single_quoted: Option<fn(bool) -> LiteralKind>, + ) -> TokenKind { + match (self.first(), self.second(), single_quoted) { + ('\'', _, Some(mk_kind)) => { + self.bump(); + let terminated = self.single_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('"', _, _) => { + self.bump(); + let terminated = self.double_quoted_string(); + let suffix_start = self.pos_within_token(); + if terminated { + self.eat_literal_suffix(); + } + let kind = mk_kind(terminated); + Literal { kind, suffix_start } + } + ('r', '"', _) | ('r', '#', _) => { + self.bump(); + let res = self.raw_double_quoted_string(2); + let suffix_start = self.pos_within_token(); + if res.is_ok() { + self.eat_literal_suffix(); + } + let kind = mk_kind_raw(res.ok()); + Literal { kind, suffix_start } + } + _ => self.ident_or_unknown_prefix(), + } + } + fn number(&mut self, first_digit: char) -> LiteralKind { debug_assert!('0' <= self.prev() && self.prev() <= '9'); let mut base = Base::Decimal; if first_digit == '0' { // Attempt to parse encoding base. - let has_digits = match self.first() { + match self.first() { 'b' => { base = Base::Binary; self.bump(); - self.eat_decimal_digits() + if !self.eat_decimal_digits() { + return Int { base, empty_int: true }; + } } 'o' => { base = Base::Octal; self.bump(); - self.eat_decimal_digits() + if !self.eat_decimal_digits() { + return Int { base, empty_int: true }; + } } 'x' => { base = Base::Hexadecimal; self.bump(); - self.eat_hexadecimal_digits() + if !self.eat_hexadecimal_digits() { + return Int { base, empty_int: true }; + } } - // Not a base prefix. - '0'..='9' | '_' | '.' | 'e' | 'E' => { + // Not a base prefix; consume additional digits. + '0'..='9' | '_' => { self.eat_decimal_digits(); - true } + + // Also not a base prefix; nothing more to do here. + '.' | 'e' | 'E' => {} + // Just a 0. _ => return Int { base, empty_int: false }, - }; - // Base prefix was provided, but there were no digits - // after it, e.g. "0x". - if !has_digits { - return Int { base, empty_int: true }; } } else { // No base prefix, parse number in the usual way. |