summaryrefslogtreecommitdiff
path: root/compiler/rustc_lexer/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/rustc_lexer/src/lib.rs')
-rw-r--r--compiler/rustc_lexer/src/lib.rs118
1 files changed, 73 insertions, 45 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index b3f4b5cd5e5..d511d2b1280 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -186,12 +186,16 @@ pub enum LiteralKind {
Str { terminated: bool },
/// "b"abc"", "b"abc"
ByteStr { terminated: bool },
+ /// `c"abc"`, `c"abc`
+ CStr { terminated: bool },
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a". `None` indicates
/// an invalid literal.
RawStr { n_hashes: Option<u8> },
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a". `None`
/// indicates an invalid literal.
RawByteStr { n_hashes: Option<u8> },
+ /// `cr"abc"`, `cr#"abc"#`, `cr#"a`. `None` indicates an invalid literal.
+ RawCStr { n_hashes: Option<u8> },
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -357,39 +361,18 @@ impl Cursor<'_> {
},
// Byte literal, byte string literal, raw byte string literal or identifier.
- 'b' => match (self.first(), self.second()) {
- ('\'', _) => {
- self.bump();
- let terminated = self.single_quoted_string();
- let suffix_start = self.pos_within_token();
- if terminated {
- self.eat_literal_suffix();
- }
- let kind = Byte { terminated };
- Literal { kind, suffix_start }
- }
- ('"', _) => {
- self.bump();
- let terminated = self.double_quoted_string();
- let suffix_start = self.pos_within_token();
- if terminated {
- self.eat_literal_suffix();
- }
- let kind = ByteStr { terminated };
- Literal { kind, suffix_start }
- }
- ('r', '"') | ('r', '#') => {
- self.bump();
- let res = self.raw_double_quoted_string(2);
- let suffix_start = self.pos_within_token();
- if res.is_ok() {
- self.eat_literal_suffix();
- }
- let kind = RawByteStr { n_hashes: res.ok() };
- Literal { kind, suffix_start }
- }
- _ => self.ident_or_unknown_prefix(),
- },
+ 'b' => self.c_or_byte_string(
+ |terminated| ByteStr { terminated },
+ |n_hashes| RawByteStr { n_hashes },
+ Some(|terminated| Byte { terminated }),
+ ),
+
+ // c-string literal, raw c-string literal or identifier.
+ 'c' => self.c_or_byte_string(
+ |terminated| CStr { terminated },
+ |n_hashes| RawCStr { n_hashes },
+ None,
+ ),
// Identifier (this should be checked after other variant that can
// start as identifier).
@@ -553,39 +536,84 @@ impl Cursor<'_> {
}
}
+ fn c_or_byte_string( // Shared lexing for the `b` and `c` prefixes: quoted, raw, or fall back to an identifier.
+ &mut self,
+ mk_kind: impl FnOnce(bool) -> LiteralKind, // builds the double-quoted literal kind from `terminated`
+ mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind, // builds the raw literal kind from the hash count
+ single_quoted: Option<fn(bool) -> LiteralKind>, // `Some` when a single-quoted form exists (the `b` caller); the `c` caller passes `None`
+ ) -> TokenKind {
+ match (self.first(), self.second(), single_quoted) { // dispatch on two peeked chars plus whether a single-quoted form is allowed
+ ('\'', _, Some(mk_kind)) => { // this `mk_kind` shadows the parameter: it is the single-quoted constructor
+ self.bump(); // presumably consumes the opening quote - TODO confirm against `bump`
+ let terminated = self.single_quoted_string();
+ let suffix_start = self.pos_within_token(); // recorded before any suffix is eaten
+ if terminated { // a suffix is only eaten after a terminated literal
+ self.eat_literal_suffix();
+ }
+ let kind = mk_kind(terminated);
+ Literal { kind, suffix_start }
+ }
+ ('"', _, _) => { // double-quoted form, accepted for both callers
+ self.bump(); // presumably consumes the opening quote - TODO confirm against `bump`
+ let terminated = self.double_quoted_string();
+ let suffix_start = self.pos_within_token(); // recorded before any suffix is eaten
+ if terminated { // a suffix is only eaten after a terminated literal
+ self.eat_literal_suffix();
+ }
+ let kind = mk_kind(terminated); // here `mk_kind` is the function parameter
+ Literal { kind, suffix_start }
+ }
+ ('r', '"', _) | ('r', '#', _) => { // raw form: `r` followed by a quote or a hash
+ self.bump(); // presumably consumes the `r` - TODO confirm against `bump`
+ let res = self.raw_double_quoted_string(2); // NOTE(review): `2` is presumably the prefix length (`br`/`cr`) - confirm
+ let suffix_start = self.pos_within_token(); // recorded before any suffix is eaten
+ if res.is_ok() { // a suffix is only eaten when the raw string lexed without error
+ self.eat_literal_suffix();
+ }
+ let kind = mk_kind_raw(res.ok()); // an `Err` result is passed on as `None`
+ Literal { kind, suffix_start }
+ }
+ _ => self.ident_or_unknown_prefix(), // no literal form matched, including `'` when `single_quoted` is `None`
+ }
+ }
+
fn number(&mut self, first_digit: char) -> LiteralKind {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
if first_digit == '0' {
// Attempt to parse encoding base.
- let has_digits = match self.first() {
+ match self.first() {
'b' => {
base = Base::Binary;
self.bump();
- self.eat_decimal_digits()
+ if !self.eat_decimal_digits() {
+ return Int { base, empty_int: true };
+ }
}
'o' => {
base = Base::Octal;
self.bump();
- self.eat_decimal_digits()
+ if !self.eat_decimal_digits() {
+ return Int { base, empty_int: true };
+ }
}
'x' => {
base = Base::Hexadecimal;
self.bump();
- self.eat_hexadecimal_digits()
+ if !self.eat_hexadecimal_digits() {
+ return Int { base, empty_int: true };
+ }
}
- // Not a base prefix.
- '0'..='9' | '_' | '.' | 'e' | 'E' => {
+ // Not a base prefix; consume additional digits.
+ '0'..='9' | '_' => {
self.eat_decimal_digits();
- true
}
+
+ // Also not a base prefix; nothing more to do here.
+ '.' | 'e' | 'E' => {}
+
// Just a 0.
_ => return Int { base, empty_int: false },
- };
- // Base prefix was provided, but there were no digits
- // after it, e.g. "0x".
- if !has_digits {
- return Int { base, empty_int: true };
}
} else {
// No base prefix, parse number in the usual way.