diff options
Diffstat (limited to 'gdb/rust-parse.c')
-rw-r--r-- | gdb/rust-parse.c | 86 |
1 files changed, 60 insertions, 26 deletions
diff --git a/gdb/rust-parse.c b/gdb/rust-parse.c index 6628a86f575..7d7d882872c 100644 --- a/gdb/rust-parse.c +++ b/gdb/rust-parse.c @@ -22,8 +22,8 @@ #include "block.h" #include "charset.h" #include "cp-support.h" -#include "gdb_obstack.h" -#include "gdb_regex.h" +#include "gdbsupport/gdb_obstack.h" +#include "gdbsupport/gdb_regex.h" #include "rust-lang.h" #include "parser-defs.h" #include "gdbsupport/selftest.h" @@ -271,7 +271,10 @@ struct rust_parser operation_up parse_entry_point () { lex (); - return parse_expr (); + operation_up result = parse_expr (); + if (current_token != 0) + error (_("Syntax error near '%s'"), pstate->prev_lexptr); + return result; } operation_up parse_tuple (); @@ -452,7 +455,7 @@ rust_parser::rust_lookup_type (const char *name) if (result.symbol != NULL) { update_innermost_block (result); - return SYMBOL_TYPE (result.symbol); + return result.symbol->type (); } type = lookup_typename (language (), name, NULL, 1); @@ -577,6 +580,36 @@ rust_parser::lex_escape (int is_byte) return result; } +/* A helper for lex_character. Search forward for the closing single + quote, then convert the bytes from the host charset to UTF-32. */ + +static uint32_t +lex_multibyte_char (const char *text, int *len) +{ + /* Only look a maximum of 5 bytes for the closing quote. This is + the maximum for UTF-8. */ + int quote; + gdb_assert (text[0] != '\''); + for (quote = 1; text[quote] != '\0' && text[quote] != '\''; ++quote) + ; + *len = quote; + /* The caller will issue an error. */ + if (text[quote] == '\0') + return 0; + + auto_obstack result; + convert_between_encodings (host_charset (), HOST_UTF32, + (const gdb_byte *) text, + quote, 1, &result, translit_none); + + int size = obstack_object_size (&result); + if (size > 4) + error (_("overlong character literal")); + uint32_t value; + memcpy (&value, obstack_finish (&result), size); + return value; +} + /* Lex a character constant. */ int @@ -592,13 +625,15 @@ rust_parser::lex_character () } gdb_assert (pstate->lexptr[0] == '\''); ++pstate->lexptr; - /* This should handle UTF-8 here. */ - if (pstate->lexptr[0] == '\\') + if (pstate->lexptr[0] == '\'') + error (_("empty character literal")); + else if (pstate->lexptr[0] == '\\') value = lex_escape (is_byte); else { - value = pstate->lexptr[0] & 0xff; - ++pstate->lexptr; + int len; + value = lex_multibyte_char (&pstate->lexptr[0], &len); + pstate->lexptr += len; } if (pstate->lexptr[0] != '\'') @@ -695,16 +730,10 @@ rust_parser::lex_string () if (is_byte) obstack_1grow (&obstack, value); else - { -#if WORDS_BIGENDIAN -#define UTF32 "UTF-32BE" -#else -#define UTF32 "UTF-32LE" -#endif - convert_between_encodings (UTF32, "UTF-8", (gdb_byte *) &value, - sizeof (value), sizeof (value), - &obstack, translit_none); - } + convert_between_encodings (HOST_UTF32, "UTF-8", + (gdb_byte *) &value, + sizeof (value), sizeof (value), + &obstack, translit_none); } else if (pstate->lexptr[0] == '\0') error (_("Unexpected EOF in string")); @@ -746,7 +775,10 @@ rust_identifier_start_p (char c) return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' - || c == '$'); + || c == '$' + /* Allow any non-ASCII character as an identifier. There + doesn't seem to be a need to be picky about this. */ + || (c & 0x80) != 0); } /* Lex an identifier. */ @@ -772,13 +804,14 @@ rust_parser::lex_identifier () ++pstate->lexptr; - /* For the time being this doesn't handle Unicode rules. Non-ASCII - identifiers are gated anyway. */ + /* Allow any non-ASCII character here. This "handles" UTF-8 by + passing it through. */ while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z') || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z') || pstate->lexptr[0] == '_' || (is_gdb_var && pstate->lexptr[0] == '$') - || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')) + || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9') + || (pstate->lexptr[0] & 0x80) != 0) ++pstate->lexptr; @@ -1105,7 +1138,7 @@ rust_parser::parse_tuple () { /* Parenthesized expression. */ lex (); - return expr; + return make_operation<rust_parenthesized_operation> (std::move (expr)); } std::vector<operation_up> ops; @@ -1176,15 +1209,15 @@ rust_parser::name_to_operation (const std::string &name) struct block_symbol sym = lookup_symbol (name.c_str (), pstate->expression_context_block, VAR_DOMAIN); - if (sym.symbol != nullptr && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF) + if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF) return make_operation<var_value_operation> (sym); struct type *type = nullptr; if (sym.symbol != nullptr) { - gdb_assert (SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF); - type = SYMBOL_TYPE (sym.symbol); + gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF); + type = sym.symbol->type (); } if (type == nullptr) type = rust_lookup_type (name.c_str ()); @@ -1990,6 +2023,7 @@ rust_parser::parse_atom (bool required) case STRING: result = parse_string (); + lex (); break; case BYTESTRING: |