1 files changed, 60 insertions, 26 deletions
diff --git a/gdb/rust-parse.c b/gdb/rust-parse.c
index 6628a86f575..7d7d882872c 100644
--- a/gdb/rust-parse.c
+++ b/gdb/rust-parse.c
@@ -22,8 +22,8 @@
 #include "block.h"
 #include "charset.h"
 #include "cp-support.h"
-#include "gdb_obstack.h"
-#include "gdb_regex.h"
+#include "gdbsupport/gdb_obstack.h"
+#include "gdbsupport/gdb_regex.h"
 #include "rust-lang.h"
 #include "parser-defs.h"
 #include "gdbsupport/selftest.h"
@@ -271,7 +271,10 @@ struct rust_parser
   operation_up parse_entry_point ()
   {
     lex ();
-    return parse_expr ();
+    operation_up result = parse_expr ();
+    if (current_token != 0)	/* Presumably 0 is end of input; confirm.  */
+      error (_("Syntax error near '%s'"), pstate->prev_lexptr);
+    return result;
   }
 
   operation_up parse_tuple ();
@@ -452,7 +455,7 @@ rust_parser::rust_lookup_type (const char *name)
   if (result.symbol != NULL)
     {
       update_innermost_block (result);
-      return SYMBOL_TYPE (result.symbol);
+      return result.symbol->type ();
     }
 
   type = lookup_typename (language (), name, NULL, 1);
@@ -577,6 +580,36 @@ rust_parser::lex_escape (int is_byte)
   return result;
 }
 
+/* A helper for lex_character.  Scan TEXT up to the closing single
+   quote, store that byte count in *LEN, and return the character
+   converted from the host charset to UTF-32.  */
+
+static uint32_t
+lex_multibyte_char (const char *text, int *len)
+{
+  gdb_assert (text[0] != '\'');
+  /* Start at 0 so a NUL at TEXT[0] stops the scan at once.  */
+  int quote = 0;
+  while (text[quote] != '\0' && text[quote] != '\'')
+    ++quote;
+  *len = quote;
+  /* No closing quote: return 0; the caller will issue an error.  */
+  if (text[quote] == '\0')
+    return 0;
+
+  auto_obstack result;
+  convert_between_encodings (host_charset (), HOST_UTF32,
+			     (const gdb_byte *) text,
+			     quote, 1, &result, translit_none);
+
+  int size = obstack_object_size (&result);
+  if (size > 4)
+    error (_("overlong character literal"));
+  uint32_t value = 0;	/* Fully defined even if SIZE < 4.  */
+  memcpy (&value, obstack_finish (&result), size);
+  return value;
+}
+
 /* Lex a character constant.  */
 
 int
@@ -592,13 +625,15 @@ rust_parser::lex_character ()
     }
   gdb_assert (pstate->lexptr[0] == '\'');
   ++pstate->lexptr;
-  /* This should handle UTF-8 here.  */
-  if (pstate->lexptr[0] == '\\')
+  if (pstate->lexptr[0] == '\'')
+    error (_("empty character literal"));
+  else if (pstate->lexptr[0] == '\\')
     value = lex_escape (is_byte);
   else
     {
-      value = pstate->lexptr[0] & 0xff;
-      ++pstate->lexptr;
+      int len;
+      value = lex_multibyte_char (&pstate->lexptr[0], &len);
+      pstate->lexptr += len;
     }
 
   if (pstate->lexptr[0] != '\'')
@@ -695,16 +730,10 @@ rust_parser::lex_string ()
 	  if (is_byte)
 	    obstack_1grow (&obstack, value);
 	  else
-	    {
-#if WORDS_BIGENDIAN
-#define UTF32 "UTF-32BE"
-#else
-#define UTF32 "UTF-32LE"
-#endif
-	      convert_between_encodings (UTF32, "UTF-8", (gdb_byte *) &value,
-					 sizeof (value), sizeof (value),
-					 &obstack, translit_none);
-	    }
+	    convert_between_encodings (HOST_UTF32, "UTF-8",
+				       (gdb_byte *) &value,
+				       sizeof (value), sizeof (value),
+				       &obstack, translit_none);
 	}
       else if (pstate->lexptr[0] == '\0')
 	error (_("Unexpected EOF in string"));
@@ -746,7 +775,10 @@ rust_identifier_start_p (char c)
   return ((c >= 'a' && c <= 'z')
 	  || (c >= 'A' && c <= 'Z')
 	  || c == '_'
-	  || c == '$');
+	  || c == '$'
+	  /* Allow any non-ASCII character as an identifier.  There
+	     doesn't seem to be a need to be picky about this.  */
+	  || (c & 0x80) != 0);
 }
 
 /* Lex an identifier.  */
@@ -772,13 +804,14 @@ rust_parser::lex_identifier ()
 
   ++pstate->lexptr;
 
-  /* For the time being this doesn't handle Unicode rules.  Non-ASCII
-     identifiers are gated anyway.  */
+  /* Allow any non-ASCII character here.  This "handles" UTF-8 by
+     passing it through.  */
   while ((pstate->lexptr[0] >= 'a' && pstate->lexptr[0] <= 'z')
 	 || (pstate->lexptr[0] >= 'A' && pstate->lexptr[0] <= 'Z')
 	 || pstate->lexptr[0] == '_'
 	 || (is_gdb_var && pstate->lexptr[0] == '$')
-	 || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9'))
+	 || (pstate->lexptr[0] >= '0' && pstate->lexptr[0] <= '9')
+	 || (pstate->lexptr[0] & 0x80) != 0)
     ++pstate->lexptr;
 
 
@@ -1105,7 +1138,7 @@ rust_parser::parse_tuple ()
     {
       /* Parenthesized expression.  */
       lex ();
-      return expr;
+      return make_operation<rust_parenthesized_operation> (std::move (expr));
     }
 
   std::vector<operation_up> ops;
@@ -1176,15 +1209,15 @@ rust_parser::name_to_operation (const std::string &name)
   struct block_symbol sym = lookup_symbol (name.c_str (),
 					   pstate->expression_context_block,
 					   VAR_DOMAIN);
-  if (sym.symbol != nullptr && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF)
+  if (sym.symbol != nullptr && sym.symbol->aclass () != LOC_TYPEDEF)
     return make_operation<var_value_operation> (sym);
 
   struct type *type = nullptr;
 
   if (sym.symbol != nullptr)
     {
-      gdb_assert (SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF);
-      type = SYMBOL_TYPE (sym.symbol);
+      gdb_assert (sym.symbol->aclass () == LOC_TYPEDEF);
+      type = sym.symbol->type ();
     }
   if (type == nullptr)
     type = rust_lookup_type (name.c_str ());
@@ -1990,6 +2023,7 @@ rust_parser::parse_atom (bool required)
 
     case STRING:
       result = parse_string ();
+      lex ();
       break;
 
     case BYTESTRING: