11 files changed, 493 insertions, 175 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 7a30a8426f9..5946b29dc56 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,37 @@
+2009-10-19  Jakub Jelinek  <jakub@redhat.com>
+
+	* charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
+	(_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc
+	and char32_cset_desc.
+	(converter_for_type): Handle CPP_UTF8STRING.
+	(cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings.
+	* directives.c (get__Pragma_string): Handle CPP_UTF8STRING.
+	(parse_include): Reject raw strings.
+	* include/cpplib.h (CPP_UTF8STRING): New token type.
+	* internal.h (struct cpp_reader): Add utf8_cset_desc field.
+	* lex.c (lex_raw_string): New function.
+	(lex_string): Handle u8 string literals, call lex_raw_string
+	for raw string literals.
+	(_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R"
+	sequences.
+	* macro.c (stringify_arg): Handle CPP_UTF8STRING.
+
+2009-10-14  Jakub Jelinek  <jakub@redhat.com>
+
+	PR preprocessor/41543
+	* include/line-map.h (RESERVED_LOCATION_COUNT): Define.
+	* line-map.c (linemap_init): Initialize highest_location and
+	highest_line to RESERVED_LOCATION_COUNT-1 instead of 0.
+
+2009-10-09  Jason Merrill  <jason@redhat.com>
+
+	* charset.c (_cpp_valid_ucn): Update C++0x restrictions.
+
+2009-10-09  Neil Vachharajani <nvachhar@google.com>
+
+	* directives.c (DIRECTIVE_TABLE): Remove DEPRECATED from ident and
+	sccs.
+
 2009-09-23  Loren J. Rittle  <ljrittle@acm.org>
 
 	* configure.ac (AC_CHECK_HEADERS after AC_LANG(C++)): Add sys/stat.h.
diff --git a/libcpp/charset.c b/libcpp/charset.c
index b96c646f58e..837ccd77aab 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -721,6 +721,8 @@ cpp_init_iconv (cpp_reader *pfile)
 
   pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
   pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
+  pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
+  pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
   pfile->char16_cset_desc = init_iconv_desc (pfile,
 					     be ? "UTF-16BE" : "UTF-16LE",
 					     SOURCE_CHARSET);
@@ -741,6 +743,12 @@ _cpp_destroy_iconv (cpp_reader *pfile)
     {
       if (pfile->narrow_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->narrow_cset_desc.cd);
+      if (pfile->utf8_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->utf8_cset_desc.cd);
+      if (pfile->char16_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char16_cset_desc.cd);
+      if (pfile->char32_cset_desc.func == convert_using_iconv)
+	iconv_close (pfile->char32_cset_desc.cd);
       if (pfile->wide_cset_desc.func == convert_using_iconv)
 	iconv_close (pfile->wide_cset_desc.cd);
     }
@@ -948,10 +956,16 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
    ISO/IEC 10646 is NNNNNNNN; the character designated by the
    universal character name \uNNNN is that character whose character
    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
-   for a universal character name is less than 0x20 or in the range
-   0x7F-0x9F (inclusive), or if the universal character name
-   designates a character in the basic source character set, then the
-   program is ill-formed.
+   for a universal character name corresponds to a surrogate code point
+   (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.
+   Additionally, if the hexadecimal value for a universal-character-name
+   outside a character or string literal corresponds to a control character
+   (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a
+   character in the basic source character set, the program is ill-formed.
+
+   C99 6.4.3: A universal character name shall not specify a character
+   whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
+   or 0060 (`), nor one in the range D800 through DFFF inclusive.
 
    *PSTR must be preceded by "\u" or "\U"; it is assumed that the
    buffer end is delimited by a non-hex digit.  Returns zero if the
@@ -1018,9 +1032,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
 		 (int) (str - base), base);
       result = 1;
     }
-  /* The standard permits $, @ and ` to be specified as UCNs.  We use
-     hex escapes so that this also works with EBCDIC hosts.  */
+  /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
+     hex escapes so that this also works with EBCDIC hosts.
+     C++0x permits everything below 0xa0 within literals;
+     ucn_valid_in_identifier will complain about identifiers.  */
   else if ((result < 0xa0
+	    && !CPP_OPTION (pfile, cplusplus)
 	    && (result != 0x24 && result != 0x40 && result != 0x60))
 	   || (result & 0x80000000)
 	   || (result >= 0xD800 && result <= 0xDFFF))
@@ -1330,6 +1347,8 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
     {
     default:
 	return pfile->narrow_cset_desc;
+    case CPP_UTF8STRING:
+	return pfile->utf8_cset_desc;
     case CPP_CHAR16:
     case CPP_STRING16:
 	return pfile->char16_cset_desc;
@@ -1364,7 +1383,47 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
   for (i = 0; i < count; i++)
     {
       p = from[i].text;
-      if (*p == 'L' || *p == 'u' || *p == 'U') p++;
+      if (*p == 'u')
+	{
+	  if (*++p == '8')
+	    p++;
+	}
+      else if (*p == 'L' || *p == 'U') p++;
+      if (*p == 'R')
+	{
+	  const uchar *prefix;
+
+	  /* Skip over 'R"'.  */
+	  p += 2;
+	  prefix = p;
+	  while (*p != '[')
+	    p++;
+	  p++;
+	  limit = from[i].text + from[i].len;
+	  if (limit >= p + (p - prefix) + 1)
+	    limit -= (p - prefix) + 1;
+
+	  for (;;)
+	    {
+	      base = p;
+	      while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
+		p++;
+	      if (p > base)
+		{
+		  /* We have a run of normal characters; these can be fed
+		     directly to convert_cset.  */
+		  if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
+		    goto fail;
+		}
+	      if (p == limit)
+		break;
+
+	      p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
+	    }
+
+	  continue;
+	}
+
       p++; /* Skip leading quote.  */
       limit = from[i].text + from[i].len - 1; /* Skip trailing quote.  */
 
diff --git a/libcpp/directives.c b/libcpp/directives.c
index c5a1895b86e..01bb599e266 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -151,11 +151,11 @@ D(error,	T_ERROR,	STDC89,    0)		   /*    475 */ \
 D(pragma,	T_PRAGMA,	STDC89,    IN_I)	   /*    195 */ \
 D(warning,	T_WARNING,	EXTENSION, 0)		   /*     22 */ \
 D(include_next,	T_INCLUDE_NEXT,	EXTENSION, INCL | EXPAND)  /*     19 */ \
-D(ident,	T_IDENT,	EXTENSION, IN_I | DEPRECATED) /*     11 */ \
+D(ident,	T_IDENT,	EXTENSION, IN_I)           /*     11 */ \
 D(import,	T_IMPORT,	EXTENSION, INCL | EXPAND)  /* 0 ObjC */	\
 D(assert,	T_ASSERT,	EXTENSION, DEPRECATED)	   /* 0 SVR4 */	\
 D(unassert,	T_UNASSERT,	EXTENSION, DEPRECATED)	   /* 0 SVR4 */	\
-D(sccs,		T_SCCS,		EXTENSION, IN_I | DEPRECATED) /* 0 SVR4? */
+D(sccs,		T_SCCS,		EXTENSION, IN_I)           /* 0 SVR4? */
 
 /* #sccs is synonymous with #ident.  */
 #define do_sccs do_ident
@@ -697,7 +697,8 @@ parse_include (cpp_reader *pfile, int *pangle_brackets,
   /* Allow macro expansion.  */
   header = get_token_no_padding (pfile);
   *location = header->src_loc;
-  if (header->type == CPP_STRING || header->type == CPP_HEADER_NAME)
+  if ((header->type == CPP_STRING && header->val.str.text[0] != 'R')
+      || header->type == CPP_HEADER_NAME)
     {
       fname = XNEWVEC (char, header->val.str.len - 1);
       memcpy (fname, header->val.str.text + 1, header->val.str.len - 2);
@@ -1537,7 +1538,8 @@ get__Pragma_string (cpp_reader *pfile)
   if (string->type == CPP_EOF)
     _cpp_backup_tokens (pfile, 1);
   if (string->type != CPP_STRING && string->type != CPP_WSTRING
-      && string->type != CPP_STRING32 && string->type != CPP_STRING16)
+      && string->type != CPP_STRING32 && string->type != CPP_STRING16
+      && string->type != CPP_UTF8STRING)
     return NULL;
 
   paren = get_token_no_padding (pfile);
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index df04668dda0..e95f01a412a 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -127,6 +127,7 @@ struct _cpp_file;
   TK(WSTRING,		LITERAL) /* L"string" */			\
   TK(STRING16,		LITERAL) /* u"string" */			\
   TK(STRING32,		LITERAL) /* U"string" */			\
+  TK(UTF8STRING,	LITERAL) /* u8"string" */			\
   TK(OBJC_STRING,	LITERAL) /* @"string" - Objective-C */		\
   TK(HEADER_NAME,	LITERAL) /* <stdio.h> in #include */		\
 									\
@@ -728,10 +729,10 @@ extern const unsigned char *cpp_macro_definition (cpp_reader *,
 extern void _cpp_backup_tokens (cpp_reader *, unsigned int);
 extern const cpp_token *cpp_peek_token (cpp_reader *, int);
 
-/* Evaluate a CPP_CHAR or CPP_WCHAR token.  */
+/* Evaluate a CPP_*CHAR* token.  */
 extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
 					  unsigned int *, int *);
-/* Evaluate a vector of CPP_STRING or CPP_WSTRING tokens.  */
+/* Evaluate a vector of CPP_*STRING* tokens.  */
 extern bool cpp_interpret_string (cpp_reader *,
 				  const cpp_string *, size_t,
 				  cpp_string *, enum cpp_ttype);
diff --git a/libcpp/include/line-map.h b/libcpp/include/line-map.h
index 31ac8e5d4c3..9e31a6ae3b9 100644
--- a/libcpp/include/line-map.h
+++ b/libcpp/include/line-map.h
@@ -143,6 +143,11 @@ extern const struct line_map *linemap_add
 extern const struct line_map *linemap_lookup
   (struct line_maps *, source_location);
 
+/* source_location values from 0 to RESERVED_LOCATION_COUNT-1 are
+   reserved for libcpp users as special values; no token from libcpp
+   will contain any of those locations.  */
+#define RESERVED_LOCATION_COUNT	2
+
 /* Converts a map and a source_location to source line.  */
 #define SOURCE_LINE(MAP, LOC) \
   ((((LOC) - (MAP)->start_location) >> (MAP)->column_bits) + (MAP)->to_line)
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 21e51c6553c..aaa231c2ab1 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -397,6 +397,10 @@ struct cpp_reader
   struct cset_converter narrow_cset_desc;
 
   /* Descriptor for converting from the source character set to the
+     UTF-8 execution character set.  */
+  struct cset_converter utf8_cset_desc;
+
+  /* Descriptor for converting from the source character set to the
      UTF-16 execution character set.  */
   struct cset_converter char16_cset_desc;
 
diff --git a/libcpp/lex.c b/libcpp/lex.c
index bab14a4baa3..55bffa9a326 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -617,12 +617,192 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
   token->val.str.text = dest;
 }
 
+/* Lexes a raw string.  The stored string contains the spelling, including
+   double quotes, delimiter string, '[' and ']', any leading
+   'L', 'u', 'U' or 'u8' and 'R' modifier.  TOKEN's type is set to that of
+   the literal, CPP_OTHER if it is ill-formed, or CPP_EOF at end of file.
+
+   The spelling is NUL-terminated, but it is not guaranteed that this
+   is the first NUL since embedded NULs are preserved.  */
+
+static void
+lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
+		const uchar *cur)
+{
+  source_location saw_NUL = 0;
+  const uchar *raw_prefix;
+  unsigned int raw_prefix_len = 0;
+  enum cpp_ttype type;
+  size_t total_len = 0;
+  _cpp_buff *first_buff = NULL, *last_buff = NULL;
+
+  type = (*base == 'L' ? CPP_WSTRING :
+	  *base == 'U' ? CPP_STRING32 :
+	  *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+	  : CPP_STRING);
+
+  raw_prefix = cur + 1;
+  while (raw_prefix_len < 16)
+    {
+      switch (raw_prefix[raw_prefix_len])
+	{
+	case ' ': case '[': case ']': case '\t':
+	case '\v': case '\f': case '\n': default:
+	  break;
+	/* Basic source charset except the above chars.  */
+	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+	case 'y': case 'z':
+	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+	case 'Y': case 'Z':
+	case '0': case '1': case '2': case '3': case '4': case '5':
+	case '6': case '7': case '8': case '9':
+	case '_': case '{': case '}': case '#': case '(': case ')':
+	case '<': case '>': case '%': case ':': case ';': case '.':
+	case '?': case '*': case '+': case '-': case '/': case '^':
+	case '&': case '|': case '~': case '!': case '=': case ',':
+	case '\\': case '"': case '\'':
+	  raw_prefix_len++;
+	  continue;
+	}
+      break;
+    }
+
+  if (raw_prefix[raw_prefix_len] != '[')
+    {
+      int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
+		+ 1;
+      if (raw_prefix_len == 16)
+	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+			     "raw string delimiter longer than 16 characters");
+      else
+	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
+			     "invalid character '%c' in raw string delimiter",
+			     (int) raw_prefix[raw_prefix_len]);
+      pfile->buffer->cur = raw_prefix - 1;
+      create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
+      return;
+    }
+
+  cur = raw_prefix + raw_prefix_len + 1;
+  for (;;)
+    {
+      cppchar_t c = *cur++;
+
+      if (c == ']'
+	  && strncmp ((const char *) cur, (const char *) raw_prefix,
+		      raw_prefix_len) == 0
+	  && cur[raw_prefix_len] == '"')
+	{
+	  cur += raw_prefix_len + 1;
+	  break;
+	}
+      else if (c == '\n')
+	{
+	  if (pfile->state.in_directive
+	      || pfile->state.parsing_args
+	      || pfile->state.in_deferred_pragma)
+	    {
+	      cur--;
+	      type = CPP_OTHER;
+	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+				   "unterminated raw string");
+	      break;
+	    }
+
+	  /* Raw strings allow embedded non-escaped newlines, which
+	     complicates this routine a lot.  */
+	  if (first_buff == NULL)
+	    {
+	      total_len = cur - base;
+	      first_buff = last_buff = _cpp_get_buff (pfile, total_len);
+	      memcpy (BUFF_FRONT (last_buff), base, total_len);
+	      raw_prefix = BUFF_FRONT (last_buff) + (raw_prefix - base);
+	      BUFF_FRONT (last_buff) += total_len;
+	    }
+	  else
+	    {
+	      size_t len = cur - base;
+	      size_t cur_len = len > BUFF_ROOM (last_buff)
+			       ? BUFF_ROOM (last_buff) : len;
+
+	      total_len += len;
+	      memcpy (BUFF_FRONT (last_buff), base, cur_len);
+	      BUFF_FRONT (last_buff) += cur_len;
+	      if (len > cur_len)
+		{
+		  last_buff = _cpp_append_extend_buff (pfile, last_buff,
+						       len - cur_len);
+		  memcpy (BUFF_FRONT (last_buff), base + cur_len,
+			  len - cur_len);
+		  BUFF_FRONT (last_buff) += len - cur_len;
+		}
+	    }
+
+	  if (pfile->buffer->cur < pfile->buffer->rlimit)
+	    CPP_INCREMENT_LINE (pfile, 0);
+	  pfile->buffer->need_line = true;
+
+	  if (!_cpp_get_fresh_line (pfile))
+	    {
+	      source_location src_loc = token->src_loc;
+	      token->type = CPP_EOF;
+	      /* Tell the compiler the line number of the EOF token.  */
+	      token->src_loc = pfile->line_table->highest_line;
+	      token->flags = BOL;
+	      if (first_buff != NULL)
+		_cpp_release_buff (pfile, first_buff);
+	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
+				   "unterminated raw string");
+	      return;
+	    }
+
+	  cur = base = pfile->buffer->cur;
+	}
+      else if (c == '\0' && !saw_NUL)
+	LINEMAP_POSITION_FOR_COLUMN (saw_NUL, pfile->line_table,
+				     CPP_BUF_COLUMN (pfile->buffer, cur));
+    }
+
+  if (saw_NUL && !pfile->state.skipping)
+    cpp_error_with_line (pfile, CPP_DL_WARNING, saw_NUL, 0,
+	       "null character(s) preserved in literal");
+
+  pfile->buffer->cur = cur;
+  if (first_buff == NULL)
+    create_literal (pfile, token, base, cur - base, type);
+  else
+    {
+      uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
+
+      token->type = type;
+      token->val.str.len = total_len + (cur - base);
+      token->val.str.text = dest;
+      last_buff = first_buff;
+      while (last_buff != NULL)
+	{
+	  memcpy (dest, last_buff->base,
+		  BUFF_FRONT (last_buff) - last_buff->base);
+	  dest += BUFF_FRONT (last_buff) - last_buff->base;
+	  last_buff = last_buff->next;
+	}
+      _cpp_release_buff (pfile, first_buff);
+      memcpy (dest, base, cur - base);
+      dest[cur - base] = '\0';
+    }
+}
+
 /* Lexes a string, character constant, or angle-bracketed header file
    name.  The stored string contains the spelling, including opening
-   quote and leading any leading 'L', 'u' or 'U'.  It returns the type
-   of the literal, or CPP_OTHER if it was not properly terminated, or
-   CPP_LESS for an unterminated header name which must be relexed as
-   normal tokens.
+   quote and any leading 'L', 'u', 'U' or 'u8' and optional
+   'R' modifier.  The token's type is set to the type of the literal,
+   or CPP_OTHER if it was not properly terminated, or CPP_LESS for an
+   unterminated header name which must be relexed as normal tokens.
 
    The spelling is NUL-terminated, but it is not guaranteed that this
    is the first NUL since embedded NULs are preserved.  */
@@ -636,12 +816,24 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 
   cur = base;
   terminator = *cur++;
-  if (terminator == 'L' || terminator == 'u' || terminator == 'U')
+  if (terminator == 'L' || terminator == 'U')
     terminator = *cur++;
-  if (terminator == '\"')
+  else if (terminator == 'u')
+    {
+      terminator = *cur++;
+      if (terminator == '8')
+	terminator = *cur++;
+    }
+  if (terminator == 'R')
+    {
+      lex_raw_string (pfile, token, base, cur);
+      return;
+    }
+  if (terminator == '"')
     type = (*base == 'L' ? CPP_WSTRING :
 	    *base == 'U' ? CPP_STRING32 :
-	    *base == 'u' ? CPP_STRING16 : CPP_STRING);
+	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
+			 : CPP_STRING);
   else if (terminator == '\'')
     type = (*base == 'L' ? CPP_WCHAR :
 	    *base == 'U' ? CPP_CHAR32 :
@@ -1101,10 +1293,21 @@ _cpp_lex_direct (cpp_reader *pfile)
     case 'L':
     case 'u':
     case 'U':
-      /* 'L', 'u' or 'U' may introduce wide characters or strings.  */
+    case 'R':
+      /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
+	 wide strings or raw strings.  */
       if (c == 'L' || CPP_OPTION (pfile, uliterals))
 	{
-	  if (*buffer->cur == '\'' || *buffer->cur == '"')
+	  if ((*buffer->cur == '\'' && c != 'R')
+	      || *buffer->cur == '"'
+	      || (*buffer->cur == 'R'
+		  && c != 'R'
+		  && buffer->cur[1] == '"'
+		  && CPP_OPTION (pfile, uliterals))
+	      || (*buffer->cur == '8'
+		  && c == 'u'
+		  && (buffer->cur[1] == '"'
+		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
 	    {
 	      lex_string (pfile, result, buffer->cur - 1);
 	      break;
@@ -1120,7 +1323,7 @@ _cpp_lex_direct (cpp_reader *pfile)
     case 'y': case 'z':
     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     case 'G': case 'H': case 'I': case 'J': case 'K':
-    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+    case 'M': case 'N': case 'O': case 'P': case 'Q':
     case 'S': case 'T':           case 'V': case 'W': case 'X':
     case 'Y': case 'Z':
       result->type = CPP_NAME;
diff --git a/libcpp/line-map.c b/libcpp/line-map.c
index 553cc2ab605..a82c4286f07 100644
--- a/libcpp/line-map.c
+++ b/libcpp/line-map.c
@@ -38,8 +38,8 @@ linemap_init (struct line_maps *set)
   set->trace_includes = false;
   set->depth = 0;
   set->cache = 0;
-  set->highest_location = 0;
-  set->highest_line = 0;
+  set->highest_location = RESERVED_LOCATION_COUNT - 1;
+  set->highest_line = RESERVED_LOCATION_COUNT - 1;
   set->max_column_hint = 0;
   set->reallocator = 0;
 }
diff --git a/libcpp/macro.c b/libcpp/macro.c
index f31805955c6..1d284cf9f8a 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -379,7 +379,8 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
       escape_it = (token->type == CPP_STRING || token->type == CPP_CHAR
 		   || token->type == CPP_WSTRING || token->type == CPP_WCHAR
 		   || token->type == CPP_STRING32 || token->type == CPP_CHAR32
-		   || token->type == CPP_STRING16 || token->type == CPP_CHAR16);
+		   || token->type == CPP_STRING16 || token->type == CPP_CHAR16
+		   || token->type == CPP_UTF8STRING);
 
       /* Room for each char being written in octal, initial space and
 	 final quote and NUL.  */
diff --git a/libcpp/po/ChangeLog b/libcpp/po/ChangeLog
index 1a295b5c1f4..e6ce3126a60 100644
--- a/libcpp/po/ChangeLog
+++ b/libcpp/po/ChangeLog
@@ -1,3 +1,7 @@
+2009-10-17  Joseph Myers  <joseph@codesourcery.com>
+
+	* cpplib.pot: Regenerate.
+
 2009-09-19  Joseph Myers  <joseph@codesourcery.com>
 
 	* vi.po: Update.
diff --git a/libcpp/po/cpplib.pot b/libcpp/po/cpplib.pot
index 94adf5d08f5..7a5450ff9da 100644
--- a/libcpp/po/cpplib.pot
+++ b/libcpp/po/cpplib.pot
@@ -8,7 +8,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: PACKAGE VERSION\n"
 "Report-Msgid-Bugs-To: http://gcc.gnu.org/bugs.html\n"
-"POT-Creation-Date: 2009-04-22 16:24+0000\n"
+"POT-Creation-Date: 2009-10-17 13:36+0000\n"
 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
 "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -36,7 +36,7 @@ msgid "character 0x%lx is not in the basic source character set\n"
 msgstr ""
 
 #: charset.c:790
-#: charset.c:1398
+#: charset.c:1407
 msgid "converting to execution character set"
 msgstr ""
 
@@ -50,355 +50,355 @@ msgstr ""
 msgid "Character %x might not be NFKC"
 msgstr ""
 
-#: charset.c:980
+#: charset.c:986
 msgid "universal character names are only valid in C++ and C99"
 msgstr ""
 
-#: charset.c:983
+#: charset.c:989
 #, c-format
 msgid "the meaning of '\\%c' is different in traditional C"
 msgstr ""
 
-#: charset.c:992
+#: charset.c:998
 msgid "In _cpp_valid_ucn but not a UCN"
 msgstr ""
 
-#: charset.c:1017
+#: charset.c:1023
 #, c-format
 msgid "incomplete universal character name %.*s"
 msgstr ""
 
-#: charset.c:1029
+#: charset.c:1038
 #, c-format
 msgid "%.*s is not a valid universal character"
 msgstr ""
 
-#: charset.c:1039
+#: charset.c:1048
 #: lex.c:486
 msgid "'$' in identifier or number"
 msgstr ""
 
-#: charset.c:1049
+#: charset.c:1058
 #, c-format
 msgid "universal character %.*s is not valid in an identifier"
 msgstr ""
 
-#: charset.c:1053
+#: charset.c:1062
 #, c-format
 msgid "universal character %.*s is not valid at the start of an identifier"
 msgstr ""
 
-#: charset.c:1085
-#: charset.c:1628
+#: charset.c:1094
+#: charset.c:1637
 msgid "converting UCN to source character set"
 msgstr ""
 
-#: charset.c:1089
+#: charset.c:1098
 msgid "converting UCN to execution character set"
 msgstr ""
 
-#: charset.c:1161
+#: charset.c:1170
 msgid "the meaning of '\\x' is different in traditional C"
 msgstr ""
 
-#: charset.c:1178
+#: charset.c:1187
 msgid "\\x used with no following hex digits"
 msgstr ""
 
-#: charset.c:1185
+#: charset.c:1194
 msgid "hex escape sequence out of range"
 msgstr ""
 
-#: charset.c:1223
+#: charset.c:1232
 msgid "octal escape sequence out of range"
 msgstr ""
 
-#: charset.c:1289
+#: charset.c:1298
 msgid "the meaning of '\\a' is different in traditional C"
 msgstr ""
 
-#: charset.c:1296
+#: charset.c:1305
 #, c-format
 msgid "non-ISO-standard escape sequence, '\\%c'"
 msgstr ""
 
-#: charset.c:1304
+#: charset.c:1313
 #, c-format
-msgid "unknown escape sequence '\\%c'"
+msgid "unknown escape sequence: '\\%c'"
 msgstr ""
 
-#: charset.c:1312
+#: charset.c:1321
 #, c-format
 msgid "unknown escape sequence: '\\%s'"
 msgstr ""
 
-#: charset.c:1319
+#: charset.c:1328
 msgid "converting escape sequence to execution character set"
 msgstr ""
 
-#: charset.c:1463
-#: charset.c:1527
+#: charset.c:1472
+#: charset.c:1536
 msgid "character constant too long for its type"
 msgstr ""
 
-#: charset.c:1466
+#: charset.c:1475
 msgid "multi-character character constant"
 msgstr ""
 
-#: charset.c:1566
+#: charset.c:1575
 msgid "empty character constant"
 msgstr ""
 
-#: charset.c:1675
+#: charset.c:1684
 #, c-format
 msgid "failure to convert %s to %s"
 msgstr ""
 
-#: directives.c:219
-#: directives.c:245
+#: directives.c:220
+#: directives.c:246
 #, c-format
 msgid "extra tokens at end of #%s directive"
 msgstr ""
 
-#: directives.c:351
+#: directives.c:352
 #, c-format
 msgid "#%s is a GCC extension"
 msgstr ""
 
-#: directives.c:355
+#: directives.c:356
 #, c-format
 msgid "#%s is a deprecated GCC extension"
 msgstr ""
 
-#: directives.c:369
+#: directives.c:370
 msgid "suggest not using #elif in traditional C"
 msgstr ""
 
-#: directives.c:372
+#: directives.c:373
 #, c-format
 msgid "traditional C ignores #%s with the # indented"
 msgstr ""
 
-#: directives.c:376
+#: directives.c:377
 #, c-format
 msgid "suggest hiding #%s from traditional C with an indented #"
 msgstr ""
 
-#: directives.c:402
+#: directives.c:403
 msgid "embedding a directive within macro arguments is not portable"
 msgstr ""
 
-#: directives.c:422
+#: directives.c:423
 msgid "style of line directive is a GCC extension"
 msgstr ""
 
-#: directives.c:477
+#: directives.c:478
 #, c-format
 msgid "invalid preprocessing directive #%s"
 msgstr ""
 
-#: directives.c:545
+#: directives.c:546
 msgid "\"defined\" cannot be used as a macro name"
 msgstr ""
 
-#: directives.c:551
+#: directives.c:552
 #, c-format
 msgid "\"%s\" cannot be used as a macro name as it is an operator in C++"
 msgstr ""
 
-#: directives.c:554
+#: directives.c:555
 #, c-format
 msgid "no macro name given in #%s directive"
 msgstr ""
 
-#: directives.c:557
+#: directives.c:558
 msgid "macro names must be identifiers"
 msgstr ""
 
-#: directives.c:606
+#: directives.c:607
 #, c-format
 msgid "undefining \"%s\""
 msgstr ""
 
-#: directives.c:661
+#: directives.c:662
 msgid "missing terminating > character"
 msgstr ""
 
-#: directives.c:716
+#: directives.c:720
 #, c-format
 msgid "#%s expects \"FILENAME\" or <FILENAME>"
 msgstr ""
 
-#: directives.c:760
+#: directives.c:766
 #, c-format
 msgid "empty filename in #%s"
 msgstr ""
 
-#: directives.c:770
+#: directives.c:776
 msgid "#include nested too deeply"
 msgstr ""
 
-#: directives.c:811
+#: directives.c:817
 msgid "#include_next in primary source file"
 msgstr ""
 
-#: directives.c:837
+#: directives.c:843
 #, c-format
 msgid "invalid flag \"%s\" in line directive"
 msgstr ""
 
-#: directives.c:897
+#: directives.c:903
 msgid "unexpected end of file after #line"
 msgstr ""
 
-#: directives.c:900
+#: directives.c:906
 #, c-format
 msgid "\"%s\" after #line is not a positive integer"
 msgstr ""
 
-#: directives.c:906
-#: directives.c:908
+#: directives.c:912
+#: directives.c:914
 msgid "line number out of range"
 msgstr ""
 
-#: directives.c:921
-#: directives.c:1001
+#: directives.c:927
+#: directives.c:1007
 #, c-format
 msgid "\"%s\" is not a valid filename"
 msgstr ""
 
-#: directives.c:961
+#: directives.c:967
 #, c-format
 msgid "\"%s\" after # is not a positive integer"
 msgstr ""
 
-#: directives.c:1045
+#: directives.c:1059
 #, c-format
 msgid "%s"
 msgstr ""
 
-#: directives.c:1069
+#: directives.c:1083
 #, c-format
 msgid "invalid #%s directive"
 msgstr ""
 
-#: directives.c:1132
+#: directives.c:1146
 #, c-format
 msgid "registering pragmas in namespace \"%s\" with mismatched name expansion"
 msgstr ""
 
-#: directives.c:1141
+#: directives.c:1155
 #, c-format
 msgid "registering pragma \"%s\" with name expansion and no namespace"
 msgstr ""
 
-#: directives.c:1159
+#: directives.c:1173
 #, c-format
 msgid "registering \"%s\" as both a pragma and a pragma namespace"
 msgstr ""
 
-#: directives.c:1162
+#: directives.c:1176
 #, c-format
 msgid "#pragma %s %s is already registered"
 msgstr ""
 
-#: directives.c:1165
+#: directives.c:1179
 #, c-format
 msgid "#pragma %s is already registered"
 msgstr ""
 
-#: directives.c:1195
+#: directives.c:1209
 msgid "registering pragma with NULL handler"
 msgstr ""
 
-#: directives.c:1405
+#: directives.c:1419
 msgid "#pragma once in main file"
 msgstr ""
 
-#: directives.c:1428
+#: directives.c:1442
 msgid "invalid #pragma GCC poison directive"
 msgstr ""
 
-#: directives.c:1437
+#: directives.c:1451
 #, c-format
 msgid "poisoning existing macro \"%s\""
 msgstr ""
 
-#: directives.c:1456
+#: directives.c:1470
 msgid "#pragma system_header ignored outside include file"
 msgstr ""
 
-#: directives.c:1480
+#: directives.c:1495
 #, c-format
 msgid "cannot find source file %s"
 msgstr ""
 
-#: directives.c:1484
+#: directives.c:1499
 #, c-format
 msgid "current file is older than %s"
 msgstr ""
 
-#: directives.c:1668
+#: directives.c:1683
 msgid "_Pragma takes a parenthesized string literal"
 msgstr ""
 
-#: directives.c:1769
+#: directives.c:1784
 msgid "#else without #if"
 msgstr ""
 
-#: directives.c:1774
+#: directives.c:1789
 msgid "#else after #else"
 msgstr ""
 
-#: directives.c:1776
-#: directives.c:1809
+#: directives.c:1791
+#: directives.c:1824
 msgid "the conditional began here"
 msgstr ""
 
-#: directives.c:1802
+#: directives.c:1817
 msgid "#elif without #if"
 msgstr ""
 
-#: directives.c:1807
+#: directives.c:1822
 msgid "#elif after #else"
 msgstr ""
 
-#: directives.c:1845
+#: directives.c:1860
 msgid "#endif without #if"
 msgstr ""
 
-#: directives.c:1922
+#: directives.c:1940
 msgid "missing '(' after predicate"
 msgstr ""
 
-#: directives.c:1937
+#: directives.c:1955
 msgid "missing ')' to complete answer"
 msgstr ""
 
-#: directives.c:1957
+#: directives.c:1975
 msgid "predicate's answer is empty"
 msgstr ""
 
-#: directives.c:1984
+#: directives.c:2002
 msgid "assertion without predicate"
 msgstr ""
 
-#: directives.c:1986
+#: directives.c:2005
 msgid "predicate must be an identifier"
 msgstr ""
 
-#: directives.c:2072
+#: directives.c:2091
 #, c-format
 msgid "\"%s\" re-asserted"
 msgstr ""
 
-#: directives.c:2371
+#: directives.c:2391
 #, c-format
 msgid "unterminated #%s"
 msgstr ""
 
 #: directives-only.c:222
-#: lex.c:1149
+#: lex.c:1155
 #: traditional.c:163
 msgid "unterminated comment"
 msgstr ""
@@ -412,243 +412,243 @@ msgstr ""
 msgid "%s: %s"
 msgstr ""
 
-#: expr.c:278
+#: expr.c:279
 msgid "too many decimal points in number"
 msgstr ""
 
-#: expr.c:307
-#: expr.c:389
+#: expr.c:308
+#: expr.c:390
 msgid "fixed-point constants are a GCC extension"
 msgstr ""
 
-#: expr.c:320
+#: expr.c:321
 #, c-format
 msgid "invalid digit \"%c\" in binary constant"
 msgstr ""
 
-#: expr.c:322
+#: expr.c:323
 #, c-format
 msgid "invalid digit \"%c\" in octal constant"
 msgstr ""
 
-#: expr.c:330
+#: expr.c:331
 msgid "invalid prefix \"0b\" for floating constant"
 msgstr ""
 
-#: expr.c:336
+#: expr.c:337
 msgid "use of C99 hexadecimal floating constant"
 msgstr ""
 
-#: expr.c:345
+#: expr.c:346
 msgid "exponent has no digits"
 msgstr ""
 
-#: expr.c:352
+#: expr.c:353
 msgid "hexadecimal floating constants require an exponent"
 msgstr ""
 
-#: expr.c:358
+#: expr.c:359
 #, c-format
 msgid "invalid suffix \"%.*s\" on floating constant"
 msgstr ""
 
-#: expr.c:368
-#: expr.c:417
+#: expr.c:369
+#: expr.c:418
 #, c-format
 msgid "traditional C rejects the \"%.*s\" suffix"
 msgstr ""
 
-#: expr.c:376
+#: expr.c:377
 msgid "suffix for double constant is a GCC extension"
 msgstr ""
 
-#: expr.c:382
+#: expr.c:383
 #, c-format
 msgid "invalid suffix \"%.*s\" with hexadecimal floating constant"
 msgstr ""
 
-#: expr.c:393
+#: expr.c:394
 msgid "decimal float constants are a GCC extension"
 msgstr ""
 
-#: expr.c:403
+#: expr.c:404
 #, c-format
 msgid "invalid suffix \"%.*s\" on integer constant"
 msgstr ""
 
-#: expr.c:426
+#: expr.c:427
 msgid "use of C++0x long long integer constant"
 msgstr ""
 
-#: expr.c:435
+#: expr.c:436
 msgid "imaginary constants are a GCC extension"
 msgstr ""
 
-#: expr.c:438
+#: expr.c:439
 msgid "binary constants are a GCC extension"
 msgstr ""
 
-#: expr.c:531
+#: expr.c:532
 msgid "integer constant is too large for its type"
 msgstr ""
 
-#: expr.c:543
+#: expr.c:563
 msgid "integer constant is so large that it is unsigned"
 msgstr ""
 
-#: expr.c:638
+#: expr.c:658
 msgid "missing ')' after \"defined\""
 msgstr ""
 
-#: expr.c:645
+#: expr.c:665
 msgid "operator \"defined\" requires an identifier"
 msgstr ""
 
-#: expr.c:653
+#: expr.c:673
 #, c-format
 msgid "(\"%s\" is an alternative token for \"%s\" in C++)"
 msgstr ""
 
-#: expr.c:663
+#: expr.c:683
 msgid "this use of \"defined\" may not be portable"
 msgstr ""
 
-#: expr.c:716
+#: expr.c:736
 msgid "floating constant in preprocessor expression"
 msgstr ""
 
-#: expr.c:722
+#: expr.c:742
 msgid "imaginary number in preprocessor expression"
 msgstr ""
 
-#: expr.c:769
+#: expr.c:789
 #, c-format
 msgid "\"%s\" is not defined"
 msgstr ""
 
-#: expr.c:781
+#: expr.c:801
 msgid "assertions are a GCC extension"
 msgstr ""
 
-#: expr.c:784
+#: expr.c:804
 msgid "assertions are a deprecated extension"
 msgstr ""
 
-#: expr.c:917
-#: expr.c:946
+#: expr.c:937
+#: expr.c:966
 #, c-format
 msgid "missing binary operator before token \"%s\""
 msgstr ""
 
-#: expr.c:937
+#: expr.c:957
 #, c-format
 msgid "token \"%s\" is not valid in preprocessor expressions"
 msgstr ""
 
-#: expr.c:954
+#: expr.c:974
 msgid "missing expression between '(' and ')'"
 msgstr ""
 
-#: expr.c:957
+#: expr.c:977
 #, c-format
 msgid "%s with no expression"
 msgstr ""
 
-#: expr.c:960
+#: expr.c:980
 #, c-format
 msgid "operator '%s' has no right operand"
 msgstr ""
 
-#: expr.c:965
+#: expr.c:985
 #, c-format
 msgid "operator '%s' has no left operand"
 msgstr ""
 
-#: expr.c:991
+#: expr.c:1011
 msgid " ':' without preceding '?'"
 msgstr ""
 
-#: expr.c:1019
+#: expr.c:1039
 #, c-format
 msgid "unbalanced stack in %s"
 msgstr ""
 
-#: expr.c:1039
+#: expr.c:1059
 #, c-format
 msgid "impossible operator '%u'"
 msgstr ""
 
-#: expr.c:1140
+#: expr.c:1160
 msgid "missing ')' in expression"
 msgstr ""
 
-#: expr.c:1169
+#: expr.c:1189
 msgid "'?' without following ':'"
 msgstr ""
 
-#: expr.c:1179
+#: expr.c:1199
 msgid "integer overflow in preprocessor expression"
 msgstr ""
 
-#: expr.c:1184
+#: expr.c:1204
 msgid "missing '(' in expression"
 msgstr ""
 
-#: expr.c:1216
+#: expr.c:1236
 #, c-format
 msgid "the left operand of \"%s\" changes sign when promoted"
 msgstr ""
 
-#: expr.c:1221
+#: expr.c:1241
 #, c-format
 msgid "the right operand of \"%s\" changes sign when promoted"
 msgstr ""
 
-#: expr.c:1480
+#: expr.c:1500
 msgid "traditional C rejects the unary plus operator"
 msgstr ""
 
-#: expr.c:1563
+#: expr.c:1583
 msgid "comma operator in operand of #if"
 msgstr ""
 
-#: expr.c:1695
+#: expr.c:1719
 msgid "division by zero in #if"
 msgstr ""
 
-#: files.c:457
+#: files.c:463
 msgid "NULL directory in find_file"
 msgstr ""
 
-#: files.c:494
+#: files.c:500
 msgid "one or more PCH files were found, but they were invalid"
 msgstr ""
 
-#: files.c:497
+#: files.c:503
 msgid "use -Winvalid-pch for more information"
 msgstr ""
 
-#: files.c:588
+#: files.c:594
 #, c-format
 msgid "%s is a block device"
 msgstr ""
 
-#: files.c:605
+#: files.c:611
 #, c-format
 msgid "%s is too large"
 msgstr ""
 
-#: files.c:640
+#: files.c:646
 #, c-format
 msgid "%s is shorter than expected"
 msgstr ""
 
-#: files.c:875
+#: files.c:881
 #, c-format
 msgid "no include path in which to search for %s"
 msgstr ""
 
-#: files.c:1286
+#: files.c:1306
 msgid "Multiple include guards may be useful for:\n"
 msgstr ""
 
@@ -740,28 +740,33 @@ msgstr ""
 msgid "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro"
 msgstr ""
 
-#: lex.c:675
+#: lex.c:567
+#, c-format
+msgid "identifier \"%s\" is a special operator name in C++"
+msgstr ""
+
+#: lex.c:681
 msgid "null character(s) preserved in literal"
 msgstr ""
 
-#: lex.c:678
+#: lex.c:684
 #, c-format
 msgid "missing terminating %c character"
 msgstr ""
 
-#: lex.c:1160
+#: lex.c:1166
 msgid "C++ style comments are not allowed in ISO C90"
 msgstr ""
 
-#: lex.c:1162
+#: lex.c:1168
 msgid "(this will be reported only once per input file)"
 msgstr ""
 
-#: lex.c:1167
+#: lex.c:1173
 msgid "multi-line comment"
 msgstr ""
 
-#: lex.c:1487
+#: lex.c:1493
 #, c-format
 msgid "unspellable token %s"
 msgstr ""