From bb1fa6bb7346b75421ab702f475f3a711a02f822 Mon Sep 17 00:00:00 2001
From: geoffk <geoffk@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Sat, 12 Mar 2005 10:44:06 +0000
Subject: Index: libcpp/ChangeLog 2005-03-12  Geoffrey Keating 
 <geoffk@apple.com>

	* directives.c (glue_header_name): Update call to cpp_spell_token.
	* internal.h (_cpp_interpret_identifier): New.
	* charset.c (_cpp_interpret_identifier): New.
	(_cpp_valid_ucn): Allow UCN version of '$'.
	* lex.c (lex_identifier): Add extra parameter to indicate if initial
	character was '$' or '\'.  Support identifiers with UCNs.
	(forms_identifier_p): Allow UCNs.
	(_cpp_lex_direct): Pass extra parameter to lex_identifier.
	(utf8_to_ucn): New.
	(cpp_spell_token): Add FORSTRING parameter.  Use it.
	(cpp_token_as_text): Update call to cpp_spell_token.
	(cpp_output_token): Write UCNs back out.
	(stringify_arg): Update call to cpp_spell_token.
	(paste_tokens): Likewise.
	(cpp_macro_definition): Likewise.
	* macro.c (stringify_arg): Likewise.
	(paste_tokens): Likewise.
	(cpp_macro_definition): Likewise.
	* include/cpplib.h: Add parameter to cpp_spell_token.

Index: gcc/ChangeLog
2005-03-12  Geoffrey Keating  <geoffk@apple.com>

	* c-lex.c (c_lex_with_flags): Add parameter to call to
	cpp_spell_token.

Index: gcc/testsuite/ChangeLog
2005-03-12  Geoffrey Keating  <geoffk@apple.com>

	* gcc.dg/ucnid-1.c: New.
	* gcc.dg/ucnid-2.c: New.
	* gcc.dg/ucnid-3.c: New.
	* gcc.dg/ucnid-4.c: New.
	* gcc.dg/ucnid-5.c: New.
	* gcc.dg/ucnid-6.c: New.
	* gcc.dg/cpp/ucnid-1.c: New.
	* gcc.dg/cpp/ucnid-2.c: New.
	* gcc.dg/cpp/ucnid-3.c: New.
	* g++.dg/other/ucnid-1.C: New.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@96333 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libcpp/charset.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

(limited to 'libcpp/charset.c')

diff --git a/libcpp/charset.c b/libcpp/charset.c
index 6b6c360f73d..cd25f10a2e6 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -907,6 +907,15 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
 		 (int) (str - base), base);
       result = 1;
     }
+  else if (identifier_pos && result == 0x24 
+	   && CPP_OPTION (pfile, dollars_in_ident))
+    {
+      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
+	{
+	  CPP_OPTION (pfile, warn_dollars) = 0;
+	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
+	}
+    }
   else if (identifier_pos)
     {
       int validity = ucn_valid_in_identifier (pfile, result);
@@ -1414,7 +1423,60 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
 
   return result;
 }
+
+/* Convert the identifier ID of length LEN, which might contain UCN
+   escapes, to the source character set (UTF-8 or UTF-EBCDIC) and return
+   its node from PFILE's hash table.  Assumes ID is a valid identifier.  */
+cpp_hashnode *
+_cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
+{
+  /* It turns out that a UCN escape always turns into fewer characters
+     than the escape itself, so we can allocate a temporary in advance.  */
+  uchar * buf = alloca (len + 1);
+  uchar * bufp = buf;		/* Next free byte in BUF.  */
+  size_t idp;			/* Index of the current byte of ID.  */
+  
+  for (idp = 0; idp < len; idp++)
+    if (id[idp] != '\\')
+      *bufp++ = id[idp];	/* Not an escape: copy verbatim.  */
+    else
+      {
+	unsigned length = id[idp+1] == 'u' ? 4 : 8;  /* Max hex digits.  */
+	cppchar_t value = 0;
+	size_t bufleft = len - (bufp - buf);  /* LEN minus bytes written.  */
+	int rval;
+
+	idp += 2;		/* Skip the backslash and next char.  */
+	while (length && idp < len && ISXDIGIT (id[idp]))
+	  {
+	    value = (value << 4) + hex_value (id[idp]);
+	    idp++;
+	    length--;
+	  }
+	idp--;			/* The for loop's idp++ steps past the UCN.  */
+
+	/* Special case for EBCDIC: if the identifier contains
+	   a '$' specified using a UCN, translate it to EBCDIC.  */
+	if (value == 0x24)
+	  {
+	    *bufp++ = '$';
+	    continue;
+	  }
+
+	rval = one_cppchar_to_utf8 (value, &bufp, &bufleft);
+	if (rval)
+	  {
+	    errno = rval;	/* Presumably read by cpp_errno.  */
+	    cpp_errno (pfile, CPP_DL_ERROR,
+		       "converting UCN to source character set");
+	    break;		/* Stop converting; look up BUF so far.  */
+	  }
+      }
 
+  return CPP_HASHNODE (ht_lookup (pfile->hash_table, 
+				  buf, bufp - buf, HT_ALLOC));
+}
+
 /* Convert an input buffer (containing the complete contents of one
    source file) from INPUT_CHARSET to the source character set.  INPUT
    points to the input buffer, SIZE is its allocated size, and LEN is
-- 
cgit v1.2.1