diff options
author | geoffk <geoffk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-03-12 10:44:06 +0000 |
---|---|---|
committer | geoffk <geoffk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-03-12 10:44:06 +0000 |
commit | bb1fa6bb7346b75421ab702f475f3a711a02f822 (patch) | |
tree | 96b619db02d90b96e5dc09601db8bd7a58e95367 /libcpp/charset.c | |
parent | 1a6a3e92b3ef88587fc1da44fea2d50b817e5440 (diff) | |
download | gcc-bb1fa6bb7346b75421ab702f475f3a711a02f822.tar.gz |
Index: libcpp/ChangeLog
2005-03-12 Geoffrey Keating <geoffk@apple.com>
* directives.c (glue_header_name): Update call to cpp_spell_token.
* internal.h (_cpp_interpret_identifier): New.
* charset.c (_cpp_interpret_identifier): New.
(_cpp_valid_ucn): Allow UCN version of '$'.
* lex.c (lex_identifier): Add extra parameter to indicate if initial
character was '$' or '\'. Support identifiers with UCNs.
(forms_identifier_p): Allow UCNs.
(_cpp_lex_direct): Pass extra parameter to lex_identifier.
(utf8_to_ucn): New.
(cpp_spell_token): Add FORSTRING parameter. Use it.
(cpp_token_as_text): Update call to cpp_spell_token.
(cpp_output_token): Write UCNs back out.
(stringify_arg): Update call to cpp_spell_token.
(paste_tokens): Likewise.
(cpp_macro_definition): Likewise.
* macro.c (stringify_arg): Likewise.
(paste_tokens): Likewise.
(cpp_macro_definition): Likewise.
* include/cpplib.h: Add parameter to cpp_spell_token.
Index: gcc/ChangeLog
2005-03-12 Geoffrey Keating <geoffk@apple.com>
* c-lex.c (c_lex_with_flags): Add parameter to call to
cpp_spell_token.
Index: gcc/testsuite/ChangeLog
2005-03-12 Geoffrey Keating <geoffk@apple.com>
* gcc.dg/ucnid-1.c: New.
* gcc.dg/ucnid-2.c: New.
* gcc.dg/ucnid-3.c: New.
* gcc.dg/ucnid-4.c: New.
* gcc.dg/ucnid-5.c: New.
* gcc.dg/ucnid-6.c: New.
* gcc.dg/cpp/ucnid-1.c: New.
* gcc.dg/cpp/ucnid-2.c: New.
* gcc.dg/cpp/ucnid-3.c: New.
* g++.dg/other/ucnid-1.C: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@96333 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp/charset.c')
-rw-r--r-- | libcpp/charset.c | 62 |
1 files changed, 62 insertions, 0 deletions
```diff
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 6b6c360f73d..cd25f10a2e6 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -907,6 +907,15 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
                  (int) (str - base), base);
       result = 1;
     }
+  else if (identifier_pos && result == 0x24
+           && CPP_OPTION (pfile, dollars_in_ident))
+    {
+      if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
+        {
+          CPP_OPTION (pfile, warn_dollars) = 0;
+          cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
+        }
+    }
   else if (identifier_pos)
     {
       int validity = ucn_valid_in_identifier (pfile, result);
@@ -1414,7 +1423,60 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
   return result;
 }
+
+/* Convert an identifier denoted by ID and LEN, which might contain
+   UCN escapes, to the source character set, either UTF-8 or
+   UTF-EBCDIC. Assumes that the identifier is actually a valid identifier. */
+cpp_hashnode *
+_cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
+{
+  /* It turns out that a UCN escape always turns into fewer characters
+     than the escape itself, so we can allocate a temporary in advance. */
+  uchar * buf = alloca (len + 1);
+  uchar * bufp = buf;
+  size_t idp;
+
+  for (idp = 0; idp < len; idp++)
+    if (id[idp] != '\\')
+      *bufp++ = id[idp];
+    else
+      {
+        unsigned length = id[idp+1] == 'u' ? 4 : 8;
+        cppchar_t value = 0;
+        size_t bufleft = len - (bufp - buf);
+        int rval;
+
+        idp += 2;
+        while (length && idp < len && ISXDIGIT (id[idp]))
+          {
+            value = (value << 4) + hex_value (id[idp]);
+            idp++;
+            length--;
+          }
+        idp--;
+
+        /* Special case for EBCDIC: if the identifier contains
+           a '$' specified using a UCN, translate it to EBCDIC. */
+        if (value == 0x24)
+          {
+            *bufp++ = '$';
+            continue;
+          }
+
+        rval = one_cppchar_to_utf8 (value, &bufp, &bufleft);
+        if (rval)
+          {
+            errno = rval;
+            cpp_errno (pfile, CPP_DL_ERROR,
+                       "converting UCN to source character set");
+            break;
+          }
+      }
+  return CPP_HASHNODE (ht_lookup (pfile->hash_table,
+                                  buf, bufp - buf, HT_ALLOC));
+}
+
 /* Convert an input buffer (containing the complete contents of one
    source file) from INPUT_CHARSET to the source character set. INPUT points
    to the input buffer, SIZE is its allocated size, and LEN is
```