diff options
author | zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-02-20 17:01:32 +0000 |
---|---|---|
committer | zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-02-20 17:01:32 +0000 |
commit | 624d37a67346eddd65841cd7093190b69a5db148 (patch) | |
tree | 60cdafb935d90504cac1d3437ff96f90d70b9c09 /libcpp | |
parent | 628dfe67321e94b743a840ef4e2ee5d0b75a9e02 (diff) | |
download | gcc-624d37a67346eddd65841cd7093190b69a5db148.tar.gz |
PR 18785
libcpp:
* charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro.
(cpp_host_to_exec_charset): New function.
* include/cpplib.h: Declare cpp_host_to_exec_charset.
gcc:
* langhooks.h (struct lang_hooks): Add to_target_charset.
* langhooks.c (lhd_to_target_charset): New function.
* langhooks-def.h: Declare lhd_to_target_charset.
(LANG_HOOKS_TO_TARGET_CHARSET): New macro.
(LANG_HOOKS_INITIALIZER): Update.
* c-common.c (c_common_to_target_charset): New function.
* c-common.h: Declare it.
* c-objc-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
c_common_to_target_charset.
* defaults.c (TARGET_BELL, TARGET_BS, TARGET_CR, TARGET_DIGIT0)
(TARGET_ESC, TARGET_FF, TARGET_NEWLINE, TARGET_TAB, TARGET_VT):
Delete definitions.
* system.h: Poison them.
* doc/tm.texi: Don't discuss them.
* builtins.c (fold_builtin_isdigit): Use lang_hooks.to_target_charset.
* c-pretty-print.c (pp_c_integer_constant): Don't use pp_c_char.
(pp_c_char): Do not attempt to generate letter escapes for
newline, tab, etc.
* config/arm/arm.c (output_ascii_pseudo_op): Likewise.
* config/mips/mips.c (mips_output_ascii): Likewise.
gcc/cp:
* cp-objcp-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
c_common_to_target_charset. Delete bogus comment.
gcc/testsuite:
* gcc.dg/charset/builtin1.c: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@95304 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp')
-rw-r--r-- | libcpp/ChangeLog | 19 | ||||
-rw-r--r-- | libcpp/charset.c | 61 | ||||
-rw-r--r-- | libcpp/include/cpplib.h | 9 |
3 files changed, 75 insertions, 14 deletions
```diff
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 0764fc8e4b3..5e6ad289e36 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,8 +1,15 @@
+2005-02-20  Zack Weinberg  <zack@codesourcery.com>
+
+	PR 18785
+	* charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro.
+	(cpp_host_to_exec_charset): New function.
+	* include/cpplib.h: Declare cpp_host_to_exec_charset.
+
 2005-02-19  Devang Patel  <dpatel@apple.com>
 
 	* charset.c (_cpp_convert_input): Check '\r' before inserting
 	'\n' at the end.
-	
+
 2005-02-15  Eric Christopher  <echristo@redhat.com>
 
 	PR preprocessor/19077
@@ -41,7 +48,7 @@
 	* include/cpplib.h (c_lang): Fix comment to say cpp_create_reader.
 
 	* include/cpplib.h: Also update copyright years.
-	
+
 2005-01-03  Geoffrey Keating  <geoffk@apple.com>
 
 	* files.c (_cpp_find_file): Add files found by search_path_exhausted
@@ -64,7 +71,7 @@
 
 2004-11-28  Nathanael Nerode  <neroden@gcc.gnu.org>
 
-        PR preprocessor/17610
+	PR preprocessor/17610
 	* directives.c (do_include_common): Error out if an empty filename
 	is given for #include (or #include_next or #import).
 
@@ -87,7 +94,7 @@
 	* configure: Regenerate.
 
 2004-11-23  Daniel Jacobowitz  <dan@codesourcery.com>
-            Joseph Myers  <joseph@codesourcery.com>
+	    Joseph Myers  <joseph@codesourcery.com>
 
 	* internal.h (struct lexer_state): Add in_deferred_pragma.
 	* directives.c (struct pragma_entry): Add allow_expansion.
@@ -100,7 +107,7 @@
 	* include/cpplib.h (cpp_register_pragma): Update prototype.
 
 2004-11-18  Daniel Jacobowitz  <dan@codesourcery.com>
-            Mark Mitchell  <mark@codesourcery.com>
+	    Mark Mitchell  <mark@codesourcery.com>
 
 	* configure.ac (i[34567]86-*-solaris2.1[0-9]*): Set
 	need_64bit_hwint=yes.
@@ -115,7 +122,7 @@
 	Remove local srcdir path from generated file.
 
 2004-11-04  Zack Weinberg  <zack@codesourcery.com>
-            Gerald Pfeifer  <gerald@pfeifer.com>
+	    Gerald Pfeifer  <gerald@pfeifer.com>
 
 	* internal.h (HAVE_ICONV): Undefine if we do not have HAVE_ICONV_H
 	as well.
```
```diff
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 37859c52a31..6b6c360f73d 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -81,8 +81,10 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
 
 #if HOST_CHARSET == HOST_CHARSET_ASCII
 #define SOURCE_CHARSET "UTF-8"
+#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e
 #elif HOST_CHARSET == HOST_CHARSET_EBCDIC
 #define SOURCE_CHARSET "UTF-EBCDIC"
+#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF
 #else
 #error "Unrecognized basic host character set"
 #endif
@@ -714,6 +716,63 @@ _cpp_destroy_iconv (cpp_reader *pfile)
     }
 }
 
+/* Utility routine for use by a full compiler.  C is a character taken
+   from the *basic* source character set, encoded in the host's
+   execution encoding.  Convert it to (the target's) execution
+   encoding, and return that value.
+
+   Issues an internal error if C's representation in the narrow
+   execution character set fails to be a single-byte value (C99
+   5.2.1p3: "The representation of each member of the source and
+   execution character sets shall fit in a byte.")  May also issue an
+   internal error if C fails to be a member of the basic source
+   character set (testing this exactly is too hard, especially when
+   the host character set is EBCDIC).  */
+cppchar_t
+cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
+{
+  uchar sbuf[1];
+  struct _cpp_strbuf tbuf;
+
+  /* This test is merely an approximation, but it suffices to catch
+     the most important thing, which is that we don't get handed a
+     character outside the unibyte range of the host character set.  */
+  if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
+    {
+      cpp_error (pfile, CPP_DL_ICE,
+                 "character 0x%lx is not in the basic source character set\n",
+                 (unsigned long)c);
+      return 0;
+    }
+
+  /* Being a character in the unibyte range of the host character set,
+     we can safely splat it into a one-byte buffer and trust that that
+     is a well-formed string.  */
+  sbuf[0] = c;
+
+  /* This should never need to reallocate, but just in case... */
+  tbuf.asize = 1;
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+
+  if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
+    {
+      cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
+      return 0;
+    }
+  if (tbuf.len != 1)
+    {
+      cpp_error (pfile, CPP_DL_ICE,
+                 "character 0x%lx is not unibyte in execution character set",
+                 (unsigned long)c);
+      return 0;
+    }
+  c = tbuf.text[0];
+  free(tbuf.text);
+  return c;
+}
+
+
 
 /* Utility routine that computes a mask of the form 0000...111... with
    WIDTH 1-bits.  */
@@ -727,8 +786,6 @@ width_to_mask (size_t width)
     return ((size_t) 1 << width) - 1;
 }
 
-
-
 /* Returns 1 if C is valid in an identifier, 2 if C is valid except at
    the start of an identifier, and 0 if C is not valid in an
    identifier.  We assume C has already gone through the checks of
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index c3814460705..70f8d895afd 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -659,6 +659,9 @@ extern bool cpp_interpret_string_notranslate (cpp_reader *,
                                               const cpp_string *, size_t,
                                               cpp_string *, bool);
 
+/* Convert a host character constant to the execution character set.  */
+extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
+
 /* Used to register macros and assertions, perhaps from the command line.
    The text is the same as the command line argument.  */
 extern void cpp_define (cpp_reader *, const char *);
@@ -743,12 +746,6 @@ cpp_num cpp_num_sign_extend (cpp_num, size_t);
 #define CPP_DL_WARNING_P(l) (CPP_DL_EXTRACT (l) >= CPP_DL_WARNING \
                              && CPP_DL_EXTRACT (l) <= CPP_DL_PEDWARN)
 
-/* N.B. The error-message-printer prototypes have not been nicely
-   formatted because exgettext needs to see 'msgid' on the same line
-   as the name of the function in order to work properly.  Only the
-   string argument gets a name in an effort to keep the lines from
-   getting ridiculously oversized.  */
-
 /* Output a diagnostic of some kind.  */
 extern void cpp_error (cpp_reader *, int, const char *msgid, ...)
   ATTRIBUTE_PRINTF_3;
```