summaryrefslogtreecommitdiff
path: root/libcpp
diff options
context:
space:
mode:
author zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-02-20 17:01:32 +0000
committer zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> 2005-02-20 17:01:32 +0000
commit 624d37a67346eddd65841cd7093190b69a5db148 (patch)
tree 60cdafb935d90504cac1d3437ff96f90d70b9c09 /libcpp
parent 628dfe67321e94b743a840ef4e2ee5d0b75a9e02 (diff)
download gcc-624d37a67346eddd65841cd7093190b69a5db148.tar.gz
PR 18785
libcpp:
 * charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro.
 (cpp_host_to_exec_charset): New function.
 * include/cpplib.h: Declare cpp_host_to_exec_charset.

gcc:
 * langhooks.h (struct lang_hooks): Add to_target_charset.
 * langhooks.c (lhd_to_target_charset): New function.
 * langhooks-def.h: Declare lhd_to_target_charset.
 (LANG_HOOKS_TO_TARGET_CHARSET): New macro.
 (LANG_HOOKS_INITIALIZER): Update.
 * c-common.c (c_common_to_target_charset): New function.
 * c-common.h: Declare it.
 * c-objc-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
 c_common_to_target_charset.
 * defaults.c (TARGET_BELL, TARGET_BS, TARGET_CR, TARGET_DIGIT0)
 (TARGET_ESC, TARGET_FF, TARGET_NEWLINE, TARGET_TAB, TARGET_VT):
 Delete definitions.
 * system.h: Poison them.
 * doc/tm.texi: Don't discuss them.
 * builtins.c (fold_builtin_isdigit): Use lang_hooks.to_target_charset.
 * c-pretty-print.c (pp_c_integer_constant): Don't use pp_c_char.
 (pp_c_char): Do not attempt to generate letter escapes for
 newline, tab, etc.
 * config/arm/arm.c (output_ascii_pseudo_op): Likewise.
 * config/mips/mips.c (mips_output_ascii): Likewise.

gcc/cp:
 * cp-objcp-common.h (LANG_HOOKS_TO_TARGET_CHARSET): Set to
 c_common_to_target_charset. Delete bogus comment.

gcc/testsuite:
 * gcc.dg/charset/builtin1.c: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@95304 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libcpp')
-rw-r--r-- libcpp/ChangeLog 19
-rw-r--r-- libcpp/charset.c 61
-rw-r--r-- libcpp/include/cpplib.h 9
3 files changed, 75 insertions, 14 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 0764fc8e4b3..5e6ad289e36 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,8 +1,15 @@
+2005-02-20 Zack Weinberg <zack@codesourcery.com>
+
+ PR 18785
+ * charset.c (LAST_POSSIBLY_BASIC_SOURCE_CHAR): New helper macro.
+ (cpp_host_to_exec_charset): New function.
+ * include/cpplib.h: Declare cpp_host_to_exec_charset.
+
2005-02-19 Devang Patel <dpatel@apple.com>
* charset.c (_cpp_convert_input): Check '\r' before inserting
'\n' at the end.
-
+
2005-02-15 Eric Christopher <echristo@redhat.com>
PR preprocessor/19077
@@ -41,7 +48,7 @@
* include/cpplib.h (c_lang): Fix comment to say cpp_create_reader.
* include/cpplib.h: Also update copyright years.
-
+
2005-01-03 Geoffrey Keating <geoffk@apple.com>
* files.c (_cpp_find_file): Add files found by search_path_exhausted
@@ -64,7 +71,7 @@
2004-11-28 Nathanael Nerode <neroden@gcc.gnu.org>
- PR preprocessor/17610
+ PR preprocessor/17610
* directives.c (do_include_common): Error out if an empty filename
is given for #include (or #include_next or #import).
@@ -87,7 +94,7 @@
* configure: Regenerate.
2004-11-23 Daniel Jacobowitz <dan@codesourcery.com>
- Joseph Myers <joseph@codesourcery.com>
+ Joseph Myers <joseph@codesourcery.com>
* internal.h (struct lexer_state): Add in_deferred_pragma.
* directives.c (struct pragma_entry): Add allow_expansion.
@@ -100,7 +107,7 @@
* include/cpplib.h (cpp_register_pragma): Update prototype.
2004-11-18 Daniel Jacobowitz <dan@codesourcery.com>
- Mark Mitchell <mark@codesourcery.com>
+ Mark Mitchell <mark@codesourcery.com>
* configure.ac (i[34567]86-*-solaris2.1[0-9]*): Set
need_64bit_hwint=yes.
@@ -115,7 +122,7 @@
Remove local srcdir path from generated file.
2004-11-04 Zack Weinberg <zack@codesourcery.com>
- Gerald Pfeifer <gerald@pfeifer.com>
+ Gerald Pfeifer <gerald@pfeifer.com>
* internal.h (HAVE_ICONV): Undefine if we do not have HAVE_ICONV_H
as well.
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 37859c52a31..6b6c360f73d 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -81,8 +81,10 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#if HOST_CHARSET == HOST_CHARSET_ASCII
#define SOURCE_CHARSET "UTF-8"
+#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e /* '~' in ASCII; upper bound tested by cpp_host_to_exec_charset. */
#elif HOST_CHARSET == HOST_CHARSET_EBCDIC
#define SOURCE_CHARSET "UTF-EBCDIC"
+#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF /* Any single-byte value passes the bound on EBCDIC hosts. */
#else
#error "Unrecognized basic host character set"
#endif
@@ -714,6 +716,63 @@ _cpp_destroy_iconv (cpp_reader *pfile)
}
}
+/* Utility routine for use by a full compiler.  C is a character taken
+   from the *basic* source character set, encoded in the host's
+   execution encoding.  Convert it to (the target's) execution
+   encoding, and return that value.
+
+   Issues an internal error if C's representation in the narrow
+   execution character set fails to be a single-byte value (C99
+   5.2.1p3: "The representation of each member of the source and
+   execution character sets shall fit in a byte.")  May also issue an
+   internal error if C fails to be a member of the basic source
+   character set (testing this exactly is too hard, especially when
+   the host character set is EBCDIC).
+
+   Returns 0 whenever an internal error has been reported.  The
+   scratch conversion buffer is released on every path, including
+   the error paths.  */
+cppchar_t
+cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
+{
+  uchar sbuf[1];
+  struct _cpp_strbuf tbuf;
+  cppchar_t result = 0;
+
+  /* This test is merely an approximation, but it suffices to catch
+     the most important thing, which is that we don't get handed a
+     character outside the unibyte range of the host character set.
+     Nothing has been allocated yet, so returning directly is safe.  */
+  if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR)
+    {
+      cpp_error (pfile, CPP_DL_ICE,
+                 "character 0x%lx is not in the basic source character set\n",
+                 (unsigned long)c);
+      return 0;
+    }
+
+  /* Being a character in the unibyte range of the host character set,
+     we can safely splat it into a one-byte buffer and trust that that
+     is a well-formed string.  */
+  sbuf[0] = c;
+
+  /* This should never need to reallocate, but just in case...  */
+  tbuf.asize = 1;
+  tbuf.text = xmalloc (tbuf.asize);
+  tbuf.len = 0;
+
+  /* From here on every outcome falls through to the single free below,
+     so the buffer cannot leak when a diagnostic is issued.  RESULT
+     stays 0 unless the conversion yields exactly one byte.  */
+  if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf))
+    cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set");
+  else if (tbuf.len != 1)
+    cpp_error (pfile, CPP_DL_ICE,
+               "character 0x%lx is not unibyte in execution character set",
+               (unsigned long)c);
+  else
+    result = tbuf.text[0];
+
+  free (tbuf.text);
+  return result;
+}
+
+
/* Utility routine that computes a mask of the form 0000...111... with
WIDTH 1-bits. */
@@ -727,8 +786,6 @@ width_to_mask (size_t width)
return ((size_t) 1 << width) - 1;
}
-
-
/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
the start of an identifier, and 0 if C is not valid in an
identifier. We assume C has already gone through the checks of
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index c3814460705..70f8d895afd 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -659,6 +659,9 @@ extern bool cpp_interpret_string_notranslate (cpp_reader *,
const cpp_string *, size_t,
cpp_string *, bool);
+/* Convert a host character constant to the execution character set.
+   The second argument is the character to convert; the converted
+   value is returned.  An internal error (CPP_DL_ICE) is reported and
+   0 is returned if the character is above the range accepted for the
+   basic source character set, if the conversion fails, or if the
+   converted form is not exactly one byte.  */
+extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
+
/* Used to register macros and assertions, perhaps from the command line.
The text is the same as the command line argument. */
extern void cpp_define (cpp_reader *, const char *);
@@ -743,12 +746,6 @@ cpp_num cpp_num_sign_extend (cpp_num, size_t);
#define CPP_DL_WARNING_P(l) (CPP_DL_EXTRACT (l) >= CPP_DL_WARNING \
&& CPP_DL_EXTRACT (l) <= CPP_DL_PEDWARN)
-/* N.B. The error-message-printer prototypes have not been nicely
- formatted because exgettext needs to see 'msgid' on the same line
- as the name of the function in order to work properly. Only the
- string argument gets a name in an effort to keep the lines from
- getting ridiculously oversized. */
-
/* Output a diagnostic of some kind. */
extern void cpp_error (cpp_reader *, int, const char *msgid, ...)
ATTRIBUTE_PRINTF_3;