diff options
author | zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-07-05 00:24:00 +0000 |
---|---|---|
committer | zack <zack@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-07-05 00:24:00 +0000 |
commit | ebc0381062ddc4bc6408feb6516dcbc6c3525e92 (patch) | |
tree | 34c7734f7acee49beff2b3d99cbdf53576456697 /gcc/c-lex.c | |
parent | c472286c1a11cc726c1f0365e5805197dd96e41e (diff) | |
download | gcc-ebc0381062ddc4bc6408feb6516dcbc6c3525e92.tar.gz |
* cpplib.h (CPP_AT_NAME, CPP_OBJC_STRING): New token types.
(struct cpp_options): Add narrow_charset, wide_charset,
bytes_big_endian fields. Remove EBCDIC field.
(cpp_init_iconv, cpp_interpret_string): New external interfaces.
* cpphash.h: Include <iconv.h> if we have it, otherwise
provide a dummy definition of iconv_t.
(struct cpp_reader): Add narrow_cset_desc and wide_cset_desc fields.
(_cpp_valid_ucn): Update prototype.
(_cpp_destroy_iconv): New prototype.
* doc/cpp.texi: Document character set handling.
* doc/cppopts.texi: Document -fexec-charset= and -fwide-exec-charset=.
* doc/extend.texi: Delete entire section on multiline strings.
Rewrite section on __FUNCTION__ etc now that these are
variables in C.
* cppucnid.tab, cppucnid.pl: New files.
* cppucnid.h: New generated file.
* cppcharset.c: Include cppucnid.h. Lots of commentary added.
(iconv_open, iconv, iconv_close): Provide dummy definitions
if !HAVE_ICONV.
(SOURCE_CHARSET, struct strbuf, init_iconv_desc, cpp_init_iconv,
_cpp_destroy_iconv, convert_cset, width_to_mask, convert_ucn,
emit_numeric_escape, convert_hex, convert_oct, convert_escape,
cpp_interpret_string, narrow_str_to_charconst,
wide_str_to_charconst): New.
(ucn_valid_in_identifier): Use a binary search through the
ucnranges table defined in cppucnid.h, not a long chain of if
statements.
(_cpp_valid_ucn): Add a limit pointer. Downgrade "universal
character names are only valid in C++ and C99" to a warning.
Issue the "meaning of \[uU] is different in traditional C"
warning here. Take care not to let iconv see an invalid UCS
value if we get a malformed UCN. Issue an error if we don't
have iconv.
(cpp_interpret_charconst): Moved here from cpplex.c. Use
cpp_interpret_string to do the heavy lifting.
* cppinit.c (cpp_create_reader): Initialize bytes_big_endian,
narrow_charset, wide_charset fields of options structure.
(cpp_destroy): Call _cpp_destroy_iconv.
* cpplex.c (forms_identifier_p): Adjust call to _cpp_valid_ucn.
(maybe_read_ucn, hex_digit_value, cpp_parse_escape): Delete.
(cpp_interpret_charconst): Moved to cppcharset.c.
* cpplib.c (dequote_string): Delete.
(interpret_string_notranslate): New.
(do_line, do_linemarker): Use interpret_string_notranslate.
* Makefile.in (cppcharset.o): Depend on cppucnid.h.
* c-common.c (fname_string, combine_strings): Delete.
* c-common.h (fname_string, combine_strings): Delete prototypes.
* c-lex.c (ignore_escape_flag): Delete.
(cb_ident): Use cpp_interpret_string, not lex_string.
(get_nonpadding_token): New function.
(c_lex): Handle Objective-C @-prefixed identifiers and strings here.
Adjust calls to lex_string. Don't write *value twice.
(lex_string): Now handles string constant concatenation.
Most of the work handed off to cpp_interpret_string.
Call fix_string_type here.
* c-parse.in (STRING_FUNC_NAME, VAR_FUNC_NAME): Replace with
FUNC_NAME, throughout.
(OBJC_STRING): New token type.
(primary:STRING): No need to call fix_string_type here.
(primary:objc_string): Make that OBJC_STRING.
(objc_string nonterminal): Delete.
(yylexname): Delete code to handle fake string constants.
(yylexstring): Delete entirely.
(_yylex): Handle CPP_AT_NAME and CPP_OBJC_STRING. No need
to handle CPP_ATSIGN.
* c.opt (-fexec-charset=, -fwide-exec-charset=): New options.
* c-opts.c (missing_arg, c_common_handle_option): Handle
OPT_fexec_charset_ and OPT_fwide_exec_charset_.
(c_common_init): Set cpp_opts->bytes_big_endian, not
cpp_opts->EBCDIC. Call cpp_init_iconv.
(print_help): Document -fexec-charset= and -fwide-exec-charset=.
(TARGET_EBCDIC): Delete default definition.
* objc/objc-act.c (build_objc_string_object): No need to
handle string constant concatenation.
cp:
* parser.c (cp_lexer_read_token): No need to handle string
constant concatenation.
testsuite:
* gcc.c-torture/execute/wchar_t-1.x: New file; XFAIL wchar_t-1.c
everywhere.
* gcc.dg/concat.c: Concatenation of string constants with
__FUNCTION__ / __PRETTY_FUNCTION__ is now a hard error.
* gcc.dg/wtr-strcat-1.c: Loosen dg-warning regexp.
* gcc.dg/cpp/escape-2.c: Use wide character constants where
necessary to avoid multi-character character constant warning.
* gcc.dg/cpp/escape.c: Likewise.
* gcc.dg/cpp/ucs.c: Likewise.
Remove backslashes from dg-bogus comments, as they confuse Tcl.
Fix a typo.
libstdc++-v3:
* testsuite/22_locale/collate/compare/wchar_t/2.cc
* testsuite/22_locale/collate/compare/wchar_t/wrapped_env.cc
* testsuite/22_locale/collate/compare/wchar_t/wrapped_locale.cc
* testsuite/22_locale/collate/hash/wchar_t/2.cc
* testsuite/22_locale/collate/hash/wchar_t/wrapped_env.cc
* testsuite/22_locale/collate/hash/wchar_t/wrapped_locale.cc
* testsuite/22_locale/collate/transform/wchar_t/2.cc
* testsuite/22_locale/collate/transform/wchar_t/wrapped_env.cc
* testsuite/22_locale/collate/transform/wchar_t/wrapped_locale.cc:
XFAIL on all targets.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@68952 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/c-lex.c')
-rw-r--r-- | gcc/c-lex.c | 195 |
1 files changed, 132 insertions, 63 deletions
diff --git a/gcc/c-lex.c b/gcc/c-lex.c index 2cca2313c2f..f5733604a5a 100644 --- a/gcc/c-lex.c +++ b/gcc/c-lex.c @@ -61,16 +61,13 @@ static splay_tree file_info_tree; int pending_lang_change; /* If we need to switch languages - C++ only */ int c_header_level; /* depth in C headers - C++ only */ -/* Nonzero tells yylex to ignore \ in string constants. */ -static int ignore_escape_flag; - static tree interpret_integer (const cpp_token *, unsigned int); static tree interpret_float (const cpp_token *, unsigned int); static enum integer_type_kind narrowest_unsigned_type (tree, unsigned int); static enum integer_type_kind narrowest_signed_type (tree, unsigned int); -static tree lex_string (const cpp_string *); +static enum cpp_ttype lex_string (const cpp_token *, tree *, bool); static tree lex_charconst (const cpp_token *); static void update_header_times (const char *); static int dump_one_header (splay_tree_node, void *); @@ -184,8 +181,12 @@ cb_ident (cpp_reader *pfile ATTRIBUTE_UNUSED, if (! flag_no_ident) { /* Convert escapes in the string. */ - tree value ATTRIBUTE_UNUSED = lex_string (str); - ASM_OUTPUT_IDENT (asm_out_file, TREE_STRING_POINTER (value)); + cpp_string cstr = { 0, 0 }; + if (cpp_interpret_string (pfile, str, 1, &cstr, false)) + { + ASM_OUTPUT_IDENT (asm_out_file, cstr.text); + free ((void *)cstr.text); + } } #endif } @@ -296,12 +297,10 @@ cb_undef (cpp_reader *pfile ATTRIBUTE_UNUSED, unsigned int line, (const char *) NODE_NAME (node)); } -int -c_lex (tree *value) +static inline const cpp_token * +get_nonpadding_token (void) { const cpp_token *tok; - - retry: timevar_push (TV_CPP); do tok = cpp_get_token (parse_in); @@ -310,10 +309,22 @@ c_lex (tree *value) /* The C++ front end does horrible things with the current line number. To ensure an accurate line number, we must reset it - every time we return a token. */ + every time we advance a token. 
*/ input_line = src_lineno; - *value = NULL_TREE; + return tok; +} + +int +c_lex (tree *value) +{ + const cpp_token *tok; + location_t atloc; + + retry: + tok = get_nonpadding_token (); + + retry_after_at: switch (tok->type) { case CPP_NAME: @@ -345,6 +356,37 @@ c_lex (tree *value) } break; + case CPP_ATSIGN: + /* An @ may give the next token special significance in Objective-C. */ + atloc = input_location; + tok = get_nonpadding_token (); + if (c_dialect_objc ()) + { + tree val; + switch (tok->type) + { + case CPP_NAME: + val = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node)); + if (C_IS_RESERVED_WORD (val) + && OBJC_IS_AT_KEYWORD (C_RID_CODE (val))) + { + *value = val; + return CPP_AT_NAME; + } + break; + + case CPP_STRING: + case CPP_WSTRING: + return lex_string (tok, value, true); + + default: break; + } + } + + /* ... or not. */ + error ("%Hstray '@' in program", &atloc); + goto retry_after_at; + case CPP_OTHER: { cppchar_t c = tok->val.str.text[0]; @@ -365,7 +407,7 @@ c_lex (tree *value) case CPP_STRING: case CPP_WSTRING: - *value = lex_string (&tok->val.str); + return lex_string (tok, value, false); break; /* These tokens should not be visible outside cpplib. */ @@ -374,7 +416,9 @@ c_lex (tree *value) case CPP_MACRO_ARG: abort (); - default: break; + default: + *value = NULL_TREE; + break; } return tok->type; @@ -571,75 +615,100 @@ interpret_float (const cpp_token *token, unsigned int flags) return value; } -static tree -lex_string (const cpp_string *str) +/* Convert a series of STRING and/or WSTRING tokens into a tree, + performing string constant concatenation. TOK is the first of + these. VALP is the location to write the string into. OBJC_STRING + indicates whether an '@' token preceded the incoming token. + Returns the CPP token type of the result (CPP_STRING, CPP_WSTRING, + or CPP_OBJC_STRING). + + This is unfortunately more work than it should be. If any of the + strings in the series has an L prefix, the result is a wide string + (6.4.5p4). 
Whether or not the result is a wide string affects the + meaning of octal and hexadecimal escapes (6.4.4.4p6,9). But escape + sequences do not continue across the boundary between two strings in + a series (6.4.5p7), so we must not lose the boundaries. Therefore + cpp_interpret_string takes a vector of cpp_string structures, which + we must arrange to provide. */ + +static enum cpp_ttype +lex_string (const cpp_token *tok, tree *valp, bool objc_string) { - bool wide; tree value; - char *buf, *q; - cppchar_t c; - const unsigned char *p, *limit; + bool wide = false; + size_t count = 1; + struct obstack str_ob; + cpp_string istr; - wide = str->text[0] == 'L'; - p = str->text + 1 + wide; - limit = str->text + str->len - 1; - q = buf = alloca ((str->len + 1) * (wide ? WCHAR_BYTES : 1)); + /* Try to avoid the overhead of creating and destroying an obstack + for the common case of just one string. */ + cpp_string str = tok->val.str; + cpp_string *strs = &str; - while (p < limit) - { - c = *p++; + if (tok->type == CPP_WSTRING) + wide = true; - if (c == '\\' && !ignore_escape_flag) - c = cpp_parse_escape (parse_in, &p, limit, wide); + tok = get_nonpadding_token (); + if (c_dialect_objc () && tok->type == CPP_ATSIGN) + { + objc_string = true; + tok = get_nonpadding_token (); + } + if (tok->type == CPP_STRING || tok->type == CPP_WSTRING) + { + gcc_obstack_init (&str_ob); + obstack_grow (&str_ob, &str, sizeof (cpp_string)); - /* Add this single character into the buffer either as a wchar_t, - a multibyte sequence, or as a single byte. 
*/ - if (wide) + do { - unsigned charwidth = TYPE_PRECISION (char_type_node); - unsigned bytemask = (1 << charwidth) - 1; - int byte; - - for (byte = 0; byte < WCHAR_BYTES; ++byte) + count++; + if (tok->type == CPP_WSTRING) + wide = true; + obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string)); + + tok = get_nonpadding_token (); + if (c_dialect_objc () && tok->type == CPP_ATSIGN) { - int n; - if (byte >= (int) sizeof (c)) - n = 0; - else - n = (c >> (byte * charwidth)) & bytemask; - if (BYTES_BIG_ENDIAN) - q[WCHAR_BYTES - byte - 1] = n; - else - q[byte] = n; + objc_string = true; + tok = get_nonpadding_token (); } - q += WCHAR_BYTES; - } - else - { - *q++ = c; } + while (tok->type == CPP_STRING || tok->type == CPP_WSTRING); + strs = obstack_finish (&str_ob); } - /* Terminate the string value, either with a single byte zero - or with a wide zero. */ + /* We have read one more token than we want. */ + _cpp_backup_tokens (parse_in, 1); + + if (count > 1 && !objc_string && warn_traditional && !in_system_header) + warning ("traditional C rejects string constant concatenation"); - if (wide) + if (cpp_interpret_string (parse_in, strs, count, &istr, wide)) { - memset (q, 0, WCHAR_BYTES); - q += WCHAR_BYTES; + value = build_string (istr.len, (char *)istr.text); + free ((void *)istr.text); } else { - *q++ = '\0'; + /* Callers cannot generally handle error_mark_node in this context, + so return the empty string instead. cpp_interpret_string has + issued an error. */ + if (wide) + value = build_string (TYPE_PRECISION (wchar_type_node) + / TYPE_PRECISION (char_type_node), + "\0\0\0"); /* widest supported wchar_t + is 32 bits */ + else + value = build_string (1, ""); } - value = build_string (q - buf, buf); + TREE_TYPE (value) = wide ? 
wchar_array_type_node : char_array_type_node; + *valp = fix_string_type (value); - if (wide) - TREE_TYPE (value) = wchar_array_type_node; - else - TREE_TYPE (value) = char_array_type_node; - return value; + if (strs != &str) + obstack_free (&str_ob, 0); + + return objc_string ? CPP_OBJC_STRING : wide ? CPP_WSTRING : CPP_STRING; } /* Converts a (possibly wide) character constant token into a tree. */ |