diff options
author | neil <neil@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-04-20 07:29:23 +0000 |
---|---|---|
committer | neil <neil@138bc75d-0d04-0410-961f-82ee72b054a4> | 2003-04-20 07:29:23 +0000 |
commit | 2cbf1359092416d271b02844198e7ad6e5e56284 (patch) | |
tree | 01f484dfa9250821cfb9113a0a797ca2bec80e59 /gcc/cpplex.c | |
parent | 8d6d7930ebb0b4a3a39a502fdbca5842d1f226a4 (diff) | |
download | gcc-2cbf1359092416d271b02844198e7ad6e5e56284.tar.gz |
* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
(cppcharset.o): New target.
* c-lex.c (is_extended_char): Move to cppcharset.c.
(utf8_extend_token): Delete.
* cppcharset.c: New file.
* cpphash.h (_cpp_valid_ucn): New.
* cpplex.c (lex_identifier): Update prototype.
(continues_identifier_p): Rename forms_identifier_p. Handle UCN
escapes.
(maybe_read_ucs): Rename maybe_read_ucn. Update to use code
in cppcharset.c.
(lex_number, lex_identifier, cpp_parse_escape): Update.
(_cpp_lex_direct): Update to handle UCNs.
(cpp_avoid_paste): Don't paste to form a UCN.
testsuite:
* ucs.c: Update diagnostic messages.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@65845 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/cpplex.c')
-rw-r--r-- | gcc/cpplex.c | 193 |
1 files changed, 82 insertions, 111 deletions
diff --git a/gcc/cpplex.c b/gcc/cpplex.c index c8caa393d9e..41e8a009a67 100644 --- a/gcc/cpplex.c +++ b/gcc/cpplex.c @@ -59,15 +59,14 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; static void add_line_note PARAMS ((cpp_buffer *, const uchar *, unsigned int)); static int skip_line_comment PARAMS ((cpp_reader *)); static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t)); -static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *)); +static cpp_hashnode *lex_identifier PARAMS ((cpp_reader *, const uchar *)); static void lex_number PARAMS ((cpp_reader *, cpp_string *)); -static bool continues_identifier_p PARAMS ((cpp_reader *)); +static bool forms_identifier_p PARAMS ((cpp_reader *, int)); static void lex_string PARAMS ((cpp_reader *, cpp_token *)); static void save_comment PARAMS ((cpp_reader *, cpp_token *, const uchar *, cppchar_t)); static int name_p PARAMS ((cpp_reader *, const cpp_string *)); -static int maybe_read_ucs PARAMS ((cpp_reader *, const unsigned char **, - const unsigned char *, cppchar_t *)); +static cppchar_t maybe_read_ucn PARAMS ((cpp_reader *, const uchar **)); static tokenrun *next_tokenrun PARAMS ((tokenrun *)); static unsigned int hex_digit_value PARAMS ((unsigned int)); @@ -361,33 +360,53 @@ name_p (pfile, string) } /* Returns TRUE if the sequence starting at buffer->cur is invalid in - an identifier. */ + an identifier. FIRST is TRUE if this starts an identifier. */ static bool -continues_identifier_p (pfile) +forms_identifier_p (pfile, first) cpp_reader *pfile; + int first; { - if (*pfile->buffer->cur != '$' || !CPP_OPTION (pfile, dollars_in_ident)) - return false; + cpp_buffer *buffer = pfile->buffer; - if (CPP_PEDANTIC (pfile) && !pfile->state.skipping && !pfile->warned_dollar) + if (*buffer->cur == '$') { - pfile->warned_dollar = true; - cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number"); + if (!CPP_OPTION (pfile, dollars_in_ident)) + return false; + + buffer->cur++; + if (CPP_PEDANTIC (pfile) + && !pfile->state.skipping + && !pfile->warned_dollar) + { + pfile->warned_dollar = true; + cpp_error (pfile, DL_PEDWARN, "'$' in identifier or number"); + } + + return true; } - pfile->buffer->cur++; - return true; + /* Is this a syntactically valid UCN? */ + if (0 && *buffer->cur == '\\' + && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U')) + { + buffer->cur += 2; + if (_cpp_valid_ucn (pfile, &buffer->cur, 1 + !first)) + return true; + buffer->cur -= 2; + } + + return false; } /* Lex an identifier starting at BUFFER->CUR - 1. */ static cpp_hashnode * -lex_identifier (pfile) +lex_identifier (pfile, base) cpp_reader *pfile; + const uchar *base; { cpp_hashnode *result; - const uchar *cur, *base; + const uchar *cur; - base = pfile->buffer->cur - 1; do { cur = pfile->buffer->cur; @@ -398,7 +417,7 @@ lex_identifier (pfile) pfile->buffer->cur = cur; } - while (continues_identifier_p (pfile)); + while (forms_identifier_p (pfile, false)); result = (cpp_hashnode *) ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC); @@ -444,7 +463,7 @@ lex_number (pfile, number) pfile->buffer->cur = cur; } - while (continues_identifier_p (pfile)); + while (forms_identifier_p (pfile, false)); number->len = cur - base; dest = _cpp_unaligned_alloc (pfile, number->len + 1); @@ -803,7 +822,6 @@ _cpp_lex_direct (pfile) } /* Fall through. */ - start_ident: case '_': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': @@ -816,7 +834,7 @@ _cpp_lex_direct (pfile) case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': result->type = CPP_NAME; - result->val.node = lex_identifier (pfile); + result->val.node = lex_identifier (pfile, buffer->cur - 1); /* Convert named operators to their proper types. */ if (result->val.node->flags & NODE_OPERATOR) @@ -1044,14 +1062,23 @@ _cpp_lex_direct (pfile) case '@': result->type = CPP_ATSIGN; break; case '$': - if (CPP_OPTION (pfile, dollars_in_ident)) - goto start_ident; - /* Fall through... */ + case '\\': + { + const uchar *base = --buffer->cur; - default: - result->type = CPP_OTHER; - result->val.c = c; - break; + if (forms_identifier_p (pfile, true)) + { + result->type = CPP_NAME; + result->val.node = lex_identifier (pfile, base); + break; + } + buffer->cur++; + + default: + result->type = CPP_OTHER; + result->val.c = c; + break; + } } return result; @@ -1321,9 +1348,11 @@ cpp_avoid_paste (pfile, token1, token2) || b == CPP_CHAR || b == CPP_STRING); /* L */ case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME || c == '.' || c == '+' || c == '-'); - case CPP_OTHER: return (CPP_OPTION (pfile, objc) - && token1->val.c == '@' - && (b == CPP_NAME || b == CPP_STRING)); + /* UCNs */ + case CPP_OTHER: return ((token1->val.c == '\\' && b == CPP_NAME) + || (CPP_OPTION (pfile, objc) + && token1->val.c == '@' + && (b == CPP_NAME || b == CPP_STRING))); default: break; } @@ -1363,93 +1392,31 @@ hex_digit_value (c) abort (); } -/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence. Returns 1 to indicate - failure if cpplib is not parsing C++ or C99. Such failure is - silent, and no variables are updated. Otherwise returns 0, and - warns if -Wtraditional. - - [lex.charset]: The character designated by the universal character - name \UNNNNNNNN is that character whose character short name in - ISO/IEC 10646 is NNNNNNNN; the character designated by the - universal character name \uNNNN is that character whose character - short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value - for a universal character name is less than 0x20 or in the range - 0x7F-0x9F (inclusive), or if the universal character name - designates a character in the basic source character set, then the - program is ill-formed. - - We assume that wchar_t is Unicode, so we don't need to do any - mapping. Is this ever wrong? - - PC points to the 'u' or 'U', PSTR is points to the byte after PC, - LIMIT is the end of the string or charconst. PSTR is updated to - point after the UCS on return, and the UCS is written into PC. */ - -static int -maybe_read_ucs (pfile, pstr, limit, pc) +/* Read a possible universal character name starting at *PSTR. */ +static cppchar_t +maybe_read_ucn (pfile, pstr) cpp_reader *pfile; - const unsigned char **pstr; - const unsigned char *limit; - cppchar_t *pc; + const uchar **pstr; { - const unsigned char *p = *pstr; - unsigned int code = 0; - unsigned int c = *pc, length; - - /* Only attempt to interpret a UCS for C++ and C99. */ - if (! (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))) - return 1; + cppchar_t result, c = (*pstr)[-1]; - if (CPP_WTRADITIONAL (pfile)) - cpp_error (pfile, DL_WARNING, - "the meaning of '\\%c' is different in traditional C", c); - - length = (c == 'u' ? 4: 8); - - if ((size_t) (limit - p) < length) - { - cpp_error (pfile, DL_ERROR, "incomplete universal-character-name"); - /* Skip to the end to avoid more diagnostics. */ - p = limit; - } - else + result = _cpp_valid_ucn (pfile, pstr, false); + if (result) { - for (; length; length--, p++) + if (CPP_WTRADITIONAL (pfile)) + cpp_error (pfile, DL_WARNING, + "the meaning of '\\%c' is different in traditional C", + (int) c); + + if (CPP_OPTION (pfile, EBCDIC)) { - c = *p; - if (ISXDIGIT (c)) - code = (code << 4) + hex_digit_value (c); - else - { - cpp_error (pfile, DL_ERROR, - "non-hex digit '%c' in universal-character-name", c); - /* We shouldn't skip in case there are multibyte chars. */ - break; - } + cpp_error (pfile, DL_ERROR, + "universal character with an EBCDIC target"); + result = 0x3f; /* EBCDIC invalid character */ } } - if (CPP_OPTION (pfile, EBCDIC)) - { - cpp_error (pfile, DL_ERROR, "universal-character-name on EBCDIC target"); - code = 0x3f; /* EBCDIC invalid character */ - } - /* True extended characters are OK. */ - else if (code >= 0xa0 - && !(code & 0x80000000) - && !(code >= 0xD800 && code <= 0xDFFF)) - ; - /* The standard permits $, @ and ` to be specified as UCNs. We use - hex escapes so that this also works with EBCDIC hosts. */ - else if (code == 0x24 || code == 0x40 || code == 0x60) - ; - /* Don't give another error if one occurred above. */ - else if (length == 0) - cpp_error (pfile, DL_ERROR, "universal-character-name out of range"); - - *pstr = p; - *pc = code; - return 0; + return result; } /* Returns the value of an escape sequence, truncated to the correct @@ -1470,7 +1437,7 @@ cpp_parse_escape (pfile, pstr, limit, wide) int unknown = 0; const unsigned char *str = *pstr, *charconsts; - cppchar_t c, mask; + cppchar_t c, ucn, mask; unsigned int width; if (CPP_OPTION (pfile, EBCDIC)) @@ -1519,7 +1486,11 @@ cpp_parse_escape (pfile, pstr, limit, wide) break; case 'u': case 'U': - unknown = maybe_read_ucs (pfile, &str, limit, &c); + ucn = maybe_read_ucn (pfile, &str); + if (ucn) + c = ucn; + else + unknown = true; break; case 'x': |