diff options
author | brolley <brolley@138bc75d-0d04-0410-961f-82ee72b054a4> | 1999-03-16 13:10:15 +0000 |
---|---|---|
committer | brolley <brolley@138bc75d-0d04-0410-961f-82ee72b054a4> | 1999-03-16 13:10:15 +0000 |
commit | e3630c4b41e3c2ed124dd1b28c5782ae245b6422 (patch) | |
tree | aca71f4d74eba9693f38e9c8c1736e1b2c31167f /gcc/cppfiles.c | |
parent | ff39fc396a41ad0dce802fac4f2cc5b718f41f6a (diff) | |
download | gcc-e3630c4b41e3c2ed124dd1b28c5782ae245b6422.tar.gz |
1999-03-16 16:06 -0500 Zack Weinberg <zack@rabi.columbia.edu>
* cppfiles.c (read_and_prescan): Map backslash-newline to '\r'
(which cannot otherwise appear in the processed buffer) and
move it out of tokens that it appears in the middle of.
Improve performance.
(find_position): New function.
* cpplib.c: \r (one character) indicates backslash
newline, not \\\n (two characters). It cannot appear in the
middle of a token. Call CPP_BUMP_LINE (pfile) whenever
parsing moves past \n or \r. Increment pfile->lineno whenever
a \n is placed into token_buffer. Only one mark can exist at
a time, and CPP_BUMP_LINE must not be used while it is
active. It is automatically cleared by cpp_pop_buffer and
parse_goto_mark. \r is not in is_hor_space or is_space.
(NEWLINE_FIX, NEWLINE_FIX1, adjust_position,
update_position, count_newlines, parse_move_mark): Removed.
(parse_string, copy_comment): New functions.
(parse_name): Returns void.
(parse_set_mark, parse_clear_mark, parse_goto_mark): Take only
one argument, a cpp_reader *. Change for new marking scheme.
(skip_comment): Handle CHILL line comments too. Second
argument is now first character of comment marker; all callers
changed. Issue error for unterminated block comment here.
(cpp_skip_hspace): Recognize CHILL comments.
(copy_rest_of_line): Likewise. Call skip_comment and
parse_string directly, don't go through cpp_get_token. Emit
"/**/" for block comments if -traditional (create_definition
needs this).
(do_define): Don't play with put_out_comments.
(cpp_push_buffer): Initialize ->mark to -1.
(cpp_buf_line_and_col): Just read out the values in the buffer
structure.
(output_line_command): Use cpp_buf_line_and_col. Fix
formatting. Remove stale code.
(cpp_get_token): Break out string parsing code to
parse_string. Use skip_comment for CHILL comments too. Use
copy_comment for put_out_comments instead of dinking with
marks. Remove stale code. Don't call output_line_command
unless it's necessary.
* cpplib.h (parse_marker): Removed.
(struct cpp_buffer): line_base is now a unsigned char *; add
`mark' [long], remove `marks' [struct parse_marker *].
(parse_set_mark, parse_clear_mark, parse_goto_mark): Update
prototypes.
(CPP_BUMP_LINE, CPP_BUMP_BUFFER_LINE): New macros.
* cppinit.c (is_hor_space, is_space): '\r' is not considered
whitespace.
* cppexp.c (cpp_parse_expression): Use cpp_skip_hspace, not
SKIP_WHITE_SPACE.
* cpphash.c (macarg): Disable line commands while expanding.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@25802 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/cppfiles.c')
-rw-r--r-- | gcc/cppfiles.c | 325 |
1 files changed, 221 insertions, 104 deletions
diff --git a/gcc/cppfiles.c b/gcc/cppfiles.c index ef78ee34320..7828569c0dd 100644 --- a/gcc/cppfiles.c +++ b/gcc/cppfiles.c @@ -757,15 +757,51 @@ actual_directory (pfile, fname) return x; } -/* Read the entire contents of file DESC into buffer BUF, convert end-of-line - markers to canonical form, and convert trigraphs if enabled. Also, make - sure there is a newline at the end of the file. LEN is how much room we - have to start with (this can be expanded if necessary). - Returns -1 on failure, or the actual length of the data to be scanned. +/* Almost but not quite the same as adjust_position in cpplib.c. + Used only by read_and_prescan. */ +static void +find_position (start, limit, linep, colp) + U_CHAR *start; + U_CHAR *limit; + long *linep; + long *colp; +{ + long line = *linep, col = 0; + while (start < limit) + { + U_CHAR ch = *start++; + if (ch == '\n' || ch == '\r') + line++, col = 1; + else + col++; + } + *linep = line, *colp = col; +} - N.B. This function has been rearranged to out-of-line the uncommon cases - as much as possible; this is important to prevent it from being a - performance bottleneck. */ +/* Read the entire contents of file DESC into buffer BUF. LEN is how + much memory to allocate initially; more will be allocated if + necessary. Convert end-of-line markers (\n, \r, \r\n, \n\r) to + canonical form (\n). If enabled, convert and/or warn about + trigraphs. Convert backslash-newline to a one-character escape + (\r) and remove it from "embarrassing" places (i.e. the middle of a + token). If there is no newline at the end of the file, add one and + warn. Returns -1 on failure, or the actual length of the data to + be scanned. + + This function does a lot of work, and can be a serious performance + bottleneck. It has been tuned heavily; make sure you understand it + before hacking. The common case - no trigraphs, Unix style line + breaks, backslash-newline set off by whitespace, newline at EOF - + has been optimized at the expense of the others. The performance + penalty for DOS style line breaks (\r\n) is about 15%. + + Warnings lose particularly heavily since we have to determine the + line number, which involves scanning from the beginning of the file + or from the last warning. The penalty for the absence of a newline + at the end of reload1.c is about 60%. (reload1.c is 329k.) + + If your file has more than one kind of end-of-line marker, you + will get messed-up line numbering. */ static long read_and_prescan (pfile, fp, desc, len) @@ -774,29 +810,47 @@ read_and_prescan (pfile, fp, desc, len) int desc; size_t len; { - U_CHAR *buf = (U_CHAR *) xmalloc (len); + U_CHAR *buf = xmalloc (len); U_CHAR *ip, *op, *line_base; U_CHAR *ibase; - unsigned int line; + unsigned int line, deferred_newlines; int count; size_t offset; - /* 4096 bytes of buffer proper, 2 to detect running off the end without - address arithmetic all the time, and 2 for pushback in the case there's - a potential trigraph or end-of-line digraph at the end of a block. */ -#define INTERMED_BUFFER_SIZE 4096 - U_CHAR intermed[INTERMED_BUFFER_SIZE + 2 + 2]; + /* PIPE_BUF bytes of buffer proper, 2 to detect running off the end + without address arithmetic all the time, and 2 for pushback in + the case there's a potential trigraph or end-of-line digraph at + the end of a block. */ + U_CHAR intermed[PIPE_BUF + 2 + 2]; + + /* Table of characters that can't be handled in the inner loop. + Keep these contiguous to optimize the performance of the code generated + for the switch that uses them. */ + #define SPECCASE_EMPTY 0 + #define SPECCASE_NUL 1 + #define SPECCASE_CR 2 + #define SPECCASE_BACKSLASH 3 + #define SPECCASE_QUESTION 4 + U_CHAR speccase[256]; offset = 0; op = buf; line_base = buf; line = 1; ibase = intermed + 2; + deferred_newlines = 0; + + memset (speccase, SPECCASE_EMPTY, sizeof (speccase)); + speccase['\0'] = SPECCASE_NUL; + speccase['\r'] = SPECCASE_CR; + speccase['\\'] = SPECCASE_BACKSLASH; + if (CPP_OPTIONS (pfile)->trigraphs || CPP_OPTIONS (pfile)->warn_trigraphs) + speccase['?'] = SPECCASE_QUESTION; for (;;) { read_next: - count = read (desc, intermed + 2, INTERMED_BUFFER_SIZE); + count = read (desc, intermed + 2, PIPE_BUF); if (count < 0) goto error; else if (count == 0) @@ -806,16 +860,16 @@ read_and_prescan (pfile, fp, desc, len) ip = ibase; ibase = intermed + 2; ibase[count] = ibase[count+1] = '\0'; - + if (offset > len) { size_t delta_op; size_t delta_line_base; len *= 2; if (offset > len) - /* len overflowed. - This could happen if the file is larger than half the - maximum address space of the machine. */ + /* len overflowed. + This could happen if the file is larger than half the + maximum address space of the machine. */ goto too_big; delta_op = op - buf; @@ -827,93 +881,155 @@ read_and_prescan (pfile, fp, desc, len) for (;;) { - unsigned int c; - c = *ip++; - switch (c) + unsigned int span = 0; + + /* Deal with \-newline in the middle of a token. */ + if (deferred_newlines) { - /* The default case is at the top so gcc will realize - it's the common case, and leave c in a register. - Also, cache utilization is a little better this way. */ - default: - *op++ = c; - break; - - case '\0': + while (speccase[ip[span]] == SPECCASE_EMPTY + && ip[span] != '\n' + && ip[span] != '\t' + && ip[span] != ' ') + span++; + memcpy (op, ip, span); + op += span; + ip += span; + if (*ip == '\n' || *ip == '\t' + || *ip == ' ' || *ip == ' ') + while (deferred_newlines) + deferred_newlines--, *op++ = '\r'; + span = 0; + } + + /* Copy as much as we can without special treatment. */ + while (speccase[ip[span]] == SPECCASE_EMPTY) span++; + memcpy (op, ip, span); + op += span; + ip += span; + + switch (speccase[*ip++]) + { + case SPECCASE_NUL: /* \0 */ + ibase[-1] = op[-1]; goto read_next; - case '\r': - if (*ip == '\n') ip++; + + case SPECCASE_CR: /* \r */ + if (*ip == '\n') + ip++; else if (*ip == '\0') { --ibase; intermed[1] = '\r'; goto read_next; } + else if (ip[-2] == '\n') + continue; *op++ = '\n'; - line++; - line_base = op; break; - case '\n': - if (*ip == '\r') ip++; - else if (*ip == '\0') + case SPECCASE_BACKSLASH: /* \ */ + backslash: + { + /* If we're at the end of the intermediate buffer, + we have to shift the backslash down to the start + and come back next pass. */ + if (*ip == '\0') { --ibase; - intermed[1] = '\n'; + intermed[1] = '\\'; goto read_next; } - *op++ = '\n'; - line++; - line_base = op; - break; - - case '?': - if (CPP_OPTIONS (pfile)->trigraphs - || CPP_OPTIONS (pfile)->warn_trigraphs) + else if (*ip == '\n') { - unsigned int d; - /* If we're at the end of the intermediate buffer, - we have to shift the ?'s down to the start and - come back next pass. */ - d = ip[0]; - if (d == '\0') - { - --ibase; - intermed[1] = '?'; - goto read_next; - } - if (d != '?') - { - *op++ = '?'; - break; - } - d = ip[1]; - if (d == '\0') + ip++; + if (*ip == '\r') ip++; + if (*ip == '\n' || *ip == '\t' || *ip == ' ') + *op++ = '\r'; + else if (op[-1] == '\t' || op[-1] == ' ' + || op[-1] == '\r' || op[-1] == '\n') + *op++ = '\r'; + else + deferred_newlines++; + line++; + line_base = op; + } + else if (*ip == '\r') + { + ip++; + if (*ip == '\n') ip++; + else if (*ip == '\0') { ibase -= 2; - intermed[0] = intermed[1] = '?'; + intermed[0] = '\\'; + intermed[1] = '\r'; goto read_next; } - if (!trigraph_table[d]) - { - *op++ = '?'; - break; - } - - if (CPP_OPTIONS (pfile)->warn_trigraphs) - cpp_warning_with_line (pfile, line, op-line_base, - "trigraph ??%c encountered", d); - if (CPP_OPTIONS (pfile)->trigraphs) - *op++ = trigraph_table[d]; + else if (*ip == '\r' || *ip == '\t' || *ip == ' ') + *op++ = '\r'; else - { - *op++ = '?'; - *op++ = '?'; - *op++ = d; - } - ip += 2; + deferred_newlines++; + line++; + line_base = op; } else - *op++ = c; + *op++ = '\\'; + } + break; + + case SPECCASE_QUESTION: /* ? */ + { + unsigned int d; + /* If we're at the end of the intermediate buffer, + we have to shift the ?'s down to the start and + come back next pass. */ + d = ip[0]; + if (d == '\0') + { + --ibase; + intermed[1] = '?'; + goto read_next; + } + if (d != '?') + { + *op++ = '?'; + break; + } + d = ip[1]; + if (d == '\0') + { + ibase -= 2; + intermed[0] = intermed[1] = '?'; + goto read_next; + } + if (!trigraph_table[d]) + { + *op++ = '?'; + break; + } + + if (CPP_OPTIONS (pfile)->warn_trigraphs) + { + long col; + find_position (line_base, op, &line, &col); + line_base = op - col; + cpp_warning_with_line (pfile, line, col, + "trigraph ??%c encountered", d); + } + if (CPP_OPTIONS (pfile)->trigraphs) + { + if (trigraph_table[d] == '\\') + goto backslash; + else + *op++ = trigraph_table[d]; + } + else + { + *op++ = '?'; + *op++ = '?'; + *op++ = d; + } + ip += 2; + } } } } @@ -922,47 +1038,48 @@ read_and_prescan (pfile, fp, desc, len) return 0; /* Deal with pushed-back chars at true EOF. - If two chars were pushed back, they must both be ?'s. - If one was, it might be ?, \r, or \n, and \r needs to - become \n. + This may be any of: ?? ? \ \r \n \\r \\n. + \r must become \n, \\r or \\n must become \r. We know we have space already. */ if (ibase == intermed) { - *op++ = '?'; - *op++ = '?'; + if (*ibase == '?') + { + *op++ = '?'; + *op++ = '?'; + } + else + *op++ = '\r'; } else if (ibase == intermed + 1) { - if (*ibase == '?') - *op++ = '?'; - else + if (*ibase == '\r') *op++ = '\n'; + else + *op++ = *ibase; } - if (op[-1] != '\n' || op[-2] == '\\') + if (op[-1] != '\n') { - if (CPP_PEDANTIC (pfile)) - cpp_pedwarn_with_line (pfile, line, op - line_base, - "no newline at end of file"); - if (offset + 2 > len) + long col; + find_position (line_base, op, &line, &col); + cpp_warning_with_line (pfile, line, col, "no newline at end of file\n"); + if (offset + 1 > len) { - len += 2; - if (offset + 2 > len) + len += 1; + if (offset + 1 > len) goto too_big; buf = (U_CHAR *) xrealloc (buf, len); op = buf + offset; } - if (op[-1] == '\\') - *op++ = '\n'; *op++ = '\n'; } - fp->buf = - (U_CHAR *) ((len - offset < 20) ? (PTR) buf : xrealloc (buf, op - buf)); + fp->buf = ((len - offset < 20) ? buf : (U_CHAR *)xrealloc (buf, op - buf)); return op - buf; too_big: - cpp_error (pfile, "file is too large"); + cpp_error (pfile, "file is too large (>%lu bytes)\n", (unsigned long)offset); free (buf); return -1; |