diff options
author | Eric Blake <ebb9@byu.net> | 2007-10-26 10:45:51 -0600 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-01-14 13:47:37 -0700 |
commit | 622fc8cb2cb6ce0fc7391a6414bb0aaffeec6fc0 (patch) | |
tree | 6d9060ff182b458db61c03b2e1d96c6868c0cccf | |
parent | a6c94a314afa34958330b719d66c2d4e403a94af (diff) | |
download | m4-622fc8cb2cb6ce0fc7391a6414bb0aaffeec6fc0.tar.gz |
Stage10: avoid extra copying of strings and comments
-rw-r--r-- | doc/m4.texinfo | 28 | ||||
-rw-r--r-- | m4/gnulib-cache.m4 | 4 | ||||
-rw-r--r-- | src/builtin.c | 20 | ||||
-rw-r--r-- | src/format.c | 173 | ||||
-rw-r--r-- | src/input.c | 90 | ||||
-rw-r--r-- | src/m4.h | 9 | ||||
-rw-r--r-- | src/macro.c | 32 | ||||
-rw-r--r-- | src/output.c | 18 | ||||
-rw-r--r-- | src/symtab.c | 6 |
9 files changed, 257 insertions, 123 deletions
diff --git a/doc/m4.texinfo b/doc/m4.texinfo index 65becc60..3da1e755 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -43,7 +43,7 @@ This manual is for @acronym{GNU} M4 (version @value{VERSION}, @value{UPDATED}), a package containing an implementation of the m4 macro language. Copyright @copyright{} 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, -2006, 2007 Free Software Foundation, Inc. +2006, 2007, 2008 Free Software Foundation, Inc. @quotation Permission is granted to copy, distribute and/or modify this document @@ -391,9 +391,11 @@ addressed some long standing bugs in the venerable 1.4 release. Then in 2005, Gary V. Vaughan collected together the many patches to @acronym{GNU} @code{m4} 1.4 that were floating around the net and released 1.4.3 and 1.4.4. And in 2006, Eric Blake joined the team and -prepared patches for the release of 1.4.5, 1.4.6, 1.4.7, and 1.4.8. The -1.4.x series remains open for bug fixes, including releases 1.4.9, -1.4.10, and 1.4.11 in 2007. +prepared patches for the release of 1.4.5, 1.4.6, 1.4.7, and 1.4.8. +More bug fixes were incorporated in 2007, with releases 1.4.9 and +1.4.10. In 2008, Eric additionally rewrote the scanning engine to +reduce recursive evaluation from quadratic to linear complexity for +1.4.11. The 1.4.x branch remains open for bug fixes. Meanwhile, development has continued on new features for @code{m4}, such as dynamic module loading and additional builtins. When complete, @@ -1827,7 +1829,7 @@ default, because it triggers a number of warnings in Autoconf 2.61 (and Autoconf uses @option{-E} to treat warnings as errors), and because it will still be possible to restore older behavior in M4 2.0. 
-@comment ignore +@comment options: --warn-macro-sequence @example $ @kbd{m4 --warn-macro-sequence} define(`foo', `$001 $@{1@} $1') @@ -5325,15 +5327,23 @@ see the C Library Manual, or the @acronym{POSIX} specification (for example, @samp{%a} is supported even on platforms that haven't yet implemented C99 hexadecimal floating point output natively). -Unrecognized specifiers result in a warning. It is anticipated that a -future release of @acronym{GNU} @code{m4} will support more specifiers, -and give better warnings when various problems such as overflow are -encountered. Likewise, escape sequences are not yet recognized. +Warnings are issued for unrecognized specifiers, an improper number of +arguments, or difficulty parsing an argument according to the format +string (such as overflow or extra characters). It is anticipated that a +future release of @acronym{GNU} @code{m4} will support more specifiers. +Likewise, escape sequences are not yet recognized. @example format(`%p', `0') @error{}m4:stdin:1: Warning: format: unrecognized specifier in `%p' @result{} +format(`%*d', `') +@error{}m4:stdin:2: Warning: format: empty string treated as 0 +@error{}m4:stdin:2: Warning: format: too few arguments: 2 < 3 +@result{}0 +format(`%.1f', `2a') +@error{}m4:stdin:3: Warning: format: non-numeric argument `2a' +@result{}2.0 @end example @node Arithmetic diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index 3112f911..0055a697 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -15,11 +15,11 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. 
--local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix +# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([local]) -gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix]) +gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix]) gl_AVOID([]) gl_SOURCE_BASE([lib]) gl_M4_BASE([m4]) diff --git a/src/builtin.c b/src/builtin.c index cb5f2749..007ca553 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 
1993, 1994, 2000, 2004, 2006, 2007 - Free Software Foundation, Inc. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2000, 2004, 2006, 2007, + 2008 Free Software Foundation, Inc. This file is part of GNU M4. @@ -424,9 +424,11 @@ define_user_macro (const char *name, size_t len, const char *text, if (macro_sequence_inuse && text) { regoff_t offset = 0; + len = strlen (defn); - while ((offset = re_search (&macro_sequence_buf, defn, len, offset, - len - offset, &macro_sequence_regs)) >= 0) + while (offset < len + && (offset = re_search (&macro_sequence_buf, defn, len, offset, + len - offset, &macro_sequence_regs)) >= 0) { /* Skip empty matches. */ if (macro_sequence_regs.start[0] == macro_sequence_regs.end[0]) @@ -463,12 +465,9 @@ builtin_init (void) for (bp = &builtin_tab[0]; bp->name != NULL; bp++) if (!no_gnu_extensions || !bp->gnu_extension) { - size_t len = strlen (bp->name); if (prefix_all_builtins) { - string = xcharalloc (len + 4); - strcpy (string, "m4_"); - strcat (string, bp->name); + string = xasprintf ("m4_%s", bp->name); define_builtin (string, bp, SYMBOL_INSERT); free (string); } @@ -500,7 +499,7 @@ builtin_init (void) | Return true if there are not enough arguments. | `------------------------------------------------------------------*/ -static bool +bool bad_argc (const char *name, int argc, unsigned int min, unsigned int max) { if (argc - 1 < min) @@ -559,7 +558,8 @@ ntoa (int32_t value, int radix) { bool negative; uint32_t uvalue; - static char str[256]; + /* Sized for radix 2, plus sign and trailing NUL. */ + static char str[sizeof (value) * CHAR_BIT + 2]; char *s = &str[sizeof str]; *--s = '\0'; diff --git a/src/format.c b/src/format.c index 20b3e28e..9c9508db 100644 --- a/src/format.c +++ b/src/format.c @@ -1,6 +1,6 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007 + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GNU M4. 
@@ -22,22 +22,95 @@ /* printf like formatting for m4. */ #include "m4.h" -#include "xvasprintf.h" /* Simple varargs substitute. We assume int and unsigned int are the - same size; likewise for long and unsigned long. */ + same size; likewise for long and unsigned long. We do not yet + handle long double or long long. */ + +/* Parse STR as an integer, reporting warnings on behalf of ME. */ +static int +arg_int (const char *me, const char *str) +{ + char *endp; + long value; + + /* TODO - also allow parsing `'a' or `"a' which results in the + numeric value of 'a', as in printf(1). */ + if (*str == '\0') + { + m4_warn (0, me, _("empty string treated as 0")); + return 0; + } + errno = 0; + value = strtol (str, &endp, 10); + if (*endp != '\0') + m4_warn (0, me, _("non-numeric argument `%s'"), str); + else if (isspace (to_uchar (*str))) + m4_warn (0, me, _("leading whitespace ignored")); + else if (errno == ERANGE || (int) value != value) + m4_warn (0, me, _("numeric overflow detected")); + return value; +} + +/* Parse STR as a long, reporting warnings on behalf of ME. */ +static long +arg_long (const char *me, const char *str) +{ + char *endp; + long value; + + /* TODO - also allow parsing `'a' or `"a' which results in the + numeric value of 'a', as in printf(1). */ + if (*str == '\0') + { + m4_warn (0, me, _("empty string treated as 0")); + return 0L; + } + errno = 0; + value = strtol (str, &endp, 10); + if (*endp != '\0') + m4_warn (0, me, _("non-numeric argument `%s'"), str); + else if (isspace (to_uchar (*str))) + m4_warn (0, me, _("leading whitespace ignored")); + else if (errno == ERANGE) + m4_warn (0, me, _("numeric overflow detected")); + return value; +} + +/* Parse STR as a double, reporting warnings on behalf of ME. 
*/ +static double +arg_double (const char *me, const char *str) +{ + char *endp; + double value; + + if (*str == '\0') + { + m4_warn (0, me, _("empty string treated as 0")); + return 0.0; + } + errno = 0; + value = strtod (str, &endp); + if (*endp != '\0') + m4_warn (0, me, _("non-numeric argument `%s'"), str); + else if (isspace (to_uchar (*str))) + m4_warn (0, me, _("leading whitespace ignored")); + else if (errno == ERANGE) + m4_warn (0, me, _("numeric overflow detected")); + return value; +} #define ARG_INT(i, argc, argv) \ - ((i == argc) ? 0 : atoi (arg_text (argv, i++))) + ((argc <= ++i) ? 0 : arg_int (me, arg_text (argv, i))) #define ARG_LONG(i, argc, argv) \ - ((i == argc) ? 0L : atol (arg_text (argv, i++))) + ((argc <= ++i) ? 0L : arg_long (me, arg_text (argv, i))) #define ARG_STR(i, argc, argv) \ - ((i == argc) ? "" : arg_text (argv, i++)) + ((argc <= ++i) ? "" : arg_text (argv, i)) #define ARG_DOUBLE(i, argc, argv) \ - ((i == argc) ? 0.0 : atof (arg_text (argv, i++))) + ((argc <= ++i) ? 0.0 : arg_double (me, arg_text (argv, i))) /*------------------------------------------------------------------. @@ -52,29 +125,30 @@ void format (struct obstack *obs, int argc, macro_arguments *argv) { const char *me = arg_text (argv, 0); - const char *f; /* format control string */ - const char *fmt; /* position within f */ - char fstart[] = "%'+- 0#*.*hhd"; /* current format spec */ - char *p; /* position within fstart */ - unsigned char c; /* a simple character */ - int index = 1; /* index within argc used so far */ + const char *f; /* Format control string. */ + const char *fmt; /* Position within f. */ + char fstart[] = "%'+- 0#*.*hhd"; /* Current format spec. */ + char *p; /* Position within fstart. */ + unsigned char c; /* A simple character. */ + int index = 0; /* Index within argc used so far. */ + bool valid_format = true; /* True if entire format string ok. */ /* Flags. */ - char flags; /* flags to use in fstart */ + char flags; /* Flags to use in fstart. 
*/ enum { - THOUSANDS = 0x01, /* ' */ - PLUS = 0x02, /* + */ - MINUS = 0x04, /* - */ - SPACE = 0x08, /* */ - ZERO = 0x10, /* 0 */ - ALT = 0x20, /* # */ - DONE = 0x40 /* no more flags */ + THOUSANDS = 0x01, /* '\''. */ + PLUS = 0x02, /* '+'. */ + MINUS = 0x04, /* '-'. */ + SPACE = 0x08, /* ' '. */ + ZERO = 0x10, /* '0'. */ + ALT = 0x20, /* '#'. */ + DONE = 0x40 /* No more flags. */ }; /* Precision specifiers. */ - int width; /* minimum field width */ - int prec; /* precision */ - char lflag; /* long flag */ + int width; /* Minimum field width. */ + int prec; /* Precision. */ + char lflag; /* Long flag. */ /* Specifiers we are willing to accept. ok['x'] implies %x is ok. Various modifiers reduce the set, in order to avoid undefined @@ -82,17 +156,23 @@ format (struct obstack *obs, int argc, macro_arguments *argv) char ok[128]; /* Buffer and stuff. */ - char *str; /* malloc'd buffer of formatted text */ + char *base; /* Current position in obs. */ + size_t len; /* Length of formatted text. */ + char *str; /* Malloc'd buffer of formatted text. 
*/ enum {CHAR, INT, LONG, DOUBLE, STR} datatype; f = fmt = ARG_STR (index, argc, argv); memset (ok, 0, sizeof ok); - for (;;) + while (true) { while ((c = *fmt++) != '%') { if (c == '\0') - return; + { + if (valid_format) + bad_argc (me, argc, index, index); + return; + } obstack_1grow (obs, c); } @@ -229,6 +309,7 @@ format (struct obstack *obs, int argc, macro_arguments *argv) if (c > sizeof ok || !ok[c]) { m4_warn (0, me, _("unrecognized specifier in `%s'"), f); + valid_format = false; if (c == '\0') fmt--; continue; @@ -271,42 +352,56 @@ format (struct obstack *obs, int argc, macro_arguments *argv) } *p++ = c; *p = '\0'; + base = obstack_next_free (obs); + len = obstack_room (obs); switch (datatype) { case CHAR: - str = xasprintf (fstart, width, ARG_INT (index, argc, argv)); + str = asnprintf (base, &len, fstart, width, + ARG_INT (index, argc, argv)); break; case INT: - str = xasprintf (fstart, width, prec, ARG_INT (index, argc, argv)); + str = asnprintf (base, &len, fstart, width, prec, + ARG_INT (index, argc, argv)); break; case LONG: - str = xasprintf (fstart, width, prec, ARG_LONG (index, argc, argv)); + str = asnprintf (base, &len, fstart, width, prec, + ARG_LONG (index, argc, argv)); break; case DOUBLE: - str = xasprintf (fstart, width, prec, ARG_DOUBLE (index, argc, argv)); + str = asnprintf (base, &len, fstart, width, prec, + ARG_DOUBLE (index, argc, argv)); break; case STR: - str = xasprintf (fstart, width, prec, ARG_STR (index, argc, argv)); + str = asnprintf (base, &len, fstart, width, prec, + ARG_STR (index, argc, argv)); break; default: abort (); } - /* NULL was returned on failure, such as invalid format string. - Issue a warning, then proceed. */ if (str == NULL) + /* NULL is unexpected (EILSEQ and EINVAL are not possible + based on our construction of fstart, leaving only ENOMEM, + which should always be fatal). 
*/ + m4_error (EXIT_FAILURE, errno, me, + _("unable to format output for `%s'"), f); + else if (str == base) + /* The output was already computed in place, but we need to + account for its size. */ + obstack_blank_fast (obs, len); + else { - m4_warn (0, me, _("unable to format output for `%s'"), f); - continue; + /* The output exceeded available obstack space, copy the + allocated string. */ + obstack_grow (obs, str, len); + free (str); } - - obstack_grow (obs, str, strlen (str)); - free (str); } } diff --git a/src/input.c b/src/input.c index 633bddd6..09cf7088 100644 --- a/src/input.c +++ b/src/input.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007 - Free Software Foundation, Inc. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007, + 2008 Free Software Foundation, Inc. This file is part of GNU M4. @@ -692,17 +692,17 @@ peek_input (void) } } -/*-------------------------------------------------------------------------. -| The function next_char () is used to read and advance the input to the | -| next character. It also manages line numbers for error messages, so | -| they do not get wrong, due to lookahead. The token consisting of a | -| newline alone is taken as belonging to the line it ends, and the current | -| line number is not incremented until the next character is read. | -| 99.9% of all calls will read from a string, so factor that out into a | -| macro for speed. | -`-------------------------------------------------------------------------*/ - -#define next_char() \ +/*-------------------------------------------------------------------. +| The function next_char () is used to read and advance the input to | +| the next character. It also manages line numbers for error | +| messages, so they do not get wrong due to lookahead. 
The token | +| consisting of a newline alone is taken as belonging to the line it | +| ends, and the current line number is not incremented until the | +| next character is read. 99.9% of all calls will read from a | +| string, so factor that out into a macro for speed. | +`-------------------------------------------------------------------*/ + +#define next_char() \ (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \ ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \ : next_char_1 ()) @@ -883,9 +883,9 @@ match_input (const char *s, bool consume) | effectively unchanged. | `--------------------------------------------------------------------*/ -#define MATCH(ch, s, consume) \ - (to_uchar ((s)[0]) == (ch) \ - && (ch) != '\0' \ +#define MATCH(ch, s, consume) \ + (to_uchar ((s)[0]) == (ch) \ + && (ch) != '\0' \ && ((s)[1] == '\0' || (match_input ((s) + (consume), consume)))) @@ -1142,22 +1142,24 @@ safe_quotes (void) /*--------------------------------------------------------------------. | Parse and return a single token from the input stream. A token | | can either be TOKEN_EOF, if the input_stack is empty; it can be | -| TOKEN_STRING for a quoted string; TOKEN_WORD for something that is | -| a potential macro name; and TOKEN_SIMPLE for any single character | -| that is not a part of any of the previous types. If LINE is not | -| NULL, set *LINE to the line where the token starts. Report errors | -| (unterminated comments or strings) on behalf of CALLER, if | -| non-NULL. | -| | +| TOKEN_STRING for a quoted string or comment; TOKEN_WORD for | +| something that is a potential macro name; and TOKEN_SIMPLE for any | +| single character that is not a part of any of the previous types. | +| If LINE is not NULL, set *LINE to the line where the token starts. | +| If OBS is not NULL, expand TOKEN_STRING directly into OBS rather | +| than in token_stack temporary storage area. 
Report errors | +| (unterminated comments or strings) on behalf of CALLER, if | +| non-NULL. | +| | | Next_token () returns the token type, and passes back a pointer to | -| the token data through TD. The token text is collected on the | -| obstack token_stack, which never contains more than one token text | -| at a time. The storage pointed to by the fields in TD is | +| the token data through TD. Non-string token text is collected on | +| the obstack token_stack, which never contains more than one token | +| text at a time. The storage pointed to by the fields in TD is | | therefore subject to change the next time next_token () is called. | `--------------------------------------------------------------------*/ token_type -next_token (token_data *td, int *line, const char *caller) +next_token (token_data *td, int *line, struct obstack *obs, const char *caller) { int ch; int quote_level; @@ -1168,6 +1170,11 @@ next_token (token_data *td, int *line, const char *caller) #endif /* ENABLE_CHANGEWORD */ const char *file; int dummy; + /* The obstack where token data is stored. Generally token_stack, + for tokens where argument collection might not use the literal + token. But for comments and strings, we can output directly into + the argument collection obstack obs, if one was provided. 
*/ + struct obstack *obs_td = &token_stack; obstack_free (&token_stack, token_bottom); if (!line) @@ -1199,12 +1206,14 @@ next_token (token_data *td, int *line, const char *caller) *line = current_line; if (MATCH (ch, bcomm.string, true)) { - obstack_grow (&token_stack, bcomm.string, bcomm.length); + if (obs) + obs_td = obs; + obstack_grow (obs_td, bcomm.string, bcomm.length); while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm.string, true)) - obstack_1grow (&token_stack, ch); + obstack_1grow (obs_td, ch); if (ch != CHAR_EOF) - obstack_grow (&token_stack, ecomm.string, ecomm.length); + obstack_grow (obs_td, ecomm.string, ecomm.length); else /* Current_file changed to "" if we see CHAR_EOF, use the previous value we stored earlier. */ @@ -1283,6 +1292,8 @@ next_token (token_data *td, int *line, const char *caller) } else { + if (obs) + obs_td = obs; quote_level = 1; while (1) { @@ -1297,23 +1308,28 @@ next_token (token_data *td, int *line, const char *caller) { if (--quote_level == 0) break; - obstack_grow (&token_stack, rquote.string, rquote.length); + obstack_grow (obs_td, rquote.string, rquote.length); } else if (MATCH (ch, lquote.string, true)) { quote_level++; - obstack_grow (&token_stack, lquote.string, lquote.length); + obstack_grow (obs_td, lquote.string, lquote.length); } else - obstack_1grow (&token_stack, ch); + obstack_1grow (obs_td, ch); } type = TOKEN_STRING; } TOKEN_DATA_TYPE (td) = TOKEN_TEXT; - TOKEN_DATA_LEN (td) = obstack_object_size (&token_stack); - obstack_1grow (&token_stack, '\0'); - TOKEN_DATA_TEXT (td) = (char *) obstack_finish (&token_stack); + TOKEN_DATA_LEN (td) = obstack_object_size (obs_td); + if (obs_td != obs) + { + obstack_1grow (obs_td, '\0'); + TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td); + } + else + TOKEN_DATA_TEXT (td) = NULL; TOKEN_DATA_QUOTE_AGE (td) = current_quote_age; #ifdef ENABLE_CHANGEWORD if (orig_text == NULL) @@ -1455,7 +1471,7 @@ lex_debug (void) token_type t; token_data td; - while ((t = 
next_token (&td, NULL, "<debug>")) != TOKEN_EOF) + while ((t = next_token (&td, NULL, NULL, "<debug>")) != TOKEN_EOF) print_token ("lex", t, &td); } #endif /* DEBUG_INPUT */ diff --git a/src/m4.h b/src/m4.h --- a/src/m4.h +++ b/src/m4.h @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007 - Free Software Foundation, Inc. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007, + 2008 Free Software Foundation, Inc. This file is part of GNU M4. @@ -41,10 +41,12 @@ #include "closein.h" #include "error.h" #include "exitfail.h" +#include "intprops.h" #include "obstack.h" #include "stdio--.h" #include "stdlib--.h" #include "unistd--.h" +#include "vasnprintf.h" #include "verror.h" #include "xalloc.h" #include "xprintf.h" @@ -336,7 +338,7 @@ typedef enum token_data_type token_data_type; void input_init (void); token_type peek_token (void); -token_type next_token (token_data *, int *, const char *); +token_type next_token (token_data *, int *, struct obstack *, const char *); void skip_line (const char *); /* push back input */ @@ -486,6 +488,7 @@ struct re_registers; #define DEFAULT_MACRO_SEQUENCE "\\$\\({[^}]*}\\|[0-9][0-9]+\\)" void builtin_init (void); +bool bad_argc (const char *, int, unsigned int, unsigned int); void define_builtin (const char *, const builtin *, symbol_lookup); void set_macro_sequence (const char *); void free_regex (void); diff --git a/src/macro.c b/src/macro.c index 5919154a..228f82d5 100644 --- a/src/macro.c +++ b/src/macro.c @@ -1,6 +1,6 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007 Free + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GNU M4. 
@@ -212,8 +212,8 @@ expand_input (void) TOKEN_DATA_ORIG_TEXT (&empty_token) = ""; #endif - while ((t = next_token (&td, &line, NULL)) != TOKEN_EOF) - expand_token ((struct obstack *) NULL, t, &td, line, true); + while ((t = next_token (&td, &line, NULL, NULL)) != TOKEN_EOF) + expand_token (NULL, t, &td, line, true); for (i = 0; i < stacks_count; i++) { @@ -264,8 +264,12 @@ expand_token (struct obstack *obs, token_type t, token_data *td, int line, /* Tokens and comments are safe in isolation (since quote_age() detects any change in delimiters). But if other text is already present, multi-character delimiters could be an - issue, so use a conservative heuristic. */ + issue, so use a conservative heuristic. If obstack is + provided, the string was already expanded into it during + next_token. */ result = first || safe_quotes (); + if (obs) + return result; break; case TOKEN_OPEN: @@ -356,7 +360,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) /* Skip leading white space. 
*/ do { - t = next_token (&td, NULL, caller); + t = next_token (&td, NULL, obs, caller); } while (t == TOKEN_SIMPLE && isspace (to_uchar (*TOKEN_DATA_TEXT (&td)))); @@ -432,7 +436,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) if (TOKEN_DATA_TYPE (argp) != TOKEN_VOID || obstack_object_size (obs)) first = false; - t = next_token (&td, NULL, caller); + t = next_token (&td, NULL, obs, caller); } } @@ -464,16 +468,18 @@ collect_arguments (symbol *sym, struct obstack *arguments, if (peek_token () == TOKEN_OPEN) { - next_token (&td, NULL, SYMBOL_NAME (sym)); /* gobble parenthesis */ + next_token (&td, NULL, NULL, SYMBOL_NAME (sym)); /* gobble parenthesis */ do { - more_args = expand_argument (arguments, &td, SYMBOL_NAME (sym)); + tdp = (token_data *) obstack_alloc (arguments, sizeof *tdp); + more_args = expand_argument (arguments, tdp, SYMBOL_NAME (sym)); - if ((TOKEN_DATA_TYPE (&td) == TOKEN_TEXT && !TOKEN_DATA_LEN (&td)) - || (!groks_macro_args && TOKEN_DATA_TYPE (&td) == TOKEN_FUNC)) - tdp = &empty_token; - else - tdp = (token_data *) obstack_copy (arguments, &td, sizeof td); + if ((TOKEN_DATA_TYPE (tdp) == TOKEN_TEXT && !TOKEN_DATA_LEN (tdp)) + || (!groks_macro_args && TOKEN_DATA_TYPE (tdp) == TOKEN_FUNC)) + { + obstack_free (arguments, tdp); + tdp = &empty_token; + } obstack_ptr_grow (argv_stack, tdp); args.arraylen++; args.argc++; diff --git a/src/output.c b/src/output.c index 4c8c9deb..d252d74e 100644 --- a/src/output.c +++ b/src/output.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, - 2007 Free Software Foundation, Inc. + 2007, 2008 Free Software Foundation, Inc. This file is part of GNU M4. 
@@ -188,15 +188,19 @@ static const char * m4_tmpname (int divnum) { static char *buffer; - static char *tail; + static size_t offset; if (buffer == NULL) { - tail = xasprintf ("%s/m4-%d", output_temp_dir->dir_name, INT_MAX); - buffer = obstack_copy0 (&diversion_storage, tail, strlen (tail)); - free (tail); - tail = strrchr (buffer, '-') + 1; + obstack_grow (&diversion_storage, output_temp_dir->dir_name, + strlen (output_temp_dir->dir_name)); + obstack_1grow (&diversion_storage, '/'); + obstack_1grow (&diversion_storage, 'm'); + obstack_1grow (&diversion_storage, '4'); + obstack_1grow (&diversion_storage, '-'); + offset = obstack_object_size (&diversion_storage); + buffer = obstack_alloc (&diversion_storage, INT_BUFSIZE_BOUND (divnum)); } - if (sprintf (tail, "%d", divnum) < 0) + if (snprintf (&buffer[offset], INT_BUFSIZE_BOUND (divnum), "%d", divnum) < 0) m4_error (EXIT_FAILURE, errno, NULL, _("cannot create temporary file for diversion")); return buffer; diff --git a/src/symtab.c b/src/symtab.c index e8a027f0..277a79f4 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2003, 2006, 2007 Free - Software Foundation, Inc. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2003, 2006, 2007, 2008 + Free Software Foundation, Inc. This file is part of GNU M4. @@ -350,7 +350,7 @@ symtab_debug (void) int delete; static int i; - while (next_token (&td, NULL, "<debug>") == TOKEN_WORD) + while (next_token (&td, NULL, NULL, "<debug>") == TOKEN_WORD) { text = TOKEN_DATA_TEXT (&td); if (*text == '_') |