diff options
author | Eric Blake <ebb9@byu.net> | 2007-11-21 10:14:28 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-03-14 07:01:25 -0600 |
commit | 434656c96d6486cf959c3050aa85aecb72d948a0 (patch) | |
tree | e68dec3f1f0883269ab14c363df6fdcf98fabc7b | |
parent | 58d580eeca1f75ddd2ca68d8b93fef6eead14350 (diff) | |
download | m4-434656c96d6486cf959c3050aa85aecb72d948a0.tar.gz |
Stage19: allow builtin tokens in more macros
-rw-r--r-- | NEWS | 21 | ||||
-rw-r--r-- | doc/m4.texinfo | 205 | ||||
-rw-r--r-- | m4/gnulib-cache.m4 | 4 | ||||
-rw-r--r-- | src/builtin.c | 78 | ||||
-rw-r--r-- | src/input.c | 148 | ||||
-rw-r--r-- | src/m4.h | 27 | ||||
-rw-r--r-- | src/macro.c | 134 |
7 files changed, 491 insertions, 126 deletions
@@ -2,7 +2,22 @@ GNU M4 NEWS - User visible changes. Copyright (C) 1992, 1993, 1994, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. -Version 1.4.11 - ?? ??? 2008, by ???? (git version 1.4.10a-*) +* Noteworthy changes in Version 1.4.11 (????-??-??) [stable] + Released by ????, based on git version 1.4.10b.x-* + +** Fix regression introduced in 1.4.10b where using `builtin' or `indir' + to perform nested `shift' calls triggered an assertion failure. + +** Enhance the `ifdef', `ifelse', and `shift' builtins, as well as all + user macros, to transparently handle builtin tokens generated by `defn'. + +** Enhance the `defn', `dumpdef', `ifdef', `popdef', `traceon', `traceoff', + and `undefine' macros to warn when encountering a builtin token in the + context of a macro name, rather than acting on the empty string. This + was already done for `define', `pushdef', `builtin', and `indir'. + +* Noteworthy changes in Version 1.4.10b (2008-02-25) [beta] + Released by Eric Blake, based on git version 1.4.10a * Security fixes for the -F option, for bugs present since -F was introduced in 1.3: Avoid core dump with 'm4 -F file -t undefined', and @@ -21,7 +36,7 @@ Version 1.4.11 - ?? ??? 2008, by ???? (git version 1.4.10a-*) now issued if this is attempted, although a future version of M4 may lift this restriction to match other implementations. * Enhance the `index' builtin to guarantee linear behavior and often - acheive sublinear results, in spite of the surprisingly large number of + achieve sublinear results, in spite of the surprisingly large number of systems with a brain-dead quadratic strstr(3). * Enhance the `regexp' and `patsubst' builtins to cache frequently used regular expressions, which speeds up typical Autoconf usage. @@ -80,7 +95,7 @@ Version 1.4.8b - 24 Feb 2007, by Eric Blake (CVS version 1.4.8a) * The `-E'/`--fatal-warnings' command-line option now has two levels. 
When specified only once, warnings affect exit status, but execution continues, so that you can see all warnings instead of fixing them one - at a time. To acheive 1.4.8 behavior, where the first warning + at a time. To achieve 1.4.8 behavior, where the first warning immediately exits, specify -E twice on the command line. * A new `--warn-macro-sequence' command-line option allows detection of sequences in `define' and `pushdef' definitions that match an optional diff --git a/doc/m4.texinfo b/doc/m4.texinfo index ba37c462..b271e92a 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -2213,11 +2213,17 @@ defn([l], [r]) @cindex builtins, special tokens @cindex tokens, builtin macro -Using @code{defn} to generate special tokens for builtin macros outside -of expected contexts can sometimes trigger warnings. But most of the -time, such tokens are silently converted to the empty string. +Using @code{defn} to generate special tokens for builtin macros will +generate a warning in contexts where a macro name is expected. But in +contexts that operate on text, the builtin token is just silently +converted to an empty string. As of M4 1.4.11, expansion of user macros +will also preserve builtin tokens. However, any use of builtin tokens +outside of the second argument to @code{define} and @code{pushdef} is +generally not portable, since earlier @acronym{GNU} M4 versions, as well +as other @code{m4} implementations, vary on how such tokens are treated. 
@example +$ @kbd{m4 -d} defn(`defn') @result{} define(defn(`divnum'), `cannot redefine a builtin token') @@ -2225,12 +2231,50 @@ define(defn(`divnum'), `cannot redefine a builtin token') @result{} divnum @result{}0 +len(defn(`divnum')) +@result{}0 define(`echo', `$@@') @result{} -define(`mydivnum', echo(defn(`divnum'))) +define(`mydivnum', shift(echo(`', defn(`divnum')))) @result{} mydivnum +@result{}0 +define(`', `empty-$1') +@result{} +defn(defn(`divnum')) +@error{}m4:stdin:9: Warning: defn: invalid macro name ignored +@result{} +pushdef(defn(`divnum'), `oops') +@error{}m4:stdin:10: Warning: pushdef: invalid macro name ignored +@result{} +traceon(defn(`divnum')) +@error{}m4:stdin:11: Warning: traceon: invalid macro name ignored +@result{} +indir(defn(`divnum'), `string') +@error{}m4:stdin:12: Warning: indir: invalid macro name ignored +@result{} +indir(`', `string') +@result{}empty-string +traceoff(defn(`divnum')) +@error{}m4:stdin:14: Warning: traceoff: invalid macro name ignored +@result{} +popdef(defn(`divnum')) +@error{}m4:stdin:15: Warning: popdef: invalid macro name ignored +@result{} +dumpdef(defn(`divnum')) +@error{}m4:stdin:16: Warning: dumpdef: invalid macro name ignored @result{} +undefine(defn(`divnum')) +@error{}m4:stdin:17: Warning: undefine: invalid macro name ignored +@result{} +dumpdef(`') +@error{}:@tabchar{}`empty-$1' +@result{} +define(`foo', `define(`$1', $2)')dnl +foo(`bar', defn(`divnum')) +@result{} +bar +@result{}0 @end example Also note that @code{defn} with multiple arguments can only join text @@ -2541,6 +2585,23 @@ builtin(`builtin',) builtin(`include', `foo')dnl @result{}bar @end example + +@comment And this example triggers a regression present in 1.4.10b. 
+ +@example +define(`s', `builtin(`shift', $@@)')dnl +define(`loop', `ifelse(`$2', `', `-', `$1$2: $0(`$1', s(s($@@)))')')dnl +loop(`1') +@result{}- +loop(`1', `2') +@result{}12: - +loop(`1', `2', `3') +@result{}12: 13: - +loop(`1', `2', `3', `4') +@result{}12: 13: 14: - +loop(`1', `2', `3', `4', `5') +@result{}12: 13: 14: 15: - +@end example @end ignore @node Conditionals @@ -2588,6 +2649,22 @@ ifdef(`no_such_macro', `yes', `no', `extra argument') @result{}no @end example +As of M4 1.4.11, @code{ifdef} transparently handles builtin tokens +generated by @code{defn} (@pxref{Defn}) that occur in either +@var{string}, although a warning is issued for invalid macro names. + +@example +define(`', `empty') +@result{} +ifdef(defn(`defn'), `yes', `no') +@error{}m4:stdin:2: Warning: ifdef: invalid macro name ignored +@result{}no +define(`foo', ifdef(`divnum', defn(`divnum'), `undefined')) +@result{} +foo +@result{}0 +@end example + @node Ifelse @section If-else construct, or multibranch @@ -2688,6 +2765,24 @@ ifelse(`foo', `bar', `3', `gnu', `gnats', `6', `7', `8') @result{}7 @end example +As of M4 1.4.11, @code{ifelse} transparently handles builtin tokens +generated by @code{defn} (@pxref{Defn}). Because of this, it is always +safe to compare two macro definitions, without worrying whether the +macro might be a builtin. + +@example +ifelse(defn(`defn'), `', `yes', `no') +@result{}no +ifelse(defn(`defn'), defn(`divnum'), `yes', `no') +@result{}no +ifelse(defn(`defn'), defn(`defn'), `yes', `no') +@result{}yes +define(`foo', ifelse(`', `', defn(`divnum'))) +@result{} +foo +@result{}0 +@end example + @ignore @comment Stress tests, not worth documenting. @@ -2746,8 +2841,8 @@ ifelse(`-01234567890123456789', `-'e(long)`-', `yes', `no') @result{}no @end example -@comment It would be nice to pass builtin tokens through ifelse, m4wrap, -@comment user macros; hence the fixmes. 
+@comment It would be nice to pass builtin tokens through m4wrap, as well +@comment as allowing concatenation of builtins in ifelse and user macros. @example define(`e', `$@@')define(`q', ``$@@'')define(`u', `$*') @result{} @@ -2757,28 +2852,40 @@ cmp(`defn(`defn')', `defn(`d')') @result{}yes cmp(`defn(`defn')', ``<defn>'') @result{}no -cmp(`q(defn(`defn'))', `q(defn(`d'))') -@result{}yes -cmp(`q(defn(`defn'))', `q(`<defn>')') -@result{}no -cmp(`q(defn(`defn'))', ``'') -@result{}no -cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', defn(`d'))') -@result{}yes -cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', `<defn>')') -@result{}no -cmp(`q(`1', `2', defn(`defn'))', ```1',`2',<defn>'') -@result{}no +cmp(`q(defn(`defn'))', `q(defn(`d'))')-fixme +@error{}m4:stdin:5: Warning: ifelse: cannot quote builtin +@error{}m4:stdin:5: Warning: ifelse: cannot quote builtin +@result{}yes-fixme +cmp(`q(defn(`defn'))', `q(`<defn>')')-fixme +@error{}m4:stdin:6: Warning: ifelse: cannot quote builtin +@result{}no-fixme +cmp(`q(defn(`defn'))', ``'')-fixme +@error{}m4:stdin:7: Warning: ifelse: cannot quote builtin +@result{}no-fixme +cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', defn(`d'))')-fixme +@error{}m4:stdin:8: Warning: ifelse: cannot quote builtin +@error{}m4:stdin:8: Warning: ifelse: cannot quote builtin +@result{}yes-fixme +cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', `<defn>')')-fixme +@error{}m4:stdin:9: Warning: ifelse: cannot quote builtin +@result{}no-fixme +cmp(`q(`1', `2', defn(`defn'))', ```1',`2',<defn>'')-fixme +@error{}m4:stdin:10: Warning: ifelse: cannot quote builtin +@result{}no-fixme cmp(`q(`1', `2', defn(`defn'))', ```1',`2',`''')-fixme +@error{}m4:stdin:11: Warning: ifelse: cannot quote builtin @result{}yes-fixme define(`cat', `$1`'ifelse(`$#', `1', `', `$0(shift($@@))')') @result{} cat(`define(`foo',', defn(`divnum'), `)foo')-fixme +@error{}m4:stdin:13: Warning: ifelse: cannot quote builtin @result{}-fixme cat(e(`define(`bar',', defn(`divnum'), `)bar'))-fixme 
+@error{}m4:stdin:14: Warning: ifelse: cannot quote builtin @result{}-fixme m4wrap(`u('q(`cat(`define(`baz','', defn(`divnum'), ``)baz')')`)-fixme ') +@error{}m4:stdin:15: Warning: m4wrap: cannot quote builtin @result{} ^D @result{}-fixme @@ -3570,7 +3677,7 @@ echo(`1', `long string') @result{}1,long string indir(`echo', defn(`changequote')) @error{}m4trace: -2- defn(`change...') -@error{}m4trace: -1- indir(`echo', <changequote>) -> ``'' +@error{}m4trace: -1- indir(`echo', <changequote>) -> ``<changequote>'' @result{} @end example @@ -7382,6 +7489,66 @@ foreachq(`x', ```active'', ``active''', `<x> @result{}<active> @end example +@ignore +@comment Not worth putting in the manual, but make sure that performance +@comment on recursive algorithms is not quadratic. + +@comment boxed recursion + +@comment examples +@comment options: -Dlimit=10 -Dverbose +@example +$ @kbd {m4 -I examples -Dlimit=10 -Dverbose} +include(`loop.m4')dnl +@result{} 1 2 3 4 5 6 7 8 9 10 +@end example + +@comment examples +@comment options: -Dlimit=2500 +@example +$ @kbd {m4 -I examples -Dlimit=2500} +include(`loop.m4')dnl +@end example + +@comment examples +@comment options: -Dlimit=10000 +@example +$ @kbd {m4 -I examples -Dlimit=10000} +define(`debug', `define(`popdef',`divert`'i')') +@result{} +include(`loop.m4')dnl +@result{}10000 +@end example + +@comment unboxed recursion + +@comment examples +@comment options: -Dlimit=10 -Dverbose -Dalt +@example +$ @kbd {m4 -I examples -Dlimit=10 -Dverbose -Dalt} +include(`loop.m4')dnl +@result{} 1 2 3 4 5 6 7 8 9 10 +@end example + +@comment examples +@comment options: -Dlimit=2500 -Dalt +@example +$ @kbd {m4 -I examples -Dlimit=2500 -Dalt} +include(`loop.m4')dnl +@end example + +@comment examples +@comment options: -Dlimit=10000 -Dalt +@example +$ @kbd {m4 -I examples -Dlimit=10000 -Dalt} +define(`debug', `define(`popdef',`divert`'i')') +@result{} +include(`loop.m4')dnl +@result{}10000 +@end example + +@end ignore + @node Improved cleardivert @section 
Solution for @code{cleardivert} diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index be1c1217..1a3434c0 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -15,11 +15,11 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix +# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnupload gpl-3.0 intprops memchr2 memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([local]) -gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix]) +gl_MODULES([announce-gen assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnupload 
gpl-3.0 intprops memchr2 memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix]) gl_AVOID([]) gl_SOURCE_BASE([lib]) gl_M4_BASE([m4]) diff --git a/src/builtin.c b/src/builtin.c index beb8e350..9dbb4693 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -106,17 +106,17 @@ builtin_tab[] = { "debugfile", true, false, false, m4_debugfile }, { "decr", false, false, true, m4_decr }, { "define", false, true, true, m4_define }, - { "defn", false, false, true, m4_defn }, + { "defn", false, true, true, m4_defn }, { "divert", false, false, false, m4_divert }, { "divnum", false, false, false, m4_divnum }, { "dnl", false, false, false, m4_dnl }, - { "dumpdef", false, false, false, m4_dumpdef }, + { "dumpdef", false, true, false, m4_dumpdef }, { "errprint", false, false, true, m4_errprint }, { "esyscmd", true, false, true, m4_esyscmd }, { "eval", false, false, true, m4_eval }, { "format", true, false, true, m4_format }, - { "ifdef", false, false, true, m4_ifdef }, - { "ifelse", false, false, true, m4_ifelse }, + { "ifdef", false, true, true, m4_ifdef }, + { "ifelse", false, true, true, m4_ifelse }, { "include", false, false, true, m4_include }, { "incr", false, false, true, m4_incr }, { "index", false, false, true, m4_index }, @@ -127,18 +127,18 @@ builtin_tab[] = { "maketemp", false, false, true, m4_maketemp }, { "mkstemp", false, false, true, m4_mkstemp }, { "patsubst", true, false, true, m4_patsubst }, - { "popdef", false, false, true, m4_popdef }, + { "popdef", false, true, true, m4_popdef }, { "pushdef", false, true, true, m4_pushdef }, { "regexp", true, false, true, m4_regexp }, - { "shift", false, false, true, m4_shift }, + { "shift", false, true, true, m4_shift }, { "sinclude", false, false, true, m4_sinclude }, { "substr", false, false, true, m4_substr }, { "syscmd", false, false, true, m4_syscmd }, { "sysval", false, false, false, m4_sysval }, - { 
"traceoff", false, false, false, m4_traceoff }, - { "traceon", false, false, false, m4_traceon }, + { "traceoff", false, true, false, m4_traceoff }, + { "traceon", false, true, false, m4_traceon }, { "translit", false, false, true, m4_translit }, - { "undefine", false, false, true, m4_undefine }, + { "undefine", false, true, true, m4_undefine }, { "undivert", false, false, false, m4_undivert }, { 0, false, false, false, 0 }, @@ -308,7 +308,7 @@ compile_pattern (const char *str, size_t len, struct re_pattern_buffer **buf, } /* Next, check if STR can be compiled. */ - new_buf = xzalloc (sizeof *new_buf); + new_buf = (struct re_pattern_buffer *) xzalloc (sizeof *new_buf); msg = re_compile_pattern (str, len, new_buf); #ifdef DEBUG_REGEX if (trace_file) @@ -320,6 +320,8 @@ compile_pattern (const char *str, size_t len, struct re_pattern_buffer **buf, free (new_buf); return msg; } + /* Use a fastmap for speed; it is freed by regfree. */ + new_buf->fastmap = xcharalloc (UCHAR_MAX + 1); /* Now, find a victim slot. Decrease the count of all entries, then prime the count of the victim slot at REGEX_CACHE_SIZE. This @@ -438,6 +440,7 @@ define_user_macro (const char *name, size_t name_len, const char *text, SYMBOL_TYPE (s) = TOKEN_TEXT; SYMBOL_TEXT (s) = defn; + SYMBOL_MACRO_ARGS (s) = true; /* Implement --warn-macro-sequence. 
*/ if (macro_sequence_inuse && text) @@ -691,11 +694,15 @@ m4_define (struct obstack *obs, int argc, macro_arguments *argv) static void m4_undefine (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); int i; - if (bad_argc (ARG (0), argc, 1, -1)) + if (bad_argc (me, argc, 1, -1)) return; for (i = 1; i < argc; i++) - lookup_symbol (ARG (i), SYMBOL_DELETE); + if (arg_type (argv, i) != TOKEN_TEXT) + m4_warn (0, me, _("invalid macro name ignored")); + else + lookup_symbol (ARG (i), SYMBOL_DELETE); } static void @@ -707,11 +714,15 @@ m4_pushdef (struct obstack *obs, int argc, macro_arguments *argv) static void m4_popdef (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); int i; - if (bad_argc (ARG (0), argc, 1, -1)) + if (bad_argc (me, argc, 1, -1)) return; for (i = 1; i < argc; i++) - lookup_symbol (ARG (i), SYMBOL_POPDEF); + if (arg_type (argv, i) != TOKEN_TEXT) + m4_warn (0, me, _("invalid macro name ignored")); + else + lookup_symbol (ARG (i), SYMBOL_POPDEF); } /*---------------------. @@ -721,10 +732,17 @@ m4_popdef (struct obstack *obs, int argc, macro_arguments *argv) static void m4_ifdef (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); symbol *s; - if (bad_argc (ARG (0), argc, 2, 3)) + if (bad_argc (me, argc, 2, 3)) return; + if (arg_type (argv, 1) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + push_arg (obs, argv, 3); + return; + } s = lookup_symbol (ARG (1), SYMBOL_LOOKUP); push_arg (obs, argv, (s && SYMBOL_TYPE (s) != TOKEN_VOID) ? 
2 : 3); } @@ -832,6 +850,11 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv) { for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_LOOKUP); if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID) dump_symbol (s, &data); @@ -966,6 +989,11 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv) for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_LOOKUP); if (s == NULL) continue; @@ -1287,10 +1315,10 @@ m4_dnl (struct obstack *obs, int argc, macro_arguments *argv) skip_line (me); } -/*-------------------------------------------------------------------------. -| Shift all argument one to the left, discarding the first argument. Each | -| output argument is quoted with the current quotes. | -`-------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------. +| Shift all arguments one to the left, discarding the first | +| argument. Each output argument is quoted with the current quotes. 
| +`--------------------------------------------------------------------*/ static void m4_shift (struct obstack *obs, int argc, macro_arguments *argv) @@ -1622,6 +1650,7 @@ set_trace (symbol *sym, void *data) static void m4_traceon (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); symbol *s; int i; @@ -1630,6 +1659,11 @@ m4_traceon (struct obstack *obs, int argc, macro_arguments *argv) else for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_INSERT); set_trace (s, obs); } @@ -1642,6 +1676,7 @@ m4_traceon (struct obstack *obs, int argc, macro_arguments *argv) static void m4_traceoff (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); symbol *s; int i; @@ -1650,6 +1685,11 @@ m4_traceoff (struct obstack *obs, int argc, macro_arguments *argv) else for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_LOOKUP); if (s != NULL) set_trace (s, NULL); diff --git a/src/input.c b/src/input.c index 6078b54f..cc40a587 100644 --- a/src/input.c +++ b/src/input.c @@ -364,7 +364,7 @@ push_token (token_data *token, int level, bool inuse) return false; } } - else + else if (TOKEN_DATA_TYPE (token) != TOKEN_FUNC) { /* For composite tokens, if argv is already in use, creating additional references for long text segments is more @@ -410,8 +410,23 @@ push_token (token_data *token, int level, bool inuse) adjust_refcount (level, true); inuse = true; } + else if (TOKEN_DATA_TYPE (token) == TOKEN_FUNC) + { + chain = (token_chain *) obstack_alloc (current_input, sizeof *chain); + if (next->u.u_c.end) + next->u.u_c.end->next = chain; + else + next->u.u_c.chain = chain; + next->u.u_c.end = chain; + chain->next = NULL; + chain->type = CHAIN_FUNC; + chain->quote_age = 0; + chain->u.func = 
TOKEN_DATA_FUNC (token); + } while (src_chain) { + // TODO support func concatenation + assert (src_chain->type != CHAIN_FUNC); if (level == -1) { /* Nothing to copy, since link already lives on obstack. */ @@ -571,7 +586,8 @@ pop_input (bool cleanup) break; case INPUT_MACRO: - if (!cleanup) + assert (!isp->u.func || !cleanup); + if (isp->u.func) return false; break; @@ -588,6 +604,10 @@ pop_input (bool cleanup) if (chain->u.u_s.level >= 0) adjust_refcount (chain->u.u_s.level, false); break; + case CHAIN_FUNC: + if (chain->u.func) + return false; + break; case CHAIN_ARGV: if (chain->u.u_a.index < arg_argc (chain->u.u_a.argv)) return false; @@ -708,6 +728,10 @@ input_print (struct obstack *obs, const input_block *input) &maxlen)) return; break; + case CHAIN_FUNC: + func_print (obs, find_builtin_by_addr (chain->u.func), false, + NULL); + break; case CHAIN_ARGV: assert (!chain->u.u_a.comma); if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, @@ -770,7 +794,9 @@ peek_input (bool allow_argv) break; case INPUT_MACRO: - return CHAR_MACRO; + if (block->u.func) + return CHAR_MACRO; + break; case INPUT_CHAIN: chain = block->u.u_c.chain; @@ -783,6 +809,10 @@ peek_input (bool allow_argv) if (chain->u.u_s.len) return to_uchar (*chain->u.u_s.str); break; + case CHAIN_FUNC: + if (chain->u.func) + return CHAR_MACRO; + break; case CHAIN_ARGV: argc = arg_argc (chain->u.u_a.argv); if (chain->u.u_a.index == argc) @@ -891,9 +921,9 @@ next_char_1 (bool allow_quote) break; case INPUT_MACRO: - /* INPUT_MACRO input sources has only one token */ - pop_input (true); - return CHAR_MACRO; + if (isp->u.func) + return CHAR_MACRO; + break; case INPUT_CHAIN: chain = isp->u.u_c.chain; @@ -914,6 +944,10 @@ next_char_1 (bool allow_quote) if (chain->u.u_s.level >= 0) adjust_refcount (chain->u.u_s.level, false); break; + case CHAIN_FUNC: + if (chain->u.func) + return CHAR_MACRO; + break; case CHAIN_ARGV: if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv)) { @@ -984,17 +1018,37 @@ skip_line 
(const char *name) input_change = true; } -/*-------------------------------------------------------------------. -| When a MACRO token is seen, next_token () uses init_macro_token () | -| to retrieve the value of the function pointer and store it in TD. | -`-------------------------------------------------------------------*/ +/*------------------------------------------------------------------. +| When next_token() sees a builtin token with peek_input, this | +| retrieves the value of the function pointer, stores it in TD, and | +| consumes the input so the caller does not need to do next_char. | +| If TD is NULL, discard the token instead. | +`------------------------------------------------------------------*/ static void init_macro_token (token_data *td) { - assert (isp->type == INPUT_MACRO); - TOKEN_DATA_TYPE (td) = TOKEN_FUNC; - TOKEN_DATA_FUNC (td) = isp->u.func; + int ch = next_char (false); + assert (ch == CHAR_MACRO); + if (td) + TOKEN_DATA_TYPE (td) = TOKEN_FUNC; + if (isp->type == INPUT_MACRO) + { + assert (isp->u.func); + if (td) + TOKEN_DATA_FUNC (td) = isp->u.func; + isp->u.func = NULL; + } + else + { + token_chain *chain; + assert (isp->type == INPUT_CHAIN); + chain = isp->u.u_c.chain; + assert (!chain->quote_age && chain->type == CHAIN_FUNC && chain->u.func); + if (td) + TOKEN_DATA_FUNC (td) = chain->u.func; + chain->u.func = NULL; + } } /*-------------------------------------------------------------------. 
@@ -1518,7 +1572,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, if (ch == CHAR_MACRO) { init_macro_token (td); - next_char (false); #ifdef DEBUG_INPUT xfprintf (stderr, "next_token -> MACDEF (%s)\n", find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name); @@ -1545,20 +1598,30 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, if (obs) obs_td = obs; obstack_grow (obs_td, curr_comm.str1, curr_comm.len1); - while ((ch = next_char (false)) < CHAR_EOF - && !MATCH (ch, curr_comm.str2, true)) - obstack_1grow (obs_td, ch); - if (ch != CHAR_EOF) + while (1) { + ch = next_char (false); + if (ch == CHAR_EOF) + /* Current_file changed to "" if we see CHAR_EOF, use the + previous value we stored earlier. */ + m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller, + _("end of file in comment")); + if (ch == CHAR_MACRO) + { + // TODO support concatenation of builtins + m4_warn_at_line (0, file, *line, caller, + _("cannot comment builtin")); + init_macro_token (NULL); + continue; + } + if (MATCH (ch, curr_comm.str2, true)) + { + obstack_grow (obs_td, curr_comm.str2, curr_comm.len2); + break; + } assert (ch < CHAR_EOF); - obstack_grow (obs_td, curr_comm.str2, curr_comm.len2); + obstack_1grow (obs_td, ch); } - else - /* Current_file changed to "" if we see CHAR_EOF, use the - previous value we stored earlier. 
*/ - m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller, - _("end of file in comment")); - type = TOKEN_STRING; } else if (default_word_regexp && (isalpha (ch) || ch == '_')) @@ -1610,6 +1673,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, else if (!MATCH (ch, curr_quote.str1, true)) { + assert (ch < CHAR_EOF); switch (ch) { case '(': @@ -1632,6 +1696,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, if (obs) obs_td = obs; quote_level = 1; + type = TOKEN_STRING; while (1) { ch = next_char (obs != NULL && current_quote_age); @@ -1641,6 +1706,35 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller, _("end of file in string")); + if (ch == CHAR_MACRO) + { + // TODO support concatenation of builtins + if (obstack_object_size (obs_td) == 0 + && TOKEN_DATA_TYPE (td) == TOKEN_VOID) + { + assert (quote_level == 1); + init_macro_token (td); + ch = peek_input (false); + if (MATCH (ch, curr_quote.str2, false)) + { +#ifdef DEBUG_INPUT + const builtin *bp + = find_builtin_by_addr (TOKEN_DATA_FUNC (td)); + xfprintf (stderr, "next_token -> MACDEF (%s)\n", + bp->name); +#endif + ch = next_char (false); + MATCH (ch, curr_quote.str2, true); + return TOKEN_MACDEF; + } + TOKEN_DATA_TYPE (td) = TOKEN_VOID; + } + else + init_macro_token (NULL); + m4_warn_at_line (0, file, *line, caller, + _("cannot quote builtin")); + continue; + } if (ch == CHAR_QUOTE) append_quote_token (obs, td); else if (MATCH (ch, curr_quote.str2, true)) @@ -1660,7 +1754,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, obstack_1grow (obs_td, ch); } } - type = TOKEN_STRING; } if (TOKEN_DATA_TYPE (td) == TOKEN_VOID) @@ -1709,6 +1802,9 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, xfprintf (stderr, "%s", chain->u.u_s.str); len += chain->u.u_s.len; break; + case CHAIN_FUNC: + xfprintf (stderr, "<func>"); 
+ break; case CHAIN_ARGV: xfprintf (stderr, "{$@}"); break; @@ -161,30 +161,30 @@ extern FILE *debug; /* The value of debug_level is a bitmask of the following. */ /* a: show arglist in trace output */ -#define DEBUG_TRACE_ARGS 1 +#define DEBUG_TRACE_ARGS 0x001 /* e: show expansion in trace output */ -#define DEBUG_TRACE_EXPANSION 2 +#define DEBUG_TRACE_EXPANSION 0x002 /* q: quote args and expansion in trace output */ -#define DEBUG_TRACE_QUOTE 4 +#define DEBUG_TRACE_QUOTE 0x004 /* t: trace all macros -- overrides trace{on,off} */ -#define DEBUG_TRACE_ALL 8 +#define DEBUG_TRACE_ALL 0x008 /* l: add line numbers to trace output */ -#define DEBUG_TRACE_LINE 16 +#define DEBUG_TRACE_LINE 0x010 /* f: add file name to trace output */ -#define DEBUG_TRACE_FILE 32 +#define DEBUG_TRACE_FILE 0x020 /* p: trace path search of include files */ -#define DEBUG_TRACE_PATH 64 +#define DEBUG_TRACE_PATH 0x040 /* c: show macro call before args collection */ -#define DEBUG_TRACE_CALL 128 +#define DEBUG_TRACE_CALL 0x080 /* i: trace changes of input files */ -#define DEBUG_TRACE_INPUT 256 +#define DEBUG_TRACE_INPUT 0x100 /* x: add call id to trace output */ -#define DEBUG_TRACE_CALLID 512 +#define DEBUG_TRACE_CALLID 0x200 /* V: very verbose -- print everything */ -#define DEBUG_TRACE_VERBOSE 1023 +#define DEBUG_TRACE_VERBOSE 0x3FF /* default flags -- equiv: aeq */ -#define DEBUG_TRACE_DEFAULT 7 +#define DEBUG_TRACE_DEFAULT 0x007 #define DEBUG_PRINT1(Fmt, Arg1) \ do \ @@ -283,7 +283,7 @@ enum token_data_type enum token_chain_type { CHAIN_STR, /* Link contains a string, u.u_s is valid. */ - // TODO add CHAIN_FUNC + CHAIN_FUNC, /* Builtin function definition, u.func is valid. */ CHAIN_ARGV /* Link contains a $@ reference, u.u_a is valid. */ }; @@ -304,6 +304,7 @@ struct token_chain int level; /* Expansion level of link content, or -1. */ } u_s; + builtin_func *func; /* Builtin token from defn. */ struct { macro_arguments *argv; /* Reference to earlier $@.
*/ diff --git a/src/macro.c b/src/macro.c index 7a79b766..d6e92728 100644 --- a/src/macro.c +++ b/src/macro.c @@ -470,6 +470,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) assert (paren_level == 0 && TOKEN_DATA_TYPE (argp) == TOKEN_VOID && obstack_object_size (obs) == 0 && td.u.u_c.chain == td.u.u_c.end + && td.u.u_c.chain->quote_age == age && td.u.u_c.chain->type == CHAIN_ARGV); TOKEN_DATA_TYPE (argp) = TOKEN_COMP; argp->u.u_c.chain = argp->u.u_c.end = td.u.u_c.chain; @@ -581,7 +582,8 @@ collect_arguments (symbol *sym, struct obstack *arguments, argv->wrapper = args.wrapper; argv->has_ref = args.has_ref; argv->has_func = args.has_func; - if (args.quote_age != quote_age ()) + // TODO allow funcs without crippling quote age + if (args.quote_age != quote_age () || args.has_func) argv->quote_age = 0; argv->arraylen = args.arraylen; return argv; @@ -661,8 +663,10 @@ expand_macro (symbol *sym) if (!stacks[level].args) { assert (!stacks[level].refcount); - stacks[level].args = xmalloc (sizeof (struct obstack)); - stacks[level].argv = xmalloc (sizeof (struct obstack)); + stacks[level].args = + (struct obstack *) xmalloc (sizeof *stacks[level].args); + stacks[level].argv = + (struct obstack *) xmalloc (sizeof *stacks[level].argv); obstack_init (stacks[level].args); obstack_init (stacks[level].argv); stacks[level].args_base = obstack_finish (stacks[level].args); @@ -814,9 +818,10 @@ arg_adjust_refcount (macro_arguments *argv, bool increase) /* Given ARGV, return the token_data that contains argument INDEX; INDEX must be > 0, < argv->argc. If LEVEL is non-NULL, *LEVEL is set to the obstack level that contains the token (which is not - necessarily the level of ARGV). */ + necessarily the level of ARGV). If FLATTEN, avoid returning a + builtin function. 
*/ static token_data * -arg_token (macro_arguments *argv, unsigned int index, int *level) +arg_token (macro_arguments *argv, unsigned int index, int *level, bool flatten) { unsigned int i; token_data *token; @@ -824,8 +829,14 @@ arg_token (macro_arguments *argv, unsigned int index, int *level) assert (index && index < argv->argc); if (level) *level = argv->level; + flatten |= argv->flatten; if (!argv->wrapper) - return argv->array[index - 1]; + { + token = argv->array[index - 1]; + if (flatten && TOKEN_DATA_TYPE (token) == TOKEN_FUNC) + token = &empty_token; + return token; + } /* Must cycle through all tokens, until we find index, since a ref may occupy multiple indices. */ @@ -840,10 +851,8 @@ arg_token (macro_arguments *argv, unsigned int index, int *level) - chain->u.u_a.skip_last)) { token = arg_token (chain->u.u_a.argv, - chain->u.u_a.index - 1 + index, level); - if (chain->u.u_a.flatten - && TOKEN_DATA_TYPE (token) == TOKEN_FUNC) - token = &empty_token; + chain->u.u_a.index - 1 + index, level, + flatten || chain->u.u_a.flatten); break; } index -= (chain->u.u_a.argv->argc - chain->u.u_a.index @@ -895,7 +904,7 @@ arg_type (macro_arguments *argv, unsigned int index) if (argv->flatten || !argv->has_func || index == 0 || index >= argv->argc) return TOKEN_TEXT; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); type = TOKEN_DATA_TYPE (token); if (type == TOKEN_COMP && !token->u.u_c.has_func) type = TOKEN_TEXT; @@ -921,7 +930,7 @@ arg_text (macro_arguments *argv, unsigned int index) return argv->argv0; if (index >= argv->argc) return ""; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -937,11 +946,16 @@ arg_text (macro_arguments *argv, unsigned int index) case CHAIN_STR: obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); break; + case CHAIN_FUNC: + // TODO concatenate builtins + assert (!"implemented"); + abort (); case 
CHAIN_ARGV: arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, quote_cache (NULL, chain->quote_age, chain->u.u_a.quotes), - chain->u.u_a.flatten, NULL, NULL, false); + argv->flatten || chain->u.u_a.flatten, NULL, NULL, + false); break; default: assert (!"arg_text"); @@ -951,6 +965,7 @@ arg_text (macro_arguments *argv, unsigned int index) } obstack_1grow (obs, '\0'); return (char *) obstack_finish (obs); + case TOKEN_FUNC: default: break; } @@ -965,8 +980,8 @@ arg_text (macro_arguments *argv, unsigned int index) bool arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) { - token_data *ta = arg_token (argv, indexa, NULL); - token_data *tb = arg_token (argv, indexb, NULL); + token_data *ta = arg_token (argv, indexa, NULL, false); + token_data *tb = arg_token (argv, indexb, NULL, false); token_chain tmpa; token_chain tmpb; token_chain *ca = &tmpa; @@ -983,30 +998,45 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) TOKEN_DATA_LEN (ta)) == 0); /* Convert both arguments to chains, if not one already. */ - // TODO - allow builtin tokens in the comparison? 
- if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT) + switch (TOKEN_DATA_TYPE (ta)) { + case TOKEN_TEXT: tmpa.next = NULL; tmpa.type = CHAIN_STR; tmpa.u.u_s.str = TOKEN_DATA_TEXT (ta); tmpa.u.u_s.len = TOKEN_DATA_LEN (ta); - } - else - { - assert (TOKEN_DATA_TYPE (ta) == TOKEN_COMP); + break; + case TOKEN_FUNC: + tmpa.next = NULL; + tmpa.type = CHAIN_FUNC; + tmpa.u.func = TOKEN_DATA_FUNC (ta); + break; + case TOKEN_COMP: ca = ta->u.u_c.chain; + break; + default: + assert (!"arg_equal"); + abort (); } - if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT) + switch (TOKEN_DATA_TYPE (tb)) { + case TOKEN_TEXT: tmpb.next = NULL; tmpb.type = CHAIN_STR; tmpb.u.u_s.str = TOKEN_DATA_TEXT (tb); tmpb.u.u_s.len = TOKEN_DATA_LEN (tb); - } - else - { - assert (TOKEN_DATA_TYPE (tb) == TOKEN_COMP); + break; + case TOKEN_FUNC: + tmpb.next = NULL; + tmpb.type = CHAIN_FUNC; + tmpb.u.func = TOKEN_DATA_FUNC (tb); + break; + case TOKEN_COMP: cb = tb->u.u_c.chain; + break; + default: + assert (!"arg_equal"); + abort (); } /* Compare each link of the chain. */ @@ -1040,6 +1070,14 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) cb = &tmpb; continue; } + if (ca->type == CHAIN_FUNC) + { + if (cb->type != CHAIN_FUNC || ca->u.func != cb->u.func) + return false; + ca = ca->next; + cb = cb->next; + continue; + } assert (ca->type == CHAIN_STR && cb->type == CHAIN_STR); if (ca->u.u_s.len == cb->u.u_s.len) { @@ -1089,7 +1127,7 @@ arg_empty (macro_arguments *argv, unsigned int index) return argv->argv0_len == 0; if (index >= argv->argc) return true; - return arg_token (argv, index, NULL) == &empty_token; + return arg_token (argv, index, NULL, false) == &empty_token; } /* Given ARGV, return the length of argument INDEX. 
Abort if the @@ -1105,7 +1143,7 @@ arg_len (macro_arguments *argv, unsigned int index) return argv->argv0_len; if (index >= argv->argc) return 0; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -1124,6 +1162,10 @@ arg_len (macro_arguments *argv, unsigned int index) case CHAIN_STR: len += chain->u.u_s.len; break; + case CHAIN_FUNC: + // TODO concatenate builtins + assert (!"implemented"); + abort (); case CHAIN_ARGV: i = chain->u.u_a.index; limit = chain->u.u_a.argv->argc - i - chain->u.u_a.skip_last; @@ -1136,8 +1178,8 @@ arg_len (macro_arguments *argv, unsigned int index) while (limit--) { // TODO handle builtin concatenation - if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, - NULL)) == TOKEN_FUNC) + if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, NULL, + false)) == TOKEN_FUNC) assert (argv->flatten); else len += arg_len (chain->u.u_a.argv, i); @@ -1152,6 +1194,7 @@ arg_len (macro_arguments *argv, unsigned int index) } assert (len); return len; + case TOKEN_FUNC: default: break; } @@ -1166,7 +1209,7 @@ arg_func (macro_arguments *argv, unsigned int index) { token_data *token; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); assert (TOKEN_DATA_TYPE (token) == TOKEN_FUNC); return TOKEN_DATA_FUNC (token); } @@ -1215,7 +1258,7 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index, if (use_sep && obstack_print (obs, sep, sep_len, plen)) return true; use_sep = true; - token = arg_token (argv, i, NULL); + token = arg_token (argv, i, NULL, flatten); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -1241,6 +1284,10 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index, &len)) done = true; break; + case CHAIN_FUNC: + func_print (obs, find_builtin_by_addr (chain->u.func), + flatten, quotes); + break; case CHAIN_ARGV: if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, 
quote_cache (NULL, chain->quote_age, @@ -1298,8 +1345,9 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level, unsigned int i; for (i = 0; i < argv->arraylen; i++) { - if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP - && argv->array[i]->u.u_c.wrapper) + if ((TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP + && argv->array[i]->u.u_c.wrapper) + || level >= 0) break; if (index == 1) { @@ -1400,8 +1448,8 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len, return new_argv; } -/* Push argument INDEX from ARGV, which must be a text token, onto the - expansion stack OBS for rescanning. */ +/* Push argument INDEX from ARGV onto the expansion stack OBS for + rescanning. */ void push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) { @@ -1417,18 +1465,17 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) push_arg_quote (obs, argv, index, NULL); } -/* Push argument INDEX from ARGV, which must be a text token, onto the - expansion stack OBS for rescanning. INDEX must be > 0, < argc. - QUOTES determines any quote delimiters that were in effect when the - reference was created. */ +/* Push argument INDEX from ARGV onto the expansion stack OBS for + rescanning. INDEX must be > 0, < argc. QUOTES determines any + quote delimiters that were in effect when the reference was + created. */ void push_arg_quote (struct obstack *obs, macro_arguments *argv, unsigned int index, const string_pair *quotes) { int level; - token_data *token = arg_token (argv, index, &level); + token_data *token = arg_token (argv, index, &level, false); - // TODO handle func tokens? if (quotes) obstack_grow (obs, quotes->str1, quotes->len1); if (push_token (token, level, argv->inuse)) @@ -1457,8 +1504,7 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) return; } - // TODO allow shift, $@, to push builtins without flatten? 
- token = make_argv_ref_token (&td, obs, -1, argv, i, true, + token = make_argv_ref_token (&td, obs, -1, argv, i, argv->flatten, quote ? &curr_quote : NULL); assert (token); if (push_token (token, -1, argv->inuse)) |