diff options
author | Eric Blake <ebb9@byu.net> | 2007-11-21 10:14:28 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-03-14 07:03:37 -0600 |
commit | d53cf5ec91d8991f633233ed3bd72384b7cbd8b5 (patch) | |
tree | db91f9e264a6c51a838ffb58a15ab3b9facbffb1 | |
parent | fb8ad0c1e45abc64d8588dc812023a7ecfd8d269 (diff) | |
download | m4-d53cf5ec91d8991f633233ed3bd72384b7cbd8b5.tar.gz |
Stage 19: allow builtin tokens in more macros.
* src/m4.h (enum token_chain_type): Add CHAIN_FUNC.
(struct token_chain): Add func member.
* src/input.c (push_token, pop_input, input_print, peek_input)
(next_char_1, init_macro_token): Handle builtin tokens from
back-references.
(next_token): Flatten builtin tokens inside comments or quotes,
except when a builtin is the only thing inside quotes.
* src/macro.c (expand_argument): Strengthen assertion.
(collect_arguments): Handle builtin tokens.
(expand_macro): Fix harmless typo.
(arg_token): Add parameter.
(arg_type, arg_text, arg_equal, arg_empty, arg_len, arg_func)
(arg_print, push_arg_quote, push_args): Update callers to either
require flattened arguments or to handle builtins.
* src/builtin.c (m4_defn, m4_dumpdef, m4_ifdef, m4_ifelse)
(m4_popdef, m4_shift, m4_traceoff, m4_traceon, m4_undefine):
Handle builtin tokens, either by recognizing invalid macro names
or passing them through transparently.
(define_user_macro): Make all user macros handle builtin token
arguments transparently.
* doc/m4.texinfo (Defn, Ifdef, Ifelse, Debuglen): Document and
test the new behavior.
* NEWS: Document this change.
(cherry picked from commit 434656c96d6486cf959c3050aa85aecb72d948a0)
Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r-- | ChangeLog | 33 |
-rw-r--r-- | NEWS | 8 |
-rw-r--r-- | doc/m4.texinfo | 128 |
-rw-r--r-- | src/builtin.c | 74 |
-rw-r--r-- | src/input.c | 148 |
-rw-r--r-- | src/m4.h | 3 |
-rw-r--r-- | src/macro.c | 125 |
7 files changed, 414 insertions, 105 deletions
@@ -1,3 +1,36 @@ +2008-03-14 Eric Blake <ebb9@byu.net> + + Stage 19: allow builtin tokens in more macros. + Allow builtin tokens inside symbol chains, although for now, they + are not allowed inside comments or quotes. Enable builtin token + handling in more macros, if only to consistently diagnose invalid + macro names. + Memory impact: none. + Speed impact: slight impact, due to more bookkeeping. + * src/m4.h (enum token_chain_type): Add CHAIN_FUNC. + (struct token_chain): Add func member. + * src/input.c (push_token, pop_input, input_print, peek_input) + (next_char_1, init_macro_token): Handle builtin tokens from + back-references. + (next_token): Flatten builtin tokens inside comments or quotes, + except when a builtin is the only thing inside quotes. + * src/macro.c (expand_argument): Strengthen assertion. + (collect_arguments): Handle builtin tokens. + (expand_macro): Fix harmless typo. + (arg_token): Add parameter. + (arg_type, arg_text, arg_equal, arg_empty, arg_len, arg_func) + (arg_print, push_arg_quote, push_args): Update callers to either + require flattened arguments or to handle builtins. + * src/builtin.c (m4_defn, m4_dumpdef, m4_ifdef, m4_ifelse) + (m4_popdef, m4_shift, m4_traceoff, m4_traceon, m4_undefine): + Handle builtin tokens, either by recognizing invalid macro names + or passing them through transparently. + (define_user_macro): Make all user macros handle builtin token + arguments transparently. + * doc/m4.texinfo (Defn, Ifdef, Ifelse, Debuglen): Document and + test the new behavior. + * NEWS: Document this change. + 2008-03-13 Eric Blake <ebb9@byu.net> Consistently cast malloc results, for C++ compilation. @@ -8,6 +8,14 @@ Foundation, Inc. ** Fix regression introduced in 1.4.10b where using `builtin' or `indir' to perform nested `shift' calls triggered an assertion failure. +** Enhance the `ifdef', `ifelse', and `shift' builtins, as well as all + user macros, to transparently handle builtin tokens generated by `defn'. 
+ +** Enhance the `defn', `dumpdef', `ifdef', `popdef', `traceon', `traceoff', + and `undefine' macros to warn when encountering a builtin token in the + context of a macro name, rather than acting on the empty string. This + was already done for `define', `pushdef', `builtin', and `indir'. + * Noteworthy changes in Version 1.4.10b (2008-02-25) [beta] Released by Eric Blake, based on git version 1.4.10a diff --git a/doc/m4.texinfo b/doc/m4.texinfo index 7b249bdf..7ac98679 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -2214,11 +2214,17 @@ defn([l], [r]) @cindex builtins, special tokens @cindex tokens, builtin macro -Using @code{defn} to generate special tokens for builtin macros outside -of expected contexts can sometimes trigger warnings. But most of the -time, such tokens are silently converted to the empty string. +Using @code{defn} to generate special tokens for builtin macros will +generate a warning in contexts where a macro name is expected. But in +contexts that operate on text, the builtin token is just silently +converted to an empty string. As of M4 1.4.11, expansion of user macros +will also preserve builtin tokens. However, any use of builtin tokens +outside of the second argument to @code{define} and @code{pushdef} is +generally not portable, since earlier @acronym{GNU} M4 versions, as well +as other @code{m4} implementations, vary on how such tokens are treated. 
@example +$ @kbd{m4 -d} defn(`defn') @result{} define(defn(`divnum'), `cannot redefine a builtin token') @@ -2226,12 +2232,50 @@ define(defn(`divnum'), `cannot redefine a builtin token') @result{} divnum @result{}0 +len(defn(`divnum')) +@result{}0 define(`echo', `$@@') @result{} -define(`mydivnum', echo(defn(`divnum'))) +define(`mydivnum', shift(echo(`', defn(`divnum')))) @result{} mydivnum +@result{}0 +define(`', `empty-$1') +@result{} +defn(defn(`divnum')) +@error{}m4:stdin:9: Warning: defn: invalid macro name ignored +@result{} +pushdef(defn(`divnum'), `oops') +@error{}m4:stdin:10: Warning: pushdef: invalid macro name ignored +@result{} +traceon(defn(`divnum')) +@error{}m4:stdin:11: Warning: traceon: invalid macro name ignored +@result{} +indir(defn(`divnum'), `string') +@error{}m4:stdin:12: Warning: indir: invalid macro name ignored +@result{} +indir(`', `string') +@result{}empty-string +traceoff(defn(`divnum')) +@error{}m4:stdin:14: Warning: traceoff: invalid macro name ignored +@result{} +popdef(defn(`divnum')) +@error{}m4:stdin:15: Warning: popdef: invalid macro name ignored +@result{} +dumpdef(defn(`divnum')) +@error{}m4:stdin:16: Warning: dumpdef: invalid macro name ignored +@result{} +undefine(defn(`divnum')) +@error{}m4:stdin:17: Warning: undefine: invalid macro name ignored @result{} +dumpdef(`') +@error{}:@tabchar{}`empty-$1' +@result{} +define(`foo', `define(`$1', $2)')dnl +foo(`bar', defn(`divnum')) +@result{} +bar +@result{}0 @end example Also note that @code{defn} with multiple arguments can only join text @@ -2606,6 +2650,22 @@ ifdef(`no_such_macro', `yes', `no', `extra argument') @result{}no @end example +As of M4 1.4.11, @code{ifdef} transparently handles builtin tokens +generated by @code{defn} (@pxref{Defn}) that occur in either +@var{string}, although a warning is issued for invalid macro names. 
+ +@example +define(`', `empty') +@result{} +ifdef(defn(`defn'), `yes', `no') +@error{}m4:stdin:2: Warning: ifdef: invalid macro name ignored +@result{}no +define(`foo', ifdef(`divnum', defn(`divnum'), `undefined')) +@result{} +foo +@result{}0 +@end example + @node Ifelse @section If-else construct, or multibranch @@ -2706,6 +2766,24 @@ ifelse(`foo', `bar', `3', `gnu', `gnats', `6', `7', `8') @result{}7 @end example +As of M4 1.4.11, @code{ifelse} transparently handles builtin tokens +generated by @code{defn} (@pxref{Defn}). Because of this, it is always +safe to compare two macro definitions, without worrying whether the +macro might be a builtin. + +@example +ifelse(defn(`defn'), `', `yes', `no') +@result{}no +ifelse(defn(`defn'), defn(`divnum'), `yes', `no') +@result{}no +ifelse(defn(`defn'), defn(`defn'), `yes', `no') +@result{}yes +define(`foo', ifelse(`', `', defn(`divnum'))) +@result{} +foo +@result{}0 +@end example + @ignore @comment Stress tests, not worth documenting. @@ -2764,8 +2842,8 @@ ifelse(`-01234567890123456789', `-'e(long)`-', `yes', `no') @result{}no @end example -@comment It would be nice to pass builtin tokens through ifelse, m4wrap, -@comment user macros; hence the fixmes. +@comment It would be nice to pass builtin tokens through m4wrap, as well +@comment as allowing concatenation of builtins in ifelse and user macros. 
@example define(`e', `$@@')define(`q', ``$@@'')define(`u', `$*') @result{} @@ -2775,28 +2853,40 @@ cmp(`defn(`defn')', `defn(`d')') @result{}yes cmp(`defn(`defn')', ``<defn>'') @result{}no -cmp(`q(defn(`defn'))', `q(defn(`d'))') -@result{}yes -cmp(`q(defn(`defn'))', `q(`<defn>')') -@result{}no -cmp(`q(defn(`defn'))', ``'') -@result{}no -cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', defn(`d'))') -@result{}yes -cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', `<defn>')') -@result{}no -cmp(`q(`1', `2', defn(`defn'))', ```1',`2',<defn>'') -@result{}no +cmp(`q(defn(`defn'))', `q(defn(`d'))')-fixme +@error{}m4:stdin:5: Warning: ifelse: cannot quote builtin +@error{}m4:stdin:5: Warning: ifelse: cannot quote builtin +@result{}yes-fixme +cmp(`q(defn(`defn'))', `q(`<defn>')')-fixme +@error{}m4:stdin:6: Warning: ifelse: cannot quote builtin +@result{}no-fixme +cmp(`q(defn(`defn'))', ``'')-fixme +@error{}m4:stdin:7: Warning: ifelse: cannot quote builtin +@result{}no-fixme +cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', defn(`d'))')-fixme +@error{}m4:stdin:8: Warning: ifelse: cannot quote builtin +@error{}m4:stdin:8: Warning: ifelse: cannot quote builtin +@result{}yes-fixme +cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', `<defn>')')-fixme +@error{}m4:stdin:9: Warning: ifelse: cannot quote builtin +@result{}no-fixme +cmp(`q(`1', `2', defn(`defn'))', ```1',`2',<defn>'')-fixme +@error{}m4:stdin:10: Warning: ifelse: cannot quote builtin +@result{}no-fixme cmp(`q(`1', `2', defn(`defn'))', ```1',`2',`''')-fixme +@error{}m4:stdin:11: Warning: ifelse: cannot quote builtin @result{}yes-fixme define(`cat', `$1`'ifelse(`$#', `1', `', `$0(shift($@@))')') @result{} cat(`define(`foo',', defn(`divnum'), `)foo')-fixme +@error{}m4:stdin:13: Warning: ifelse: cannot quote builtin @result{}-fixme cat(e(`define(`bar',', defn(`divnum'), `)bar'))-fixme +@error{}m4:stdin:14: Warning: ifelse: cannot quote builtin @result{}-fixme m4wrap(`u('q(`cat(`define(`baz','', defn(`divnum'), ``)baz')')`)-fixme ') 
+@error{}m4:stdin:15: Warning: m4wrap: cannot quote builtin @result{} ^D @result{}-fixme @@ -3592,7 +3682,7 @@ echo(`1', `long string') @result{}1,long string indir(`echo', defn(`changequote')) @error{}m4trace: -2- defn(`change...') -@error{}m4trace: -1- indir(`echo', <changequote>) -> ``'' +@error{}m4trace: -1- indir(`echo', <changequote>) -> ``<changequote>'' @result{} @end example diff --git a/src/builtin.c b/src/builtin.c index a1d4d012..a441c4cf 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -106,17 +106,17 @@ builtin_tab[] = { "debugfile", true, false, false, m4_debugfile }, { "decr", false, false, true, m4_decr }, { "define", false, true, true, m4_define }, - { "defn", false, false, true, m4_defn }, + { "defn", false, true, true, m4_defn }, { "divert", false, false, false, m4_divert }, { "divnum", false, false, false, m4_divnum }, { "dnl", false, false, false, m4_dnl }, - { "dumpdef", false, false, false, m4_dumpdef }, + { "dumpdef", false, true, false, m4_dumpdef }, { "errprint", false, false, true, m4_errprint }, { "esyscmd", true, false, true, m4_esyscmd }, { "eval", false, false, true, m4_eval }, { "format", true, false, true, m4_format }, - { "ifdef", false, false, true, m4_ifdef }, - { "ifelse", false, false, true, m4_ifelse }, + { "ifdef", false, true, true, m4_ifdef }, + { "ifelse", false, true, true, m4_ifelse }, { "include", false, false, true, m4_include }, { "incr", false, false, true, m4_incr }, { "index", false, false, true, m4_index }, @@ -127,18 +127,18 @@ builtin_tab[] = { "maketemp", false, false, true, m4_maketemp }, { "mkstemp", false, false, true, m4_mkstemp }, { "patsubst", true, false, true, m4_patsubst }, - { "popdef", false, false, true, m4_popdef }, + { "popdef", false, true, true, m4_popdef }, { "pushdef", false, true, true, m4_pushdef }, { "regexp", true, false, true, m4_regexp }, - { "shift", false, false, true, m4_shift }, + { "shift", false, true, true, m4_shift }, { "sinclude", false, false, true, m4_sinclude }, { "substr", 
false, false, true, m4_substr }, { "syscmd", false, false, true, m4_syscmd }, { "sysval", false, false, false, m4_sysval }, - { "traceoff", false, false, false, m4_traceoff }, - { "traceon", false, false, false, m4_traceon }, + { "traceoff", false, true, false, m4_traceoff }, + { "traceon", false, true, false, m4_traceon }, { "translit", false, false, true, m4_translit }, - { "undefine", false, false, true, m4_undefine }, + { "undefine", false, true, true, m4_undefine }, { "undivert", false, false, false, m4_undivert }, { 0, false, false, false, 0 }, @@ -440,6 +440,7 @@ define_user_macro (const char *name, size_t name_len, const char *text, SYMBOL_TYPE (s) = TOKEN_TEXT; SYMBOL_TEXT (s) = defn; + SYMBOL_MACRO_ARGS (s) = true; /* Implement --warn-macro-sequence. */ if (macro_sequence_inuse && text) @@ -693,11 +694,15 @@ m4_define (struct obstack *obs, int argc, macro_arguments *argv) static void m4_undefine (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); int i; - if (bad_argc (ARG (0), argc, 1, -1)) + if (bad_argc (me, argc, 1, -1)) return; for (i = 1; i < argc; i++) - lookup_symbol (ARG (i), SYMBOL_DELETE); + if (arg_type (argv, i) != TOKEN_TEXT) + m4_warn (0, me, _("invalid macro name ignored")); + else + lookup_symbol (ARG (i), SYMBOL_DELETE); } static void @@ -709,11 +714,15 @@ m4_pushdef (struct obstack *obs, int argc, macro_arguments *argv) static void m4_popdef (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); int i; - if (bad_argc (ARG (0), argc, 1, -1)) + if (bad_argc (me, argc, 1, -1)) return; for (i = 1; i < argc; i++) - lookup_symbol (ARG (i), SYMBOL_POPDEF); + if (arg_type (argv, i) != TOKEN_TEXT) + m4_warn (0, me, _("invalid macro name ignored")); + else + lookup_symbol (ARG (i), SYMBOL_POPDEF); } /*---------------------. 
@@ -723,10 +732,17 @@ m4_popdef (struct obstack *obs, int argc, macro_arguments *argv) static void m4_ifdef (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); symbol *s; - if (bad_argc (ARG (0), argc, 2, 3)) + if (bad_argc (me, argc, 2, 3)) return; + if (arg_type (argv, 1) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + push_arg (obs, argv, 3); + return; + } s = lookup_symbol (ARG (1), SYMBOL_LOOKUP); push_arg (obs, argv, (s && SYMBOL_TYPE (s) != TOKEN_VOID) ? 2 : 3); } @@ -834,6 +850,11 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv) { for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_LOOKUP); if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID) dump_symbol (s, &data); @@ -968,6 +989,11 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv) for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_LOOKUP); if (s == NULL) continue; @@ -1289,10 +1315,10 @@ m4_dnl (struct obstack *obs, int argc, macro_arguments *argv) skip_line (me); } -/*-------------------------------------------------------------------------. -| Shift all argument one to the left, discarding the first argument. Each | -| output argument is quoted with the current quotes. | -`-------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------. +| Shift all arguments one to the left, discarding the first | +| argument. Each output argument is quoted with the current quotes. 
| +`--------------------------------------------------------------------*/ static void m4_shift (struct obstack *obs, int argc, macro_arguments *argv) @@ -1624,6 +1650,7 @@ set_trace (symbol *sym, void *data) static void m4_traceon (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); symbol *s; int i; @@ -1632,6 +1659,11 @@ m4_traceon (struct obstack *obs, int argc, macro_arguments *argv) else for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_INSERT); set_trace (s, obs); } @@ -1644,6 +1676,7 @@ m4_traceon (struct obstack *obs, int argc, macro_arguments *argv) static void m4_traceoff (struct obstack *obs, int argc, macro_arguments *argv) { + const char *me = ARG (0); symbol *s; int i; @@ -1652,6 +1685,11 @@ m4_traceoff (struct obstack *obs, int argc, macro_arguments *argv) else for (i = 1; i < argc; i++) { + if (arg_type (argv, i) != TOKEN_TEXT) + { + m4_warn (0, me, _("invalid macro name ignored")); + continue; + } s = lookup_symbol (ARG (i), SYMBOL_LOOKUP); if (s != NULL) set_trace (s, NULL); diff --git a/src/input.c b/src/input.c index 9d8b8f39..b8784d0c 100644 --- a/src/input.c +++ b/src/input.c @@ -361,7 +361,7 @@ push_token (token_data *token, int level, bool inuse) return false; } } - else + else if (TOKEN_DATA_TYPE (token) != TOKEN_FUNC) { /* For composite tokens, if argv is already in use, creating additional references for long text segments is more @@ -407,8 +407,23 @@ push_token (token_data *token, int level, bool inuse) adjust_refcount (level, true); inuse = true; } + else if (TOKEN_DATA_TYPE (token) == TOKEN_FUNC) + { + chain = (token_chain *) obstack_alloc (current_input, sizeof *chain); + if (next->u.u_c.end) + next->u.u_c.end->next = chain; + else + next->u.u_c.chain = chain; + next->u.u_c.end = chain; + chain->next = NULL; + chain->type = CHAIN_FUNC; + chain->quote_age = 0; + chain->u.func = 
TOKEN_DATA_FUNC (token); + } while (src_chain) { + /* TODO support func concatenation. */ + assert (src_chain->type != CHAIN_FUNC); if (level == -1) { /* Nothing to copy, since link already lives on obstack. */ @@ -568,7 +583,8 @@ pop_input (bool cleanup) break; case INPUT_MACRO: - if (!cleanup) + assert (!isp->u.func || !cleanup); + if (isp->u.func) return false; break; @@ -585,6 +601,10 @@ pop_input (bool cleanup) if (chain->u.u_s.level >= 0) adjust_refcount (chain->u.u_s.level, false); break; + case CHAIN_FUNC: + if (chain->u.func) + return false; + break; case CHAIN_ARGV: if (chain->u.u_a.index < arg_argc (chain->u.u_a.argv)) return false; @@ -708,6 +728,10 @@ input_print (struct obstack *obs, const input_block *input) chain->u.u_s.len, &maxlen)) return; break; + case CHAIN_FUNC: + func_print (obs, find_builtin_by_addr (chain->u.func), false, + NULL); + break; case CHAIN_ARGV: assert (!chain->u.u_a.comma); if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, @@ -770,7 +794,9 @@ peek_input (bool allow_argv) break; case INPUT_MACRO: - return CHAR_MACRO; + if (block->u.func) + return CHAR_MACRO; + break; case INPUT_CHAIN: chain = block->u.u_c.chain; @@ -783,6 +809,10 @@ peek_input (bool allow_argv) if (chain->u.u_s.len) return to_uchar (*chain->u.u_s.str); break; + case CHAIN_FUNC: + if (chain->u.func) + return CHAR_MACRO; + break; case CHAIN_ARGV: argc = arg_argc (chain->u.u_a.argv); if (chain->u.u_a.index == argc) @@ -891,9 +921,9 @@ next_char_1 (bool allow_quote) break; case INPUT_MACRO: - /* INPUT_MACRO input sources has only one token */ - pop_input (true); - return CHAR_MACRO; + if (isp->u.func) + return CHAR_MACRO; + break; case INPUT_CHAIN: chain = isp->u.u_c.chain; @@ -914,6 +944,10 @@ next_char_1 (bool allow_quote) if (chain->u.u_s.level >= 0) adjust_refcount (chain->u.u_s.level, false); break; + case CHAIN_FUNC: + if (chain->u.func) + return CHAR_MACRO; + break; case CHAIN_ARGV: if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv)) { @@ -984,17 
+1018,37 @@ skip_line (const char *name) input_change = true; } -/*-------------------------------------------------------------------. -| When a MACRO token is seen, next_token () uses init_macro_token () | -| to retrieve the value of the function pointer and store it in TD. | -`-------------------------------------------------------------------*/ +/*------------------------------------------------------------------. +| When next_token() sees a builtin token with peek_input, this | +| retrieves the value of the function pointer, stores it in TD, and | +| consumes the input so the caller does not need to do next_char. | +| If TD is NULL, discard the token instead. | +`------------------------------------------------------------------*/ static void init_macro_token (token_data *td) { - assert (isp->type == INPUT_MACRO); - TOKEN_DATA_TYPE (td) = TOKEN_FUNC; - TOKEN_DATA_FUNC (td) = isp->u.func; + int ch = next_char (false); + assert (ch == CHAR_MACRO); + if (td) + TOKEN_DATA_TYPE (td) = TOKEN_FUNC; + if (isp->type == INPUT_MACRO) + { + assert (isp->u.func); + if (td) + TOKEN_DATA_FUNC (td) = isp->u.func; + isp->u.func = NULL; + } + else + { + token_chain *chain; + assert (isp->type == INPUT_CHAIN); + chain = isp->u.u_c.chain; + assert (!chain->quote_age && chain->type == CHAIN_FUNC && chain->u.func); + if (td) + TOKEN_DATA_FUNC (td) = chain->u.func; + chain->u.func = NULL; + } } /*-------------------------------------------------------------------. 
@@ -1516,7 +1570,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, if (ch == CHAR_MACRO) { init_macro_token (td); - next_char (false); #ifdef DEBUG_INPUT xfprintf (stderr, "next_token -> MACDEF (%s)\n", find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name); @@ -1543,20 +1596,30 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, if (obs) obs_td = obs; obstack_grow (obs_td, curr_comm.str1, curr_comm.len1); - while ((ch = next_char (false)) < CHAR_EOF - && !MATCH (ch, curr_comm.str2, true)) - obstack_1grow (obs_td, ch); - if (ch != CHAR_EOF) + while (1) { + ch = next_char (false); + if (ch == CHAR_EOF) + /* Current_file changed to "" if we see CHAR_EOF, use the + previous value we stored earlier. */ + m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller, + _("end of file in comment")); + if (ch == CHAR_MACRO) + { + /* TODO support concatenation of builtins. */ + m4_warn_at_line (0, file, *line, caller, + _("cannot comment builtin")); + init_macro_token (NULL); + continue; + } + if (MATCH (ch, curr_comm.str2, true)) + { + obstack_grow (obs_td, curr_comm.str2, curr_comm.len2); + break; + } assert (ch < CHAR_EOF); - obstack_grow (obs_td, curr_comm.str2, curr_comm.len2); + obstack_1grow (obs_td, ch); } - else - /* Current_file changed to "" if we see CHAR_EOF, use the - previous value we stored earlier. 
*/ - m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller, - _("end of file in comment")); - type = TOKEN_STRING; } else if (default_word_regexp && (isalpha (ch) || ch == '_')) @@ -1608,6 +1671,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, else if (!MATCH (ch, curr_quote.str1, true)) { + assert (ch < CHAR_EOF); switch (ch) { case '(': @@ -1630,6 +1694,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, if (obs) obs_td = obs; quote_level = 1; + type = TOKEN_STRING; while (1) { ch = next_char (obs != NULL && current_quote_age); @@ -1639,6 +1704,35 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller, _("end of file in string")); + if (ch == CHAR_MACRO) + { + /* TODO support concatenation of builtins. */ + if (obstack_object_size (obs_td) == 0 + && TOKEN_DATA_TYPE (td) == TOKEN_VOID) + { + assert (quote_level == 1); + init_macro_token (td); + ch = peek_input (false); + if (MATCH (ch, curr_quote.str2, false)) + { +#ifdef DEBUG_INPUT + const builtin *bp + = find_builtin_by_addr (TOKEN_DATA_FUNC (td)); + xfprintf (stderr, "next_token -> MACDEF (%s)\n", + bp->name); +#endif + ch = next_char (false); + MATCH (ch, curr_quote.str2, true); + return TOKEN_MACDEF; + } + TOKEN_DATA_TYPE (td) = TOKEN_VOID; + } + else + init_macro_token (NULL); + m4_warn_at_line (0, file, *line, caller, + _("cannot quote builtin")); + continue; + } if (ch == CHAR_QUOTE) append_quote_token (obs, td); else if (MATCH (ch, curr_quote.str2, true)) @@ -1658,7 +1752,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, obstack_1grow (obs_td, ch); } } - type = TOKEN_STRING; } if (TOKEN_DATA_TYPE (td) == TOKEN_VOID) @@ -1707,6 +1800,9 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, xfprintf (stderr, "%s", chain->u.u_s.str); len += chain->u.u_s.len; break; + case CHAIN_FUNC: + xfprintf (stderr, 
"<func>"); + break; case CHAIN_ARGV: xfprintf (stderr, "{$@}"); break; @@ -282,7 +282,7 @@ enum token_data_type enum token_chain_type { CHAIN_STR, /* Link contains a string, u.u_s is valid. */ - /* TODO add CHAIN_FUNC. */ + CHAIN_FUNC, /* Builtin function definition, u.func is valid. */ CHAIN_ARGV /* Link contains a $@ reference, u.u_a is valid. */ }; @@ -303,6 +303,7 @@ struct token_chain int level; /* Expansion level of link content, or -1. */ } u_s; + builtin_func *func; /* Builtin token from defn. */ struct { macro_arguments *argv; /* Reference to earlier $@. */ diff --git a/src/macro.c b/src/macro.c index 6123f05f..f794d868 100644 --- a/src/macro.c +++ b/src/macro.c @@ -470,6 +470,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) assert (paren_level == 0 && TOKEN_DATA_TYPE (argp) == TOKEN_VOID && obstack_object_size (obs) == 0 && td.u.u_c.chain == td.u.u_c.end + && td.u.u_c.chain->quote_age == age && td.u.u_c.chain->type == CHAIN_ARGV); TOKEN_DATA_TYPE (argp) = TOKEN_COMP; argp->u.u_c.chain = argp->u.u_c.end = td.u.u_c.chain; @@ -581,7 +582,8 @@ collect_arguments (symbol *sym, struct obstack *arguments, argv->wrapper = args.wrapper; argv->has_ref = args.has_ref; argv->has_func = args.has_func; - if (args.quote_age != quote_age ()) + /* TODO allow funcs without crippling quote age. 
*/ + if (args.quote_age != quote_age () || args.has_func) argv->quote_age = 0; argv->arraylen = args.arraylen; return argv; @@ -664,7 +666,7 @@ expand_macro (symbol *sym) stacks[level].args = (struct obstack *) xmalloc (sizeof *stacks[level].args); stacks[level].argv = - (struct obstack *) xmalloc (sizeof *stacks[level].args); + (struct obstack *) xmalloc (sizeof *stacks[level].argv); obstack_init (stacks[level].args); obstack_init (stacks[level].argv); stacks[level].args_base = obstack_finish (stacks[level].args); @@ -816,9 +818,10 @@ arg_adjust_refcount (macro_arguments *argv, bool increase) /* Given ARGV, return the token_data that contains argument INDEX; INDEX must be > 0, < argv->argc. If LEVEL is non-NULL, *LEVEL is set to the obstack level that contains the token (which is not - necessarily the level of ARGV). */ + necessarily the level of ARGV). If FLATTEN, avoid returning a + builtin function. */ static token_data * -arg_token (macro_arguments *argv, unsigned int index, int *level) +arg_token (macro_arguments *argv, unsigned int index, int *level, bool flatten) { unsigned int i; token_data *token; @@ -826,8 +829,14 @@ arg_token (macro_arguments *argv, unsigned int index, int *level) assert (index && index < argv->argc); if (level) *level = argv->level; + flatten |= argv->flatten; if (!argv->wrapper) - return argv->array[index - 1]; + { + token = argv->array[index - 1]; + if (flatten && TOKEN_DATA_TYPE (token) == TOKEN_FUNC) + token = &empty_token; + return token; + } /* Must cycle through all tokens, until we find index, since a ref may occupy multiple indices. 
*/ @@ -842,10 +851,8 @@ arg_token (macro_arguments *argv, unsigned int index, int *level) - chain->u.u_a.skip_last)) { token = arg_token (chain->u.u_a.argv, - chain->u.u_a.index - 1 + index, level); - if (chain->u.u_a.flatten - && TOKEN_DATA_TYPE (token) == TOKEN_FUNC) - token = &empty_token; + chain->u.u_a.index - 1 + index, level, + flatten || chain->u.u_a.flatten); break; } index -= (chain->u.u_a.argv->argc - chain->u.u_a.index @@ -897,7 +904,7 @@ arg_type (macro_arguments *argv, unsigned int index) if (argv->flatten || !argv->has_func || index == 0 || index >= argv->argc) return TOKEN_TEXT; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); type = TOKEN_DATA_TYPE (token); if (type == TOKEN_COMP && !token->u.u_c.has_func) type = TOKEN_TEXT; @@ -923,7 +930,7 @@ arg_text (macro_arguments *argv, unsigned int index) return argv->argv0; if (index >= argv->argc) return ""; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -939,11 +946,16 @@ arg_text (macro_arguments *argv, unsigned int index) case CHAIN_STR: obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); break; + case CHAIN_FUNC: + /* TODO concatenate builtins. 
*/ + assert (!"implemented"); + abort (); case CHAIN_ARGV: arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, quote_cache (NULL, chain->quote_age, chain->u.u_a.quotes), - chain->u.u_a.flatten, NULL, NULL, false); + argv->flatten || chain->u.u_a.flatten, NULL, NULL, + false); break; default: assert (!"arg_text"); @@ -953,6 +965,7 @@ arg_text (macro_arguments *argv, unsigned int index) } obstack_1grow (obs, '\0'); return (char *) obstack_finish (obs); + case TOKEN_FUNC: default: break; } @@ -967,8 +980,8 @@ arg_text (macro_arguments *argv, unsigned int index) bool arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) { - token_data *ta = arg_token (argv, indexa, NULL); - token_data *tb = arg_token (argv, indexb, NULL); + token_data *ta = arg_token (argv, indexa, NULL, false); + token_data *tb = arg_token (argv, indexb, NULL, false); token_chain tmpa; token_chain tmpb; token_chain *ca = &tmpa; @@ -985,30 +998,45 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) TOKEN_DATA_LEN (ta)) == 0); /* Convert both arguments to chains, if not one already. */ - /* TODO - allow builtin tokens in the comparison? 
*/ - if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT) + switch (TOKEN_DATA_TYPE (ta)) { + case TOKEN_TEXT: tmpa.next = NULL; tmpa.type = CHAIN_STR; tmpa.u.u_s.str = TOKEN_DATA_TEXT (ta); tmpa.u.u_s.len = TOKEN_DATA_LEN (ta); - } - else - { - assert (TOKEN_DATA_TYPE (ta) == TOKEN_COMP); + break; + case TOKEN_FUNC: + tmpa.next = NULL; + tmpa.type = CHAIN_FUNC; + tmpa.u.func = TOKEN_DATA_FUNC (ta); + break; + case TOKEN_COMP: ca = ta->u.u_c.chain; + break; + default: + assert (!"arg_equal"); + abort (); } - if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT) + switch (TOKEN_DATA_TYPE (tb)) { + case TOKEN_TEXT: tmpb.next = NULL; tmpb.type = CHAIN_STR; tmpb.u.u_s.str = TOKEN_DATA_TEXT (tb); tmpb.u.u_s.len = TOKEN_DATA_LEN (tb); - } - else - { - assert (TOKEN_DATA_TYPE (tb) == TOKEN_COMP); + break; + case TOKEN_FUNC: + tmpb.next = NULL; + tmpb.type = CHAIN_FUNC; + tmpb.u.func = TOKEN_DATA_FUNC (tb); + break; + case TOKEN_COMP: cb = tb->u.u_c.chain; + break; + default: + assert (!"arg_equal"); + abort (); } /* Compare each link of the chain. */ @@ -1042,6 +1070,14 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) cb = &tmpb; continue; } + if (ca->type == CHAIN_FUNC) + { + if (cb->type != CHAIN_FUNC || ca->u.func != cb->u.func) + return false; + ca = ca->next; + cb = cb->next; + continue; + } assert (ca->type == CHAIN_STR && cb->type == CHAIN_STR); if (ca->u.u_s.len == cb->u.u_s.len) { @@ -1091,7 +1127,7 @@ arg_empty (macro_arguments *argv, unsigned int index) return argv->argv0_len == 0; if (index >= argv->argc) return true; - return arg_token (argv, index, NULL) == &empty_token; + return arg_token (argv, index, NULL, false) == &empty_token; } /* Given ARGV, return the length of argument INDEX. 
Abort if the @@ -1107,7 +1143,7 @@ arg_len (macro_arguments *argv, unsigned int index) return argv->argv0_len; if (index >= argv->argc) return 0; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -1126,6 +1162,10 @@ arg_len (macro_arguments *argv, unsigned int index) case CHAIN_STR: len += chain->u.u_s.len; break; + case CHAIN_FUNC: + /* TODO concatenate builtins. */ + assert (!"implemented"); + abort (); case CHAIN_ARGV: i = chain->u.u_a.index; limit = chain->u.u_a.argv->argc - i - chain->u.u_a.skip_last; @@ -1138,8 +1178,8 @@ arg_len (macro_arguments *argv, unsigned int index) while (limit--) { /* TODO handle builtin concatenation. */ - if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, - NULL)) == TOKEN_FUNC) + if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, NULL, + false)) == TOKEN_FUNC) assert (argv->flatten); else len += arg_len (chain->u.u_a.argv, i); @@ -1154,6 +1194,7 @@ arg_len (macro_arguments *argv, unsigned int index) } assert (len); return len; + case TOKEN_FUNC: default: break; } @@ -1168,7 +1209,7 @@ arg_func (macro_arguments *argv, unsigned int index) { token_data *token; - token = arg_token (argv, index, NULL); + token = arg_token (argv, index, NULL, false); assert (TOKEN_DATA_TYPE (token) == TOKEN_FUNC); return TOKEN_DATA_FUNC (token); } @@ -1217,7 +1258,7 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index, if (use_sep && shipout_string_trunc (obs, sep, sep_len, plen)) return true; use_sep = true; - token = arg_token (argv, i, NULL); + token = arg_token (argv, i, NULL, flatten); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -1247,6 +1288,10 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index, chain->u.u_s.len, &len)) done = true; break; + case CHAIN_FUNC: + func_print (obs, find_builtin_by_addr (chain->u.func), + flatten, quotes); + break; case CHAIN_ARGV: if (arg_print (obs, 
chain->u.u_a.argv, chain->u.u_a.index, quote_cache (NULL, chain->quote_age, @@ -1408,8 +1453,8 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len, return new_argv; } -/* Push argument INDEX from ARGV, which must be a text token, onto the - expansion stack OBS for rescanning. */ +/* Push argument INDEX from ARGV onto the expansion stack OBS for + rescanning. */ void push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) { @@ -1425,18 +1470,17 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) push_arg_quote (obs, argv, index, NULL); } -/* Push argument INDEX from ARGV, which must be a text token, onto the - expansion stack OBS for rescanning. INDEX must be > 0, < argc. - QUOTES determines any quote delimiters that were in effect when the - reference was created. */ +/* Push argument INDEX from ARGV onto the expansion stack OBS for + rescanning. INDEX must be > 0, < argc. QUOTES determines any + quote delimiters that were in effect when the reference was + created. */ void push_arg_quote (struct obstack *obs, macro_arguments *argv, unsigned int index, const string_pair *quotes) { int level; - token_data *token = arg_token (argv, index, &level); + token_data *token = arg_token (argv, index, &level, false); - /* TODO handle func tokens. */ if (quotes) obstack_grow (obs, quotes->str1, quotes->len1); if (push_token (token, level, argv->inuse)) @@ -1465,8 +1509,7 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) return; } - /* TODO allow shift, $@, to push builtins without flatten. */ - token = make_argv_ref_token (&td, obs, -1, argv, i, true, + token = make_argv_ref_token (&td, obs, -1, argv, i, argv->flatten, quote ? &curr_quote : NULL); assert (token); if (push_token (token, -1, argv->inuse)) |