summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eric Blake <ebb9@byu.net> 2007-11-21 10:14:28 -0700
committer Eric Blake <ebb9@byu.net> 2008-03-14 07:03:37 -0600
commit d53cf5ec91d8991f633233ed3bd72384b7cbd8b5 (patch)
tree db91f9e264a6c51a838ffb58a15ab3b9facbffb1
parent fb8ad0c1e45abc64d8588dc812023a7ecfd8d269 (diff)
download m4-d53cf5ec91d8991f633233ed3bd72384b7cbd8b5.tar.gz
Stage 19: allow builtin tokens in more macros.
* src/m4.h (enum token_chain_type): Add CHAIN_FUNC.
(struct token_chain): Add func member.
* src/input.c (push_token, pop_input, input_print, peek_input)
(next_char_1, init_macro_token): Handle builtin tokens from
back-references.
(next_token): Flatten builtin tokens inside comments or quotes,
except when a builtin is the only thing inside quotes.
* src/macro.c (expand_argument): Strengthen assertion.
(collect_arguments): Handle builtin tokens.
(expand_macro): Fix harmless typo.
(arg_token): Add parameter.
(arg_type, arg_text, arg_equal, arg_empty, arg_len, arg_func)
(arg_print, push_arg_quote, push_args): Update callers to either
require flattened arguments or to handle builtins.
* src/builtin.c (m4_defn, m4_dumpdef, m4_ifdef, m4_ifelse)
(m4_popdef, m4_shift, m4_traceoff, m4_traceon, m4_undefine):
Handle builtin tokens, either by recognizing invalid macro names
or passing them through transparently.
(define_user_macro): Make all user macros handle builtin token
arguments transparently.
* doc/m4.texinfo (Defn, Ifdef, Ifelse, Debuglen): Document and
test the new behavior.
* NEWS: Document this change.

(cherry picked from commit 434656c96d6486cf959c3050aa85aecb72d948a0)

Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r-- ChangeLog 33
-rw-r--r-- NEWS 8
-rw-r--r-- doc/m4.texinfo 128
-rw-r--r-- src/builtin.c 74
-rw-r--r-- src/input.c 148
-rw-r--r-- src/m4.h 3
-rw-r--r-- src/macro.c 125
7 files changed, 414 insertions, 105 deletions
diff --git a/ChangeLog b/ChangeLog
index 9f254999..bce309d6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,36 @@
+2008-03-14 Eric Blake <ebb9@byu.net>
+
+ Stage 19: allow builtin tokens in more macros.
+ Allow builtin tokens inside symbol chains, although for now, they
+ are not allowed inside comments or quotes. Enable builtin token
+ handling in more macros, if only to consistently diagnose invalid
+ macro names.
+ Memory impact: none.
+ Speed impact: slight impact, due to more bookkeeping.
+ * src/m4.h (enum token_chain_type): Add CHAIN_FUNC.
+ (struct token_chain): Add func member.
+ * src/input.c (push_token, pop_input, input_print, peek_input)
+ (next_char_1, init_macro_token): Handle builtin tokens from
+ back-references.
+ (next_token): Flatten builtin tokens inside comments or quotes,
+ except when a builtin is the only thing inside quotes.
+ * src/macro.c (expand_argument): Strengthen assertion.
+ (collect_arguments): Handle builtin tokens.
+ (expand_macro): Fix harmless typo.
+ (arg_token): Add parameter.
+ (arg_type, arg_text, arg_equal, arg_empty, arg_len, arg_func)
+ (arg_print, push_arg_quote, push_args): Update callers to either
+ require flattened arguments or to handle builtins.
+ * src/builtin.c (m4_defn, m4_dumpdef, m4_ifdef, m4_ifelse)
+ (m4_popdef, m4_shift, m4_traceoff, m4_traceon, m4_undefine):
+ Handle builtin tokens, either by recognizing invalid macro names
+ or passing them through transparently.
+ (define_user_macro): Make all user macros handle builtin token
+ arguments transparently.
+ * doc/m4.texinfo (Defn, Ifdef, Ifelse, Debuglen): Document and
+ test the new behavior.
+ * NEWS: Document this change.
+
2008-03-13 Eric Blake <ebb9@byu.net>
Consistently cast malloc results, for C++ compilation.
diff --git a/NEWS b/NEWS
index 7a883830..7dc3aba0 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,14 @@ Foundation, Inc.
** Fix regression introduced in 1.4.10b where using `builtin' or `indir'
to perform nested `shift' calls triggered an assertion failure.
+** Enhance the `ifdef', `ifelse', and `shift' builtins, as well as all
+ user macros, to transparently handle builtin tokens generated by `defn'.
+
+** Enhance the `defn', `dumpdef', `ifdef', `popdef', `traceon', `traceoff',
+ and `undefine' macros to warn when encountering a builtin token in the
+ context of a macro name, rather than acting on the empty string. This
+ was already done for `define', `pushdef', `builtin', and `indir'.
+
* Noteworthy changes in Version 1.4.10b (2008-02-25) [beta]
Released by Eric Blake, based on git version 1.4.10a
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 7b249bdf..7ac98679 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -2214,11 +2214,17 @@ defn([l], [r])
@cindex builtins, special tokens
@cindex tokens, builtin macro
-Using @code{defn} to generate special tokens for builtin macros outside
-of expected contexts can sometimes trigger warnings. But most of the
-time, such tokens are silently converted to the empty string.
+Using @code{defn} to generate special tokens for builtin macros will
+generate a warning in contexts where a macro name is expected. But in
+contexts that operate on text, the builtin token is just silently
+converted to an empty string. As of M4 1.4.11, expansion of user macros
+will also preserve builtin tokens. However, any use of builtin tokens
+outside of the second argument to @code{define} and @code{pushdef} is
+generally not portable, since earlier @acronym{GNU} M4 versions, as well
+as other @code{m4} implementations, vary on how such tokens are treated.
@example
+$ @kbd{m4 -d}
defn(`defn')
@result{}
define(defn(`divnum'), `cannot redefine a builtin token')
@@ -2226,12 +2232,50 @@ define(defn(`divnum'), `cannot redefine a builtin token')
@result{}
divnum
@result{}0
+len(defn(`divnum'))
+@result{}0
define(`echo', `$@@')
@result{}
-define(`mydivnum', echo(defn(`divnum')))
+define(`mydivnum', shift(echo(`', defn(`divnum'))))
@result{}
mydivnum
+@result{}0
+define(`', `empty-$1')
+@result{}
+defn(defn(`divnum'))
+@error{}m4:stdin:9: Warning: defn: invalid macro name ignored
+@result{}
+pushdef(defn(`divnum'), `oops')
+@error{}m4:stdin:10: Warning: pushdef: invalid macro name ignored
+@result{}
+traceon(defn(`divnum'))
+@error{}m4:stdin:11: Warning: traceon: invalid macro name ignored
+@result{}
+indir(defn(`divnum'), `string')
+@error{}m4:stdin:12: Warning: indir: invalid macro name ignored
+@result{}
+indir(`', `string')
+@result{}empty-string
+traceoff(defn(`divnum'))
+@error{}m4:stdin:14: Warning: traceoff: invalid macro name ignored
+@result{}
+popdef(defn(`divnum'))
+@error{}m4:stdin:15: Warning: popdef: invalid macro name ignored
+@result{}
+dumpdef(defn(`divnum'))
+@error{}m4:stdin:16: Warning: dumpdef: invalid macro name ignored
+@result{}
+undefine(defn(`divnum'))
+@error{}m4:stdin:17: Warning: undefine: invalid macro name ignored
@result{}
+dumpdef(`')
+@error{}:@tabchar{}`empty-$1'
+@result{}
+define(`foo', `define(`$1', $2)')dnl
+foo(`bar', defn(`divnum'))
+@result{}
+bar
+@result{}0
@end example
Also note that @code{defn} with multiple arguments can only join text
@@ -2606,6 +2650,22 @@ ifdef(`no_such_macro', `yes', `no', `extra argument')
@result{}no
@end example
+As of M4 1.4.11, @code{ifdef} transparently handles builtin tokens
+generated by @code{defn} (@pxref{Defn}) that occur in either
+@var{string}, although a warning is issued for invalid macro names.
+
+@example
+define(`', `empty')
+@result{}
+ifdef(defn(`defn'), `yes', `no')
+@error{}m4:stdin:2: Warning: ifdef: invalid macro name ignored
+@result{}no
+define(`foo', ifdef(`divnum', defn(`divnum'), `undefined'))
+@result{}
+foo
+@result{}0
+@end example
+
@node Ifelse
@section If-else construct, or multibranch
@@ -2706,6 +2766,24 @@ ifelse(`foo', `bar', `3', `gnu', `gnats', `6', `7', `8')
@result{}7
@end example
+As of M4 1.4.11, @code{ifelse} transparently handles builtin tokens
+generated by @code{defn} (@pxref{Defn}). Because of this, it is always
+safe to compare two macro definitions, without worrying whether the
+macro might be a builtin.
+
+@example
+ifelse(defn(`defn'), `', `yes', `no')
+@result{}no
+ifelse(defn(`defn'), defn(`divnum'), `yes', `no')
+@result{}no
+ifelse(defn(`defn'), defn(`defn'), `yes', `no')
+@result{}yes
+define(`foo', ifelse(`', `', defn(`divnum')))
+@result{}
+foo
+@result{}0
+@end example
+
@ignore
@comment Stress tests, not worth documenting.
@@ -2764,8 +2842,8 @@ ifelse(`-01234567890123456789', `-'e(long)`-', `yes', `no')
@result{}no
@end example
-@comment It would be nice to pass builtin tokens through ifelse, m4wrap,
-@comment user macros; hence the fixmes.
+@comment It would be nice to pass builtin tokens through m4wrap, as well
+@comment as allowing concatenation of builtins in ifelse and user macros.
@example
define(`e', `$@@')define(`q', ``$@@'')define(`u', `$*')
@result{}
@@ -2775,28 +2853,40 @@ cmp(`defn(`defn')', `defn(`d')')
@result{}yes
cmp(`defn(`defn')', ``<defn>'')
@result{}no
-cmp(`q(defn(`defn'))', `q(defn(`d'))')
-@result{}yes
-cmp(`q(defn(`defn'))', `q(`<defn>')')
-@result{}no
-cmp(`q(defn(`defn'))', ``'')
-@result{}no
-cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', defn(`d'))')
-@result{}yes
-cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', `<defn>')')
-@result{}no
-cmp(`q(`1', `2', defn(`defn'))', ```1',`2',<defn>'')
-@result{}no
+cmp(`q(defn(`defn'))', `q(defn(`d'))')-fixme
+@error{}m4:stdin:5: Warning: ifelse: cannot quote builtin
+@error{}m4:stdin:5: Warning: ifelse: cannot quote builtin
+@result{}yes-fixme
+cmp(`q(defn(`defn'))', `q(`<defn>')')-fixme
+@error{}m4:stdin:6: Warning: ifelse: cannot quote builtin
+@result{}no-fixme
+cmp(`q(defn(`defn'))', ``'')-fixme
+@error{}m4:stdin:7: Warning: ifelse: cannot quote builtin
+@result{}no-fixme
+cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', defn(`d'))')-fixme
+@error{}m4:stdin:8: Warning: ifelse: cannot quote builtin
+@error{}m4:stdin:8: Warning: ifelse: cannot quote builtin
+@result{}yes-fixme
+cmp(`q(`1', `2', defn(`defn'))', `q(`1', `2', `<defn>')')-fixme
+@error{}m4:stdin:9: Warning: ifelse: cannot quote builtin
+@result{}no-fixme
+cmp(`q(`1', `2', defn(`defn'))', ```1',`2',<defn>'')-fixme
+@error{}m4:stdin:10: Warning: ifelse: cannot quote builtin
+@result{}no-fixme
cmp(`q(`1', `2', defn(`defn'))', ```1',`2',`''')-fixme
+@error{}m4:stdin:11: Warning: ifelse: cannot quote builtin
@result{}yes-fixme
define(`cat', `$1`'ifelse(`$#', `1', `', `$0(shift($@@))')')
@result{}
cat(`define(`foo',', defn(`divnum'), `)foo')-fixme
+@error{}m4:stdin:13: Warning: ifelse: cannot quote builtin
@result{}-fixme
cat(e(`define(`bar',', defn(`divnum'), `)bar'))-fixme
+@error{}m4:stdin:14: Warning: ifelse: cannot quote builtin
@result{}-fixme
m4wrap(`u('q(`cat(`define(`baz','', defn(`divnum'), ``)baz')')`)-fixme
')
+@error{}m4:stdin:15: Warning: m4wrap: cannot quote builtin
@result{}
^D
@result{}-fixme
@@ -3592,7 +3682,7 @@ echo(`1', `long string')
@result{}1,long string
indir(`echo', defn(`changequote'))
@error{}m4trace: -2- defn(`change...')
-@error{}m4trace: -1- indir(`echo', <changequote>) -> ``''
+@error{}m4trace: -1- indir(`echo', <changequote>) -> ``<changequote>''
@result{}
@end example
diff --git a/src/builtin.c b/src/builtin.c
index a1d4d012..a441c4cf 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -106,17 +106,17 @@ builtin_tab[] =
{ "debugfile", true, false, false, m4_debugfile },
{ "decr", false, false, true, m4_decr },
{ "define", false, true, true, m4_define },
- { "defn", false, false, true, m4_defn },
+ { "defn", false, true, true, m4_defn },
{ "divert", false, false, false, m4_divert },
{ "divnum", false, false, false, m4_divnum },
{ "dnl", false, false, false, m4_dnl },
- { "dumpdef", false, false, false, m4_dumpdef },
+ { "dumpdef", false, true, false, m4_dumpdef },
{ "errprint", false, false, true, m4_errprint },
{ "esyscmd", true, false, true, m4_esyscmd },
{ "eval", false, false, true, m4_eval },
{ "format", true, false, true, m4_format },
- { "ifdef", false, false, true, m4_ifdef },
- { "ifelse", false, false, true, m4_ifelse },
+ { "ifdef", false, true, true, m4_ifdef },
+ { "ifelse", false, true, true, m4_ifelse },
{ "include", false, false, true, m4_include },
{ "incr", false, false, true, m4_incr },
{ "index", false, false, true, m4_index },
@@ -127,18 +127,18 @@ builtin_tab[] =
{ "maketemp", false, false, true, m4_maketemp },
{ "mkstemp", false, false, true, m4_mkstemp },
{ "patsubst", true, false, true, m4_patsubst },
- { "popdef", false, false, true, m4_popdef },
+ { "popdef", false, true, true, m4_popdef },
{ "pushdef", false, true, true, m4_pushdef },
{ "regexp", true, false, true, m4_regexp },
- { "shift", false, false, true, m4_shift },
+ { "shift", false, true, true, m4_shift },
{ "sinclude", false, false, true, m4_sinclude },
{ "substr", false, false, true, m4_substr },
{ "syscmd", false, false, true, m4_syscmd },
{ "sysval", false, false, false, m4_sysval },
- { "traceoff", false, false, false, m4_traceoff },
- { "traceon", false, false, false, m4_traceon },
+ { "traceoff", false, true, false, m4_traceoff },
+ { "traceon", false, true, false, m4_traceon },
{ "translit", false, false, true, m4_translit },
- { "undefine", false, false, true, m4_undefine },
+ { "undefine", false, true, true, m4_undefine },
{ "undivert", false, false, false, m4_undivert },
{ 0, false, false, false, 0 },
@@ -440,6 +440,7 @@ define_user_macro (const char *name, size_t name_len, const char *text,
SYMBOL_TYPE (s) = TOKEN_TEXT;
SYMBOL_TEXT (s) = defn;
+ SYMBOL_MACRO_ARGS (s) = true;
/* Implement --warn-macro-sequence. */
if (macro_sequence_inuse && text)
@@ -693,11 +694,15 @@ m4_define (struct obstack *obs, int argc, macro_arguments *argv)
static void
m4_undefine (struct obstack *obs, int argc, macro_arguments *argv)
{
+ const char *me = ARG (0);
int i;
- if (bad_argc (ARG (0), argc, 1, -1))
+ if (bad_argc (me, argc, 1, -1))
return;
for (i = 1; i < argc; i++)
- lookup_symbol (ARG (i), SYMBOL_DELETE);
+ if (arg_type (argv, i) != TOKEN_TEXT)
+ m4_warn (0, me, _("invalid macro name ignored"));
+ else
+ lookup_symbol (ARG (i), SYMBOL_DELETE);
}
static void
@@ -709,11 +714,15 @@ m4_pushdef (struct obstack *obs, int argc, macro_arguments *argv)
static void
m4_popdef (struct obstack *obs, int argc, macro_arguments *argv)
{
+ const char *me = ARG (0);
int i;
- if (bad_argc (ARG (0), argc, 1, -1))
+ if (bad_argc (me, argc, 1, -1))
return;
for (i = 1; i < argc; i++)
- lookup_symbol (ARG (i), SYMBOL_POPDEF);
+ if (arg_type (argv, i) != TOKEN_TEXT)
+ m4_warn (0, me, _("invalid macro name ignored"));
+ else
+ lookup_symbol (ARG (i), SYMBOL_POPDEF);
}
/*---------------------.
@@ -723,10 +732,17 @@ m4_popdef (struct obstack *obs, int argc, macro_arguments *argv)
static void
m4_ifdef (struct obstack *obs, int argc, macro_arguments *argv)
{
+ const char *me = ARG (0);
symbol *s;
- if (bad_argc (ARG (0), argc, 2, 3))
+ if (bad_argc (me, argc, 2, 3))
return;
+ if (arg_type (argv, 1) != TOKEN_TEXT)
+ {
+ m4_warn (0, me, _("invalid macro name ignored"));
+ push_arg (obs, argv, 3);
+ return;
+ }
s = lookup_symbol (ARG (1), SYMBOL_LOOKUP);
push_arg (obs, argv, (s && SYMBOL_TYPE (s) != TOKEN_VOID) ? 2 : 3);
}
@@ -834,6 +850,11 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv)
{
for (i = 1; i < argc; i++)
{
+ if (arg_type (argv, i) != TOKEN_TEXT)
+ {
+ m4_warn (0, me, _("invalid macro name ignored"));
+ continue;
+ }
s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
if (s != NULL && SYMBOL_TYPE (s) != TOKEN_VOID)
dump_symbol (s, &data);
@@ -968,6 +989,11 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv)
for (i = 1; i < argc; i++)
{
+ if (arg_type (argv, i) != TOKEN_TEXT)
+ {
+ m4_warn (0, me, _("invalid macro name ignored"));
+ continue;
+ }
s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
if (s == NULL)
continue;
@@ -1289,10 +1315,10 @@ m4_dnl (struct obstack *obs, int argc, macro_arguments *argv)
skip_line (me);
}
-/*-------------------------------------------------------------------------.
-| Shift all argument one to the left, discarding the first argument. Each |
-| output argument is quoted with the current quotes. |
-`-------------------------------------------------------------------------*/
+/*--------------------------------------------------------------------.
+| Shift all arguments one to the left, discarding the first |
+| argument. Each output argument is quoted with the current quotes. |
+`--------------------------------------------------------------------*/
static void
m4_shift (struct obstack *obs, int argc, macro_arguments *argv)
@@ -1624,6 +1650,7 @@ set_trace (symbol *sym, void *data)
static void
m4_traceon (struct obstack *obs, int argc, macro_arguments *argv)
{
+ const char *me = ARG (0);
symbol *s;
int i;
@@ -1632,6 +1659,11 @@ m4_traceon (struct obstack *obs, int argc, macro_arguments *argv)
else
for (i = 1; i < argc; i++)
{
+ if (arg_type (argv, i) != TOKEN_TEXT)
+ {
+ m4_warn (0, me, _("invalid macro name ignored"));
+ continue;
+ }
s = lookup_symbol (ARG (i), SYMBOL_INSERT);
set_trace (s, obs);
}
@@ -1644,6 +1676,7 @@ m4_traceon (struct obstack *obs, int argc, macro_arguments *argv)
static void
m4_traceoff (struct obstack *obs, int argc, macro_arguments *argv)
{
+ const char *me = ARG (0);
symbol *s;
int i;
@@ -1652,6 +1685,11 @@ m4_traceoff (struct obstack *obs, int argc, macro_arguments *argv)
else
for (i = 1; i < argc; i++)
{
+ if (arg_type (argv, i) != TOKEN_TEXT)
+ {
+ m4_warn (0, me, _("invalid macro name ignored"));
+ continue;
+ }
s = lookup_symbol (ARG (i), SYMBOL_LOOKUP);
if (s != NULL)
set_trace (s, NULL);
diff --git a/src/input.c b/src/input.c
index 9d8b8f39..b8784d0c 100644
--- a/src/input.c
+++ b/src/input.c
@@ -361,7 +361,7 @@ push_token (token_data *token, int level, bool inuse)
return false;
}
}
- else
+ else if (TOKEN_DATA_TYPE (token) != TOKEN_FUNC)
{
/* For composite tokens, if argv is already in use, creating
additional references for long text segments is more
@@ -407,8 +407,23 @@ push_token (token_data *token, int level, bool inuse)
adjust_refcount (level, true);
inuse = true;
}
+ else if (TOKEN_DATA_TYPE (token) == TOKEN_FUNC)
+ {
+ chain = (token_chain *) obstack_alloc (current_input, sizeof *chain);
+ if (next->u.u_c.end)
+ next->u.u_c.end->next = chain;
+ else
+ next->u.u_c.chain = chain;
+ next->u.u_c.end = chain;
+ chain->next = NULL;
+ chain->type = CHAIN_FUNC;
+ chain->quote_age = 0;
+ chain->u.func = TOKEN_DATA_FUNC (token);
+ }
while (src_chain)
{
+ /* TODO support func concatenation. */
+ assert (src_chain->type != CHAIN_FUNC);
if (level == -1)
{
/* Nothing to copy, since link already lives on obstack. */
@@ -568,7 +583,8 @@ pop_input (bool cleanup)
break;
case INPUT_MACRO:
- if (!cleanup)
+ assert (!isp->u.func || !cleanup);
+ if (isp->u.func)
return false;
break;
@@ -585,6 +601,10 @@ pop_input (bool cleanup)
if (chain->u.u_s.level >= 0)
adjust_refcount (chain->u.u_s.level, false);
break;
+ case CHAIN_FUNC:
+ if (chain->u.func)
+ return false;
+ break;
case CHAIN_ARGV:
if (chain->u.u_a.index < arg_argc (chain->u.u_a.argv))
return false;
@@ -708,6 +728,10 @@ input_print (struct obstack *obs, const input_block *input)
chain->u.u_s.len, &maxlen))
return;
break;
+ case CHAIN_FUNC:
+ func_print (obs, find_builtin_by_addr (chain->u.func), false,
+ NULL);
+ break;
case CHAIN_ARGV:
assert (!chain->u.u_a.comma);
if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
@@ -770,7 +794,9 @@ peek_input (bool allow_argv)
break;
case INPUT_MACRO:
- return CHAR_MACRO;
+ if (block->u.func)
+ return CHAR_MACRO;
+ break;
case INPUT_CHAIN:
chain = block->u.u_c.chain;
@@ -783,6 +809,10 @@ peek_input (bool allow_argv)
if (chain->u.u_s.len)
return to_uchar (*chain->u.u_s.str);
break;
+ case CHAIN_FUNC:
+ if (chain->u.func)
+ return CHAR_MACRO;
+ break;
case CHAIN_ARGV:
argc = arg_argc (chain->u.u_a.argv);
if (chain->u.u_a.index == argc)
@@ -891,9 +921,9 @@ next_char_1 (bool allow_quote)
break;
case INPUT_MACRO:
- /* INPUT_MACRO input sources has only one token */
- pop_input (true);
- return CHAR_MACRO;
+ if (isp->u.func)
+ return CHAR_MACRO;
+ break;
case INPUT_CHAIN:
chain = isp->u.u_c.chain;
@@ -914,6 +944,10 @@ next_char_1 (bool allow_quote)
if (chain->u.u_s.level >= 0)
adjust_refcount (chain->u.u_s.level, false);
break;
+ case CHAIN_FUNC:
+ if (chain->u.func)
+ return CHAR_MACRO;
+ break;
case CHAIN_ARGV:
if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
{
@@ -984,17 +1018,37 @@ skip_line (const char *name)
input_change = true;
}
-/*-------------------------------------------------------------------.
-| When a MACRO token is seen, next_token () uses init_macro_token () |
-| to retrieve the value of the function pointer and store it in TD. |
-`-------------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| When next_token() sees a builtin token with peek_input, this |
+| retrieves the value of the function pointer, stores it in TD, and |
+| consumes the input so the caller does not need to do next_char. |
+| If TD is NULL, discard the token instead. |
+`------------------------------------------------------------------*/
static void
init_macro_token (token_data *td)
{
- assert (isp->type == INPUT_MACRO);
- TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
- TOKEN_DATA_FUNC (td) = isp->u.func;
+ int ch = next_char (false);
+ assert (ch == CHAR_MACRO);
+ if (td)
+ TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
+ if (isp->type == INPUT_MACRO)
+ {
+ assert (isp->u.func);
+ if (td)
+ TOKEN_DATA_FUNC (td) = isp->u.func;
+ isp->u.func = NULL;
+ }
+ else
+ {
+ token_chain *chain;
+ assert (isp->type == INPUT_CHAIN);
+ chain = isp->u.u_c.chain;
+ assert (!chain->quote_age && chain->type == CHAIN_FUNC && chain->u.func);
+ if (td)
+ TOKEN_DATA_FUNC (td) = chain->u.func;
+ chain->u.func = NULL;
+ }
}
/*-------------------------------------------------------------------.
@@ -1516,7 +1570,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
if (ch == CHAR_MACRO)
{
init_macro_token (td);
- next_char (false);
#ifdef DEBUG_INPUT
xfprintf (stderr, "next_token -> MACDEF (%s)\n",
find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
@@ -1543,20 +1596,30 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
if (obs)
obs_td = obs;
obstack_grow (obs_td, curr_comm.str1, curr_comm.len1);
- while ((ch = next_char (false)) < CHAR_EOF
- && !MATCH (ch, curr_comm.str2, true))
- obstack_1grow (obs_td, ch);
- if (ch != CHAR_EOF)
+ while (1)
{
+ ch = next_char (false);
+ if (ch == CHAR_EOF)
+ /* Current_file changed to "" if we see CHAR_EOF, use the
+ previous value we stored earlier. */
+ m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
+ _("end of file in comment"));
+ if (ch == CHAR_MACRO)
+ {
+ /* TODO support concatenation of builtins. */
+ m4_warn_at_line (0, file, *line, caller,
+ _("cannot comment builtin"));
+ init_macro_token (NULL);
+ continue;
+ }
+ if (MATCH (ch, curr_comm.str2, true))
+ {
+ obstack_grow (obs_td, curr_comm.str2, curr_comm.len2);
+ break;
+ }
assert (ch < CHAR_EOF);
- obstack_grow (obs_td, curr_comm.str2, curr_comm.len2);
+ obstack_1grow (obs_td, ch);
}
- else
- /* Current_file changed to "" if we see CHAR_EOF, use the
- previous value we stored earlier. */
- m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
- _("end of file in comment"));
-
type = TOKEN_STRING;
}
else if (default_word_regexp && (isalpha (ch) || ch == '_'))
@@ -1608,6 +1671,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
else if (!MATCH (ch, curr_quote.str1, true))
{
+ assert (ch < CHAR_EOF);
switch (ch)
{
case '(':
@@ -1630,6 +1694,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
if (obs)
obs_td = obs;
quote_level = 1;
+ type = TOKEN_STRING;
while (1)
{
ch = next_char (obs != NULL && current_quote_age);
@@ -1639,6 +1704,35 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
_("end of file in string"));
+ if (ch == CHAR_MACRO)
+ {
+ /* TODO support concatenation of builtins. */
+ if (obstack_object_size (obs_td) == 0
+ && TOKEN_DATA_TYPE (td) == TOKEN_VOID)
+ {
+ assert (quote_level == 1);
+ init_macro_token (td);
+ ch = peek_input (false);
+ if (MATCH (ch, curr_quote.str2, false))
+ {
+#ifdef DEBUG_INPUT
+ const builtin *bp
+ = find_builtin_by_addr (TOKEN_DATA_FUNC (td));
+ xfprintf (stderr, "next_token -> MACDEF (%s)\n",
+ bp->name);
+#endif
+ ch = next_char (false);
+ MATCH (ch, curr_quote.str2, true);
+ return TOKEN_MACDEF;
+ }
+ TOKEN_DATA_TYPE (td) = TOKEN_VOID;
+ }
+ else
+ init_macro_token (NULL);
+ m4_warn_at_line (0, file, *line, caller,
+ _("cannot quote builtin"));
+ continue;
+ }
if (ch == CHAR_QUOTE)
append_quote_token (obs, td);
else if (MATCH (ch, curr_quote.str2, true))
@@ -1658,7 +1752,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
obstack_1grow (obs_td, ch);
}
}
- type = TOKEN_STRING;
}
if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
@@ -1707,6 +1800,9 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
xfprintf (stderr, "%s", chain->u.u_s.str);
len += chain->u.u_s.len;
break;
+ case CHAIN_FUNC:
+ xfprintf (stderr, "<func>");
+ break;
case CHAIN_ARGV:
xfprintf (stderr, "{$@}");
break;
diff --git a/src/m4.h b/src/m4.h
index 7e35accd..ef453598 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -282,7 +282,7 @@ enum token_data_type
enum token_chain_type
{
CHAIN_STR, /* Link contains a string, u.u_s is valid. */
- /* TODO add CHAIN_FUNC. */
+ CHAIN_FUNC, /* Builtin function definition, u.func is valid. */
CHAIN_ARGV /* Link contains a $@ reference, u.u_a is valid. */
};
@@ -303,6 +303,7 @@ struct token_chain
int level; /* Expansion level of link content, or -1. */
}
u_s;
+ builtin_func *func; /* Builtin token from defn. */
struct
{
macro_arguments *argv; /* Reference to earlier $@. */
diff --git a/src/macro.c b/src/macro.c
index 6123f05f..f794d868 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -470,6 +470,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
assert (paren_level == 0 && TOKEN_DATA_TYPE (argp) == TOKEN_VOID
&& obstack_object_size (obs) == 0
&& td.u.u_c.chain == td.u.u_c.end
+ && td.u.u_c.chain->quote_age == age
&& td.u.u_c.chain->type == CHAIN_ARGV);
TOKEN_DATA_TYPE (argp) = TOKEN_COMP;
argp->u.u_c.chain = argp->u.u_c.end = td.u.u_c.chain;
@@ -581,7 +582,8 @@ collect_arguments (symbol *sym, struct obstack *arguments,
argv->wrapper = args.wrapper;
argv->has_ref = args.has_ref;
argv->has_func = args.has_func;
- if (args.quote_age != quote_age ())
+ /* TODO allow funcs without crippling quote age. */
+ if (args.quote_age != quote_age () || args.has_func)
argv->quote_age = 0;
argv->arraylen = args.arraylen;
return argv;
@@ -664,7 +666,7 @@ expand_macro (symbol *sym)
stacks[level].args =
(struct obstack *) xmalloc (sizeof *stacks[level].args);
stacks[level].argv =
- (struct obstack *) xmalloc (sizeof *stacks[level].args);
+ (struct obstack *) xmalloc (sizeof *stacks[level].argv);
obstack_init (stacks[level].args);
obstack_init (stacks[level].argv);
stacks[level].args_base = obstack_finish (stacks[level].args);
@@ -816,9 +818,10 @@ arg_adjust_refcount (macro_arguments *argv, bool increase)
/* Given ARGV, return the token_data that contains argument INDEX;
INDEX must be > 0, < argv->argc. If LEVEL is non-NULL, *LEVEL is
set to the obstack level that contains the token (which is not
- necessarily the level of ARGV). */
+ necessarily the level of ARGV). If FLATTEN, avoid returning a
+ builtin function. */
static token_data *
-arg_token (macro_arguments *argv, unsigned int index, int *level)
+arg_token (macro_arguments *argv, unsigned int index, int *level, bool flatten)
{
unsigned int i;
token_data *token;
@@ -826,8 +829,14 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
assert (index && index < argv->argc);
if (level)
*level = argv->level;
+ flatten |= argv->flatten;
if (!argv->wrapper)
- return argv->array[index - 1];
+ {
+ token = argv->array[index - 1];
+ if (flatten && TOKEN_DATA_TYPE (token) == TOKEN_FUNC)
+ token = &empty_token;
+ return token;
+ }
/* Must cycle through all tokens, until we find index, since a ref
may occupy multiple indices. */
@@ -842,10 +851,8 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
- chain->u.u_a.skip_last))
{
token = arg_token (chain->u.u_a.argv,
- chain->u.u_a.index - 1 + index, level);
- if (chain->u.u_a.flatten
- && TOKEN_DATA_TYPE (token) == TOKEN_FUNC)
- token = &empty_token;
+ chain->u.u_a.index - 1 + index, level,
+ flatten || chain->u.u_a.flatten);
break;
}
index -= (chain->u.u_a.argv->argc - chain->u.u_a.index
@@ -897,7 +904,7 @@ arg_type (macro_arguments *argv, unsigned int index)
if (argv->flatten || !argv->has_func || index == 0 || index >= argv->argc)
return TOKEN_TEXT;
- token = arg_token (argv, index, NULL);
+ token = arg_token (argv, index, NULL, false);
type = TOKEN_DATA_TYPE (token);
if (type == TOKEN_COMP && !token->u.u_c.has_func)
type = TOKEN_TEXT;
@@ -923,7 +930,7 @@ arg_text (macro_arguments *argv, unsigned int index)
return argv->argv0;
if (index >= argv->argc)
return "";
- token = arg_token (argv, index, NULL);
+ token = arg_token (argv, index, NULL, false);
switch (TOKEN_DATA_TYPE (token))
{
case TOKEN_TEXT:
@@ -939,11 +946,16 @@ arg_text (macro_arguments *argv, unsigned int index)
case CHAIN_STR:
obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
break;
+ case CHAIN_FUNC:
+ /* TODO concatenate builtins. */
+ assert (!"implemented");
+ abort ();
case CHAIN_ARGV:
arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
quote_cache (NULL, chain->quote_age,
chain->u.u_a.quotes),
- chain->u.u_a.flatten, NULL, NULL, false);
+ argv->flatten || chain->u.u_a.flatten, NULL, NULL,
+ false);
break;
default:
assert (!"arg_text");
@@ -953,6 +965,7 @@ arg_text (macro_arguments *argv, unsigned int index)
}
obstack_1grow (obs, '\0');
return (char *) obstack_finish (obs);
+ case TOKEN_FUNC:
default:
break;
}
@@ -967,8 +980,8 @@ arg_text (macro_arguments *argv, unsigned int index)
bool
arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb)
{
- token_data *ta = arg_token (argv, indexa, NULL);
- token_data *tb = arg_token (argv, indexb, NULL);
+ token_data *ta = arg_token (argv, indexa, NULL, false);
+ token_data *tb = arg_token (argv, indexb, NULL, false);
token_chain tmpa;
token_chain tmpb;
token_chain *ca = &tmpa;
@@ -985,30 +998,45 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb)
TOKEN_DATA_LEN (ta)) == 0);
/* Convert both arguments to chains, if not one already. */
- /* TODO - allow builtin tokens in the comparison? */
- if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT)
+ switch (TOKEN_DATA_TYPE (ta))
{
+ case TOKEN_TEXT:
tmpa.next = NULL;
tmpa.type = CHAIN_STR;
tmpa.u.u_s.str = TOKEN_DATA_TEXT (ta);
tmpa.u.u_s.len = TOKEN_DATA_LEN (ta);
- }
- else
- {
- assert (TOKEN_DATA_TYPE (ta) == TOKEN_COMP);
+ break;
+ case TOKEN_FUNC:
+ tmpa.next = NULL;
+ tmpa.type = CHAIN_FUNC;
+ tmpa.u.func = TOKEN_DATA_FUNC (ta);
+ break;
+ case TOKEN_COMP:
ca = ta->u.u_c.chain;
+ break;
+ default:
+ assert (!"arg_equal");
+ abort ();
}
- if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT)
+ switch (TOKEN_DATA_TYPE (tb))
{
+ case TOKEN_TEXT:
tmpb.next = NULL;
tmpb.type = CHAIN_STR;
tmpb.u.u_s.str = TOKEN_DATA_TEXT (tb);
tmpb.u.u_s.len = TOKEN_DATA_LEN (tb);
- }
- else
- {
- assert (TOKEN_DATA_TYPE (tb) == TOKEN_COMP);
+ break;
+ case TOKEN_FUNC:
+ tmpb.next = NULL;
+ tmpb.type = CHAIN_FUNC;
+ tmpb.u.func = TOKEN_DATA_FUNC (tb);
+ break;
+ case TOKEN_COMP:
cb = tb->u.u_c.chain;
+ break;
+ default:
+ assert (!"arg_equal");
+ abort ();
}
/* Compare each link of the chain. */
@@ -1042,6 +1070,14 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb)
cb = &tmpb;
continue;
}
+ if (ca->type == CHAIN_FUNC)
+ {
+ if (cb->type != CHAIN_FUNC || ca->u.func != cb->u.func)
+ return false;
+ ca = ca->next;
+ cb = cb->next;
+ continue;
+ }
assert (ca->type == CHAIN_STR && cb->type == CHAIN_STR);
if (ca->u.u_s.len == cb->u.u_s.len)
{
@@ -1091,7 +1127,7 @@ arg_empty (macro_arguments *argv, unsigned int index)
return argv->argv0_len == 0;
if (index >= argv->argc)
return true;
- return arg_token (argv, index, NULL) == &empty_token;
+ return arg_token (argv, index, NULL, false) == &empty_token;
}
/* Given ARGV, return the length of argument INDEX. Abort if the
@@ -1107,7 +1143,7 @@ arg_len (macro_arguments *argv, unsigned int index)
return argv->argv0_len;
if (index >= argv->argc)
return 0;
- token = arg_token (argv, index, NULL);
+ token = arg_token (argv, index, NULL, false);
switch (TOKEN_DATA_TYPE (token))
{
case TOKEN_TEXT:
@@ -1126,6 +1162,10 @@ arg_len (macro_arguments *argv, unsigned int index)
case CHAIN_STR:
len += chain->u.u_s.len;
break;
+ case CHAIN_FUNC:
+ /* TODO concatenate builtins. */
+ assert (!"implemented");
+ abort ();
case CHAIN_ARGV:
i = chain->u.u_a.index;
limit = chain->u.u_a.argv->argc - i - chain->u.u_a.skip_last;
@@ -1138,8 +1178,8 @@ arg_len (macro_arguments *argv, unsigned int index)
while (limit--)
{
/* TODO handle builtin concatenation. */
- if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i,
- NULL)) == TOKEN_FUNC)
+ if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, NULL,
+ false)) == TOKEN_FUNC)
assert (argv->flatten);
else
len += arg_len (chain->u.u_a.argv, i);
@@ -1154,6 +1194,7 @@ arg_len (macro_arguments *argv, unsigned int index)
}
assert (len);
return len;
+ case TOKEN_FUNC:
default:
break;
}
@@ -1168,7 +1209,7 @@ arg_func (macro_arguments *argv, unsigned int index)
{
token_data *token;
- token = arg_token (argv, index, NULL);
+ token = arg_token (argv, index, NULL, false);
assert (TOKEN_DATA_TYPE (token) == TOKEN_FUNC);
return TOKEN_DATA_FUNC (token);
}
@@ -1217,7 +1258,7 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index,
if (use_sep && shipout_string_trunc (obs, sep, sep_len, plen))
return true;
use_sep = true;
- token = arg_token (argv, i, NULL);
+ token = arg_token (argv, i, NULL, flatten);
switch (TOKEN_DATA_TYPE (token))
{
case TOKEN_TEXT:
@@ -1247,6 +1288,10 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index,
chain->u.u_s.len, &len))
done = true;
break;
+ case CHAIN_FUNC:
+ func_print (obs, find_builtin_by_addr (chain->u.func),
+ flatten, quotes);
+ break;
case CHAIN_ARGV:
if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
quote_cache (NULL, chain->quote_age,
@@ -1408,8 +1453,8 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
return new_argv;
}
-/* Push argument INDEX from ARGV, which must be a text token, onto the
- expansion stack OBS for rescanning. */
+/* Push argument INDEX from ARGV onto the expansion stack OBS for
+ rescanning. */
void
push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index)
{
@@ -1425,18 +1470,17 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index)
push_arg_quote (obs, argv, index, NULL);
}
-/* Push argument INDEX from ARGV, which must be a text token, onto the
- expansion stack OBS for rescanning. INDEX must be > 0, < argc.
- QUOTES determines any quote delimiters that were in effect when the
- reference was created. */
+/* Push argument INDEX from ARGV onto the expansion stack OBS for
+ rescanning. INDEX must be > 0, < argc. QUOTES determines any
+ quote delimiters that were in effect when the reference was
+ created. */
void
push_arg_quote (struct obstack *obs, macro_arguments *argv, unsigned int index,
const string_pair *quotes)
{
int level;
- token_data *token = arg_token (argv, index, &level);
+ token_data *token = arg_token (argv, index, &level, false);
- /* TODO handle func tokens. */
if (quotes)
obstack_grow (obs, quotes->str1, quotes->len1);
if (push_token (token, level, argv->inuse))
@@ -1465,8 +1509,7 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
return;
}
- /* TODO allow shift, $@, to push builtins without flatten. */
- token = make_argv_ref_token (&td, obs, -1, argv, i, true,
+ token = make_argv_ref_token (&td, obs, -1, argv, i, argv->flatten,
quote ? &curr_quote : NULL);
assert (token);
if (push_token (token, -1, argv->inuse))