summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Blake <ebb9@byu.net> 2007-11-01 09:28:46 -0600
committer: Eric Blake <ebb9@byu.net> 2008-02-14 14:10:50 -0700
commit: 1fecefc8b990254aa667a01d12c6c7a2d716df06 (patch)
tree: d49eb7931fb7b2c3f48ff914cbf5a4de4fd16daa
parent: 9d08c0c8685fdd749b20062e03c061275dc8afbc (diff)
download: m4-1fecefc8b990254aa667a01d12c6c7a2d716df06.tar.gz
Stage15: return argv refs back to collect_arguments
-rwxr-xr-x checks/check-them 10
-rw-r--r-- doc/m4.texinfo 22
-rw-r--r-- examples/null.m4 bin 3671 -> 5764 bytes
-rw-r--r-- examples/null.out bin 338 -> 400 bytes
-rw-r--r-- m4/gnulib-cache.m4 4
-rw-r--r-- src/input.c 180
-rw-r--r-- src/m4.c 30
-rw-r--r-- src/m4.h 8
-rw-r--r-- src/macro.c 112
-rw-r--r-- src/symtab.c 2
10 files changed, 285 insertions, 83 deletions
diff --git a/checks/check-them b/checks/check-them
index daa1b001..9fca39b5 100755
--- a/checks/check-them
+++ b/checks/check-them
@@ -1,6 +1,6 @@
#!/bin/sh
# Check GNU m4 against examples from the manual source.
-# Copyright (C) 1992, 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 1992, 2006, 2007, 2008 Free Software Foundation, Inc.
# Sanity check what we are testing
m4 --version
@@ -68,7 +68,7 @@ do
echo "Checking $file"
options=`sed -ne '3s/^dnl @ extra options: //p;3q' "$file"`
sed -e '/^dnl @/d' -e '/^\^D$/q' "$file" \
- | LC_MESSAGES=C m4 -d -I "$examples" $options - >$out 2>$err
+ | LC_MESSAGES=C M4PATH=$examples m4 -d $options - >$out 2>$err
stat=$?
xstat=`sed -ne '2s/^dnl @ expected status: //p;2q' "$file"`
@@ -96,9 +96,11 @@ do
xerrfile=`sed -n 's/^dnl @ expected error: //p' "$file"`
if test -z "$xerrfile" ; then
- sed -e '/^dnl @error{}/!d' -e 's///' -e "s|^m4:|$m4:|" "$file" > $xerr
+ sed '/^dnl @error{}/!d; s///; '"s|^m4:|$m4:|; s|\.\./examples|$examples|" \
+ "$file" > $xerr
else
- cp "$examples/$xerrfile" $xerr
+ sed "s|^m4:|$m4:|; s|\.\./examples|$examples|" \
+ "$examples/$xerrfile" > $xerr
fi
# For the benefit of mingw, normalize \r\n line endings
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 420d8177..b2599c98 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -933,7 +933,13 @@ exception of the @sc{nul} character (the zero byte @samp{'\0'}).
@comment xout: null.out
@comment xerr: null.err
@example
-include(`null.m4')dnl
+define(`m4exit')include(`null.m4')dnl
+@end example
+
+@comment status: 2
+@example
+include(`null.m4')
+@result{}# This file tests m4 behavior on NUL bytes.
@end example
@end ignore
@@ -2408,6 +2414,20 @@ indir(`divert', defn(`foo'))
@result{}
@end example
+Warning messages issued on behalf of an indirect macro use an
+unambiguous representation of the macro name, using escape sequences
+similar to C strings, and with colons also quoted.
+
+@example
+define(`%%:\
+odd', defn(`divnum'))
+@result{}
+indir(`%%:\
+odd', `extra')
+@error{}m4:stdin:3: Warning: %%\:\\\nodd: extra arguments ignored: 1 > 0
+@result{}0
+@end example
+
@node Builtin
@section Indirect call of builtins
diff --git a/examples/null.m4 b/examples/null.m4
index 904a6efb..2632522c 100644
--- a/examples/null.m4
+++ b/examples/null.m4
Binary files differ
diff --git a/examples/null.out b/examples/null.out
index 6e8a114c..c42e03c1 100644
--- a/examples/null.out
+++ b/examples/null.out
Binary files differ
diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4
index 0055a697..be1c1217 100644
--- a/m4/gnulib-cache.m4
+++ b/m4/gnulib-cache.m4
@@ -15,11 +15,11 @@
# Specification in the form of a command-line invocation:
-# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix
+# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix
# Specification in the form of a few gnulib-tool.m4 macro invocations:
gl_LOCAL_DIR([local])
-gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix])
+gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix])
gl_AVOID([])
gl_SOURCE_BASE([lib])
gl_M4_BASE([m4])
diff --git a/src/input.c b/src/input.c
index 8738aebb..5bbaf088 100644
--- a/src/input.c
+++ b/src/input.c
@@ -154,6 +154,7 @@ static bool input_change;
#define CHAR_EOF 256 /* Character return on EOF. */
#define CHAR_MACRO 257 /* Character return for MACRO token. */
#define CHAR_QUOTE 258 /* Character return for quoted string. */
+#define CHAR_ARGV 259 /* Character return for $@ reference. */
/* Quote chars. */
string_pair curr_quote;
@@ -449,7 +450,7 @@ push_token (token_data *token, int level, bool inuse)
next->u.u_c.end = chain;
if (chain->type == CHAIN_ARGV)
{
- assert (!chain->u.u_a.comma);
+ assert (!chain->u.u_a.comma && !chain->u.u_a.skip_last);
inuse |= arg_adjust_refcount (chain->u.u_a.argv, true);
}
else if (chain->type == CHAIN_STR && chain->u.u_s.level >= 0)
@@ -712,17 +713,18 @@ input_print (struct obstack *obs, const input_block *input)
}
-/*-----------------------------------------------------------------.
-| Low level input is done a character at a time. The function |
-| peek_input () is used to look at the next character in the input |
-| stream. At any given time, it reads from the input_block on the |
-| top of the current input stack. The return value is an unsigned |
-| char, or CHAR_EOF if there is no more input, or CHAR_MACRO if a |
-| builtin token occurs next. |
-`-----------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| Low level input is done a character at a time. The function |
+| peek_input () is used to look at the next character in the input |
+| stream. At any given time, it reads from the input_block on the |
+| top of the current input stack. The return value is an unsigned |
+| char, CHAR_EOF if there is no more input, CHAR_MACRO if a builtin |
+| token occurs next, or CHAR_ARGV if ALLOW_ARGV and the input is |
+| visiting an argv reference with the correct quoting. |
+`------------------------------------------------------------------*/
static int
-peek_input (void)
+peek_input (bool allow_argv)
{
int ch;
input_block *block = isp;
@@ -757,6 +759,7 @@ peek_input (void)
chain = block->u.u_c.chain;
while (chain)
{
+ unsigned int argc;
switch (chain->type)
{
case CHAIN_STR:
@@ -764,12 +767,17 @@ peek_input (void)
return to_uchar (*chain->u.u_s.str);
break;
case CHAIN_ARGV:
- // TODO - figure out how to pass multiple arguments to
- // macro.c at once
- if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
+ argc = arg_argc (chain->u.u_a.argv);
+ if (chain->u.u_a.index == argc)
break;
if (chain->u.u_a.comma)
return ',';
+ /* Only return a reference if the quoting is correct
+ and the reference has more than one argument
+ left. */
+ if (allow_argv && chain->quote_age == current_quote_age
+ && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
+ return CHAR_ARGV;
/* Rather than directly parse argv here, we push
another input block containing the next unparsed
argument from argv. */
@@ -779,7 +787,7 @@ peek_input (void)
chain->u.u_a.index++;
chain->u.u_a.comma = true;
push_string_finish ();
- return peek_input ();
+ return peek_input (allow_argv);
default:
assert (!"peek_input");
abort ();
@@ -872,9 +880,7 @@ next_char_1 (bool allow_quote)
chain = isp->u.u_c.chain;
while (chain)
{
- // TODO also support returning $@ as CHAR_QUOTE
- if (allow_quote && chain->quote_age == current_quote_age
- && chain->type == CHAIN_STR)
+ if (allow_quote && chain->quote_age == current_quote_age)
return CHAR_QUOTE;
switch (chain->type)
{
@@ -890,8 +896,6 @@ next_char_1 (bool allow_quote)
adjust_refcount (chain->u.u_s.level, false);
break;
case CHAIN_ARGV:
- // TODO - figure out how to pass multiple arguments to
- // macro.c at once
if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
{
arg_adjust_refcount (chain->u.u_a.argv, false);
@@ -958,7 +962,6 @@ skip_line (const char *name)
if (file != current_file || line != current_line)
input_change = true;
}
-
/*-------------------------------------------------------------------.
| When a MACRO token is seen, next_token () uses init_macro_token () |
@@ -985,20 +988,31 @@ append_quote_token (struct obstack *obs, token_data *td)
token_chain *src_chain = isp->u.u_c.chain;
token_chain *chain;
- assert (isp->type == INPUT_CHAIN && obs && current_quote_age
- && src_chain->type == CHAIN_STR && src_chain->u.u_s.level >= 0);
+ assert (isp->type == INPUT_CHAIN && obs && current_quote_age);
isp->u.u_c.chain = src_chain->next;
/* Speed consideration - for short enough tokens, the speed and
memory overhead of parsing another INPUT_CHAIN link outweighs the
time to inline the token text. */
- if (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
+ if (src_chain->type == CHAIN_STR
+ && src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
{
+ assert (src_chain->u.u_s.level >= 0);
obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
adjust_refcount (src_chain->u.u_s.level, false);
return;
}
+ // TODO preserve $@ through a quoted context, in case a later reference
+ // strips those quotes.
+ if (src_chain->type == CHAIN_ARGV)
+ {
+ arg_print (obs, src_chain->u.u_a.argv, src_chain->u.u_a.index,
+ src_chain->u.u_a.quotes, NULL);
+ arg_adjust_refcount (src_chain->u.u_a.argv, false);
+ return;
+ }
+
if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
{
TOKEN_DATA_TYPE (td) = TOKEN_COMP;
@@ -1015,6 +1029,65 @@ append_quote_token (struct obstack *obs, token_data *td)
chain->next = NULL;
}
+
+/*-------------------------------------------------------------------.
+| When an ARGV token is seen, convert TD to point to it via a |
+| composite token. Use OBS for any additional allocations needed to |
+| store the token chain. |
+`-------------------------------------------------------------------*/
+static void
+init_argv_token (struct obstack *obs, token_data *td)
+{
+ token_chain *src_chain;
+ token_chain *chain;
+ int ch = next_char (true);
+
+ assert (ch == CHAR_QUOTE && TOKEN_DATA_TYPE (td) == TOKEN_VOID
+ && isp->type == INPUT_CHAIN && isp->u.u_c.chain->type == CHAIN_ARGV
+ && obs && obstack_object_size (obs) == 0);
+
+ src_chain = isp->u.u_c.chain;
+ isp->u.u_c.chain = src_chain->next;
+ TOKEN_DATA_TYPE (td) = TOKEN_COMP;
+ /* Clone the link, since the input will be discarded soon. */
+ chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain);
+ td->u.u_c.chain = td->u.u_c.end = chain;
+ chain->next = NULL;
+
+ /* If the next character is not ',' or ')', then unlink the last
+ argument from argv and schedule it for reparsing. This way,
+ expand_argument never has to deal with concatenation of argv with
+ arbitrary text. Note that the implementation of safe_quotes
+ ensures peek_input won't return CHAR_ARGV if the user is perverse
+ enough to mix comment delimiters with argument separators:
+
+ define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c))
+ => 2 (not 3)
+
+ Therefore, we do not have to worry about calling MATCH, and thus
+ do not have to worry about pop_input being called and
+ invalidating the argv reference.
+
+ When the $@ ref is used unchanged, we completely bypass the
+ decrement of the argv refcount in next_char_1, since the ref is
+ still live via the current collect_arguments. However, when the
+ last element of the $@ ref is reparsed, we must increase the argv
+ refcount here, to compensate for the fact that it will be
+ decreased once the final element is parsed. */
+ assert (*curr_comm.str1 != ',' && *curr_comm.str1 != ')'
+ && *curr_comm.str1 != *curr_quote.str1);
+ ch = peek_input (false);
+ if (ch != ',' && ch != ')')
+ {
+ isp->u.u_c.chain = src_chain;
+ src_chain->u.u_a.index = arg_argc (chain->u.u_a.argv) - 1;
+ src_chain->u.u_a.comma = true;
+ chain->u.u_a.skip_last = true;
+ arg_adjust_refcount (chain->u.u_a.argv, true);
+ }
+}
+
+
/*------------------------------------------------------------------.
| This function is for matching a string against a prefix of the |
| input stream. If the string S matches the input and CONSUME is |
@@ -1031,7 +1104,7 @@ match_input (const char *s, bool consume)
const char *t;
bool result = false;
- ch = peek_input ();
+ ch = peek_input (false);
if (ch != to_uchar (*s))
return false; /* fail */
@@ -1043,7 +1116,7 @@ match_input (const char *s, bool consume)
}
next_char (false);
- for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
+ for (n = 1, t = s++; (ch = peek_input (false)) == to_uchar (*s++); )
{
next_char (false);
n++;
@@ -1324,18 +1397,20 @@ safe_quotes (void)
/*--------------------------------------------------------------------.
-| Parse a single token from the input stream, set TD to its |
-| contents, and return its type. A token is TOKEN_EOF if the |
+| Parse a single token from the input stream, set TD to its |
+| contents, and return its type. A token is TOKEN_EOF if the |
| input_stack is empty; TOKEN_STRING for a quoted string or comment; |
-| TOKEN_WORD for something that is a potential macro name; and |
+| TOKEN_WORD for something that is a potential macro name; and |
| TOKEN_SIMPLE for any single character that is not a part of any of |
| the previous types. If LINE is not NULL, set *LINE to the line |
| where the token starts. If OBS is not NULL, expand TOKEN_STRING |
| directly into OBS rather than in token_stack temporary storage |
-| area, and TD could be a TOKEN_COMP instead of the usual |
-| TOKEN_TEXT. Report errors (unterminated comments or strings) on |
-| behalf of CALLER, if non-NULL. |
-| |
+| area, and TD could be a TOKEN_COMP instead of the usual |
+| TOKEN_TEXT. If ALLOW_ARGV, OBS must be non-NULL, and an entire |
+| series of arguments can be returned as TOKEN_ARGV when a $@ |
+| reference is encountered. Report errors (unterminated comments or |
+| strings) on behalf of CALLER, if non-NULL. |
+| |
| Next_token () returns the token type, and passes back a pointer to |
| the token data through TD. Non-string token text is collected on |
| the obstack token_stack, which never contains more than one token |
@@ -1344,7 +1419,8 @@ safe_quotes (void)
`--------------------------------------------------------------------*/
token_type
-next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
+next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
+ const char *caller)
{
int ch;
int quote_level;
@@ -1366,7 +1442,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
/* Can't consume character until after CHAR_MACRO is handled. */
TOKEN_DATA_TYPE (td) = TOKEN_VOID;
- ch = peek_input ();
+ ch = peek_input (allow_argv && current_quote_age);
if (ch == CHAR_EOF)
{
#ifdef DEBUG_INPUT
@@ -1385,6 +1461,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
#endif /* DEBUG_INPUT */
return TOKEN_MACDEF;
}
+ if (ch == CHAR_ARGV)
+ {
+ init_argv_token (obs, td);
+#ifdef DEBUG_INPUT
+ xfprintf (stderr, "next_token -> ARGV (%d args)\n",
+ (arg_argc (td->u.u_c.chain->u.u_a.argv)
+ - td->u.u_c.chain->u.u_a.index
+ - (td->u.u_c.chain->u.u_a.skip_last ? 1 : 0)));
+#endif
+ return TOKEN_ARGV;
+ }
next_char (false); /* Consume character we already peeked at. */
file = current_file;
@@ -1413,7 +1500,8 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
else if (default_word_regexp && (isalpha (ch) || ch == '_'))
{
obstack_1grow (&token_stack, ch);
- while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_'))
+ while ((ch = peek_input (false)) < CHAR_EOF
+ && (isalnum (ch) || ch == '_'))
{
obstack_1grow (&token_stack, ch);
next_char (false);
@@ -1428,7 +1516,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
obstack_1grow (&token_stack, ch);
while (1)
{
- ch = peek_input ();
+ ch = peek_input (false);
if (ch >= CHAR_EOF)
break;
obstack_1grow (&token_stack, ch);
@@ -1551,9 +1639,19 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
token_type_string (type));
while (chain)
{
- assert (chain->type == CHAIN_STR);
- xfprintf (stderr, "%s", chain->u.u_s.str);
- len += chain->u.u_s.len;
+ switch (chain->type)
+ {
+ case CHAIN_STR:
+ xfprintf (stderr, "%s", chain->u.u_s.str);
+ len += chain->u.u_s.len;
+ break;
+ case CHAIN_ARGV:
+ xfprintf (stderr, "{$@}");
+ break;
+ default:
+ assert (!"next_token");
+ abort ();
+ }
links++;
chain = chain->next;
}
@@ -1573,7 +1671,7 @@ token_type
peek_token (void)
{
token_type result;
- int ch = peek_input ();
+ int ch = peek_input (false);
if (ch == CHAR_EOF)
{
@@ -1688,7 +1786,7 @@ lex_debug (void)
token_type t;
token_data td;
- while ((t = next_token (&td, NULL, NULL, "<debug>")) != TOKEN_EOF)
+ while ((t = next_token (&td, NULL, NULL, false, "<debug>")) != TOKEN_EOF)
print_token ("lex", t, &td);
}
#endif /* DEBUG_INPUT */
diff --git a/src/m4.c b/src/m4.c
index 2cfed194..a6bc92ad 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -1,7 +1,7 @@
/* GNU m4 -- A simple macro processor
- Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007
- Free Software Foundation, Inc.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006,
+ 2007, 2008 Free Software Foundation, Inc.
This file is part of GNU M4.
@@ -98,18 +98,37 @@ m4_verror_at_line (bool warn, int status, int errnum, const char *file,
va_list args)
{
char *full = NULL;
+ char *safe_macro = NULL;
+
+ /* Sanitize MACRO, since we are turning around and using it in a
+ format string. The allocation is overly conservative, but
+ problematic macro names only occur via indir or changeword. */
+ if (macro && strchr (macro, '%'))
+ {
+ char *p = safe_macro = xcharalloc (2 * strlen (macro) + 1);
+ do
+ {
+ if (*macro == '%')
+ *p++ = '%';
+ *p++ = *macro++;
+ }
+ while (*macro);
+ }
/* Prepend warning and the macro name, as needed. But if that fails
for non-memory reasons (unlikely), then still use the original
format. */
if (warn && macro)
- full = xasprintf (_("Warning: %s: %s"), macro, format);
+ full = xasprintf (_("Warning: %s: %s"),
+ quotearg (safe_macro ? safe_macro : macro), format);
else if (warn)
full = xasprintf (_("Warning: %s"), format);
else if (macro)
- full = xasprintf (_("%s: %s"), macro, format);
+ full = xasprintf (_("%s: %s"),
+ quotearg (safe_macro ? safe_macro : macro), format);
verror_at_line (status, errnum, line ? file : NULL, line,
full ? full : format, args);
free (full);
+ free (safe_macro);
if ((!warn || fatal_warnings) && !retcode)
retcode = EXIT_FAILURE;
}
@@ -435,6 +454,8 @@ main (int argc, char *const *argv, char *const *envp)
include_init ();
debug_init ();
+ set_quoting_style (NULL, escape_quoting_style);
+ set_char_quoting (NULL, ':', 1);
#ifdef USE_STACKOVF
setup_stackovf_trap (argv, envp, stackovf_handler);
#endif
@@ -687,6 +708,7 @@ main (int argc, char *const *argv, char *const *envp)
}
output_exit ();
free_regex ();
+ quotearg_free ();
#ifdef DEBUG_REGEX
if (trace_file)
fclose (trace_file);
diff --git a/src/m4.h b/src/m4.h
index 93a023f7..e4ff44ac 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -43,6 +43,7 @@
#include "exitfail.h"
#include "intprops.h"
#include "obstack.h"
+#include "quotearg.h"
#include "stdio--.h"
#include "stdlib--.h"
#include "unistd--.h"
@@ -265,7 +266,8 @@ enum token_type
TOKEN_COMMA, /* Active character `,', TOKEN_TEXT. */
TOKEN_CLOSE, /* Active character `)', TOKEN_TEXT. */
TOKEN_SIMPLE, /* Any other single character, TOKEN_TEXT. */
- TOKEN_MACDEF /* A macro's definition (see "defn"), TOKEN_FUNC. */
+ TOKEN_MACDEF, /* A macro's definition (see "defn"), TOKEN_FUNC. */
+ TOKEN_ARGV /* A series of parameters, TOKEN_COMP. */
};
/* The data for a token, a macro argument, and a macro definition. */
@@ -308,6 +310,7 @@ struct token_chain
unsigned int index; /* Argument index within argv. */
bool_bitfield flatten : 1; /* True to treat builtins as text. */
bool_bitfield comma : 1; /* True when `,' is next input. */
+ bool_bitfield skip_last : 1; /* True if last argument omitted. */
const string_pair *quotes; /* NULL for $*, quotes for $@. */
}
u_a;
@@ -372,7 +375,8 @@ typedef enum token_data_type token_data_type;
void input_init (void);
token_type peek_token (void);
-token_type next_token (token_data *, int *, struct obstack *, const char *);
+token_type next_token (token_data *, int *, struct obstack *, bool,
+ const char *);
void skip_line (const char *);
/* push back input */
diff --git a/src/macro.c b/src/macro.c
index 0cc42225..8341dd2f 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -216,7 +216,7 @@ expand_input (void)
TOKEN_DATA_ORIG_TEXT (&empty_token) = "";
#endif
- while ((t = next_token (&td, &line, NULL, NULL)) != TOKEN_EOF)
+ while ((t = next_token (&td, &line, NULL, false, NULL)) != TOKEN_EOF)
expand_token (NULL, t, &td, line, true);
for (i = 0; i < stacks_count; i++)
@@ -364,7 +364,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
/* Skip leading white space. */
do
{
- t = next_token (&td, NULL, obs, caller);
+ t = next_token (&td, NULL, obs, true, caller);
}
while (t == TOKEN_SIMPLE && isspace (to_uchar (*TOKEN_DATA_TEXT (&td))));
@@ -455,6 +455,20 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
}
break;
+ case TOKEN_ARGV:
+ assert (paren_level == 0 && TOKEN_DATA_TYPE (argp) == TOKEN_VOID
+ && obstack_object_size (obs) == 0
+ && td.u.u_c.chain == td.u.u_c.end
+ && td.u.u_c.chain->type == CHAIN_ARGV);
+ TOKEN_DATA_TYPE (argp) = TOKEN_COMP;
+ argp->u.u_c.chain = argp->u.u_c.end = td.u.u_c.chain;
+ t = next_token (&td, NULL, NULL, false, caller);
+ if (argp->u.u_c.chain->u.u_a.skip_last)
+ assert (t == TOKEN_COMMA);
+ else
+ assert (t == TOKEN_COMMA || t == TOKEN_CLOSE);
+ return t == TOKEN_COMMA;
+
default:
assert (!"expand_argument");
abort ();
@@ -462,7 +476,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
if (TOKEN_DATA_TYPE (argp) != TOKEN_VOID || obstack_object_size (obs))
first = false;
- t = next_token (&td, NULL, obs, caller);
+ t = next_token (&td, NULL, obs, first, caller);
}
}
@@ -496,7 +510,8 @@ collect_arguments (symbol *sym, struct obstack *arguments,
if (peek_token () == TOKEN_OPEN)
{
- next_token (&td, NULL, NULL, SYMBOL_NAME (sym)); /* gobble parenthesis */
+ /* gobble parenthesis */
+ next_token (&td, NULL, NULL, false, SYMBOL_NAME (sym));
do
{
tdp = (token_data *) obstack_alloc (arguments, sizeof *tdp);
@@ -519,12 +534,22 @@ collect_arguments (symbol *sym, struct obstack *arguments,
&& TOKEN_DATA_QUOTE_AGE (tdp) != args.quote_age)
args.quote_age = 0;
else if (TOKEN_DATA_TYPE (tdp) == TOKEN_COMP)
- args.has_ref = true;
+ {
+ args.has_ref = true;
+ if (tdp->u.u_c.chain->type == CHAIN_ARGV)
+ {
+ args.argc += (tdp->u.u_c.chain->u.u_a.argv->argc
+ - tdp->u.u_c.chain->u.u_a.index
+ - tdp->u.u_c.chain->u.u_a.skip_last - 1);
+ args.wrapper = true;
+ }
+ }
}
while (more_args);
}
argv = (macro_arguments *) obstack_finish (argv_stack);
argv->argc = args.argc;
+ argv->wrapper = args.wrapper;
argv->has_ref = args.has_ref;
if (args.quote_age != quote_age ())
argv->quote_age = 0;
@@ -734,9 +759,20 @@ arg_adjust_refcount (macro_arguments *argv, bool increase)
chain = argv->array[i]->u.u_c.chain;
while (chain)
{
- assert (chain->type == CHAIN_STR);
- if (chain->u.u_s.level >= 0)
- adjust_refcount (chain->u.u_s.level, increase);
+ switch (chain->type)
+ {
+ case CHAIN_STR:
+ if (chain->u.u_s.level >= 0)
+ adjust_refcount (chain->u.u_s.level, increase);
+ break;
+ case CHAIN_ARGV:
+ assert (chain->u.u_a.argv->inuse);
+ arg_adjust_refcount (chain->u.u_a.argv, increase);
+ break;
+ default:
+ assert (!"arg_adjust_refcount");
+ abort ();
+ }
chain = chain->next;
}
}
@@ -766,12 +802,14 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
for (i = 0; i < argv->arraylen; i++)
{
token = argv->array[i];
- if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
+ if (TOKEN_DATA_TYPE (token) == TOKEN_COMP
+ && token->u.u_c.chain->type == CHAIN_ARGV)
{
token_chain *chain = token->u.u_c.chain;
// TODO for now we support only a single-length $@ chain...
- assert (!chain->next && chain->type == CHAIN_ARGV);
- if (index < chain->u.u_a.argv->argc - (chain->u.u_a.index - 1))
+ assert (!chain->next);
+ if (index <= (chain->u.u_a.argv->argc - chain->u.u_a.index
+ - chain->u.u_a.skip_last))
{
token = arg_token (chain->u.u_a.argv,
chain->u.u_a.index - 1 + index, level);
@@ -780,7 +818,8 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
token = &empty_token;
break;
}
- index -= chain->u.u_a.argv->argc - chain->u.u_a.index;
+ index -= (chain->u.u_a.argv->argc - chain->u.u_a.index
+ - chain->u.u_a.skip_last);
}
else if (--index == 0)
break;
@@ -793,18 +832,24 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
static void
arg_mark (macro_arguments *argv)
{
+ unsigned int i;
+ token_chain *chain;
+
if (argv->inuse)
return;
argv->inuse = true;
if (argv->wrapper)
- {
- // TODO for now we support only a single-length $@ chain...
- assert (argv->arraylen == 1
- && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP
- && !argv->array[0]->u.u_c.chain->next
- && argv->array[0]->u.u_c.chain->type == CHAIN_ARGV);
- argv->array[0]->u.u_c.chain->u.u_a.argv->inuse = true;
- }
+ for (i = 0; i < argv->arraylen; i++)
+ if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP)
+ {
+ chain = argv->array[i]->u.u_c.chain;
+ while (chain)
+ {
+ if (chain->type == CHAIN_ARGV && !chain->u.u_a.argv->inuse)
+ arg_mark (chain->u.u_a.argv);
+ chain = chain->next;
+ }
+ }
}
/* Given ARGV, return how many arguments it refers to. */
@@ -854,14 +899,24 @@ arg_text (macro_arguments *argv, unsigned int index)
case TOKEN_TEXT:
return TOKEN_DATA_TEXT (token);
case TOKEN_COMP:
- // TODO - concatenate argv refs, or even functions? For now, we assume
- // all chain elements are text.
+ // TODO - concatenate functions?
chain = token->u.u_c.chain;
obs = arg_scratch ();
while (chain)
{
- assert (chain->type == CHAIN_STR);
- obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
+ switch (chain->type)
+ {
+ case CHAIN_STR:
+ obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
+ break;
+ case CHAIN_ARGV:
+ arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
+ chain->u.u_a.quotes, NULL);
+ break;
+ default:
+ assert (!"arg_text");
+ abort ();
+ }
chain = chain->next;
}
obstack_1grow (obs, '\0');
@@ -1122,13 +1177,13 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level,
token_chain *chain;
assert (obstack_object_size (obs) == 0);
- if (argv->wrapper)
+ if (argv->wrapper && argv->arraylen == 1)
{
// TODO for now we support only a single-length $@ chain...
- assert (argv->arraylen == 1
- && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP);
+ assert (TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP);
chain = argv->array[0]->u.u_c.chain;
- assert (!chain->next && chain->type == CHAIN_ARGV);
+ assert (!chain->next && chain->type == CHAIN_ARGV
+ && !chain->u.u_a.skip_last);
argv = chain->u.u_a.argv;
index += chain->u.u_a.index - 1;
}
@@ -1145,6 +1200,7 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level,
chain->u.u_a.index = index;
chain->u.u_a.flatten = flatten;
chain->u.u_a.comma = false;
+ chain->u.u_a.skip_last = false;
if (quotes)
{
/* Clone the quotes into the obstack, since a subsequent
diff --git a/src/symtab.c b/src/symtab.c
index 277a79f4..dac49d7c 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -350,7 +350,7 @@ symtab_debug (void)
int delete;
static int i;
- while (next_token (&td, NULL, NULL, "<debug>") == TOKEN_WORD)
+ while (next_token (&td, NULL, NULL, false, "<debug>") == TOKEN_WORD)
{
text = TOKEN_DATA_TEXT (&td);
if (*text == '_')