Stage15: return argv refs back to collect_arguments

author: Eric Blake <ebb9@byu.net> 2007-11-01 09:28:46 -0600
committer: Eric Blake <ebb9@byu.net> 2008-02-14 14:10:50 -0700
commit: 1fecefc8b990254aa667a01d12c6c7a2d716df06 (patch)
tree: d49eb7931fb7b2c3f48ff914cbf5a4de4fd16daa
parent: 9d08c0c8685fdd749b20062e03c061275dc8afbc (diff)
download: m4-1fecefc8b990254aa667a01d12c6c7a2d716df06.tar.gz
10 files changed, 285 insertions, 83 deletions
diff --git a/checks/check-them b/checks/check-them
index daa1b001..9fca39b5 100755
--- a/checks/check-them
+++ b/checks/check-them
@@ -1,6 +1,6 @@
 #!/bin/sh
 # Check GNU m4 against examples from the manual source.
-# Copyright (C) 1992, 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 1992, 2006, 2007, 2008 Free Software Foundation, Inc.
 
 # Sanity check what we are testing
 m4 --version
@@ -68,7 +68,7 @@ do
   echo "Checking $file"
   options=`sed -ne '3s/^dnl @ extra options: //p;3q' "$file"`
   sed -e '/^dnl @/d' -e '/^\^D$/q' "$file" \
-    | LC_MESSAGES=C m4 -d -I "$examples" $options - >$out 2>$err
+    | LC_MESSAGES=C M4PATH=$examples m4 -d $options - >$out 2>$err
   stat=$?
 
   xstat=`sed -ne '2s/^dnl @ expected status: //p;2q' "$file"`
@@ -96,9 +96,11 @@ do
 
   xerrfile=`sed -n 's/^dnl @ expected error: //p' "$file"`
   if test -z "$xerrfile" ; then
-    sed -e '/^dnl @error{}/!d' -e 's///' -e "s|^m4:|$m4:|" "$file" > $xerr
+    sed '/^dnl @error{}/!d; s///; '"s|^m4:|$m4:|; s|\.\./examples|$examples|" \
+      "$file" > $xerr
   else
-    cp "$examples/$xerrfile" $xerr
+    sed "s|^m4:|$m4:|; s|\.\./examples|$examples|" \
+      "$examples/$xerrfile" > $xerr
   fi
 
   # For the benefit of mingw, normalize \r\n line endings
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 420d8177..b2599c98 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -933,7 +933,13 @@ exception of the @sc{nul} character (the zero byte @samp{'\0'}).
 @comment xout: null.out
 @comment xerr: null.err
 @example
-include(`null.m4')dnl
+define(`m4exit')include(`null.m4')dnl
+@end example
+
+@comment status: 2
+@example
+include(`null.m4')
+@result{}# This file tests m4 behavior on NUL bytes.
 @end example
 @end ignore
 
@@ -2408,6 +2414,20 @@ indir(`divert', defn(`foo'))
 @result{}
 @end example
 
+Warning messages issued on behalf of an indirect macro use an
+unambiguous representation of the macro name, using escape sequences
+similar to C strings, and with colons also quoted.
+
+@example
+define(`%%:\
+odd', defn(`divnum'))
+@result{}
+indir(`%%:\
+odd', `extra')
+@error{}m4:stdin:3: Warning: %%\:\\\nodd: extra arguments ignored: 1 > 0
+@result{}0
+@end example
+
 @node Builtin
 @section Indirect call of builtins
 
diff --git a/examples/null.m4 b/examples/null.m4
index 904a6efb..2632522c 100644
--- a/examples/null.m4
+++ b/examples/null.m4
diff --git a/examples/null.out b/examples/null.out
index 6e8a114c..c42e03c1 100644
--- a/examples/null.out
+++ b/examples/null.out
diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4
index 0055a697..be1c1217 100644
--- a/m4/gnulib-cache.m4
+++ b/m4/gnulib-cache.m4
@@ -15,11 +15,11 @@
 
 
 # Specification in the form of a command-line invocation:
-#   gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix
+#   gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix
 
 # Specification in the form of a few gnulib-tool.m4 macro invocations:
 gl_LOCAL_DIR([local])
-gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix])
+gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix])
 gl_AVOID([])
 gl_SOURCE_BASE([lib])
 gl_M4_BASE([m4])
diff --git a/src/input.c b/src/input.c
index 8738aebb..5bbaf088 100644
--- a/src/input.c
+++ b/src/input.c
@@ -154,6 +154,7 @@ static bool input_change;
 #define CHAR_EOF	256	/* Character return on EOF.  */
 #define CHAR_MACRO	257	/* Character return for MACRO token.  */
 #define CHAR_QUOTE	258	/* Character return for quoted string.  */
+#define CHAR_ARGV	259	/* Character return for $@ reference.  */
 
 /* Quote chars.  */
 string_pair curr_quote;
@@ -449,7 +450,7 @@ push_token (token_data *token, int level, bool inuse)
       next->u.u_c.end = chain;
       if (chain->type == CHAIN_ARGV)
 	{
-	  assert (!chain->u.u_a.comma);
+	  assert (!chain->u.u_a.comma && !chain->u.u_a.skip_last);
 	  inuse |= arg_adjust_refcount (chain->u.u_a.argv, true);
 	}
       else if (chain->type == CHAIN_STR && chain->u.u_s.level >= 0)
@@ -712,17 +713,18 @@ input_print (struct obstack *obs, const input_block *input)
 }
 
 
-/*-----------------------------------------------------------------.
-| Low level input is done a character at a time.  The function     |
-| peek_input () is used to look at the next character in the input |
-| stream.  At any given time, it reads from the input_block on the |
-| top of the current input stack.  The return value is an unsigned |
-| char, or CHAR_EOF if there is no more input, or CHAR_MACRO if a  |
-| builtin token occurs next.                                       |
-`-----------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| Low level input is done a character at a time.  The function      |
+| peek_input () is used to look at the next character in the input  |
+| stream.  At any given time, it reads from the input_block on the  |
+| top of the current input stack.  The return value is an unsigned  |
+| char, CHAR_EOF if there is no more input, CHAR_MACRO if a builtin |
+| token occurs next, or CHAR_ARGV if ALLOW_ARGV and the input is    |
+| visiting an argv reference with the correct quoting.              |
+`------------------------------------------------------------------*/
 
 static int
-peek_input (void)
+peek_input (bool allow_argv)
 {
   int ch;
   input_block *block = isp;
@@ -757,6 +759,7 @@ peek_input (void)
 	  chain = block->u.u_c.chain;
 	  while (chain)
 	    {
+	      unsigned int argc;
 	      switch (chain->type)
 		{
 		case CHAIN_STR:
@@ -764,12 +767,17 @@ peek_input (void)
 		    return to_uchar (*chain->u.u_s.str);
 		  break;
 		case CHAIN_ARGV:
-		  // TODO - figure out how to pass multiple arguments to
-		  // macro.c at once
-		  if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
+		  argc = arg_argc (chain->u.u_a.argv);
+		  if (chain->u.u_a.index == argc)
 		    break;
 		  if (chain->u.u_a.comma)
 		    return ',';
+		  /* Only return a reference if the quoting is correct
+		     and the reference has more than one argument
+		     left.  */
+		  if (allow_argv && chain->quote_age == current_quote_age
+		      && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
+		    return CHAR_ARGV;
 		  /* Rather than directly parse argv here, we push
 		     another input block containing the next unparsed
 		     argument from argv.  */
@@ -779,7 +787,7 @@ peek_input (void)
 		  chain->u.u_a.index++;
 		  chain->u.u_a.comma = true;
 		  push_string_finish ();
-		  return peek_input ();
+		  return peek_input (allow_argv);
 		default:
 		  assert (!"peek_input");
 		  abort ();
@@ -872,9 +880,7 @@ next_char_1 (bool allow_quote)
 	  chain = isp->u.u_c.chain;
 	  while (chain)
 	    {
-	      // TODO also support returning $@ as CHAR_QUOTE
-	      if (allow_quote && chain->quote_age == current_quote_age
-		  && chain->type == CHAIN_STR)
+	      if (allow_quote && chain->quote_age == current_quote_age)
 		return CHAR_QUOTE;
 	      switch (chain->type)
 		{
@@ -890,8 +896,6 @@ next_char_1 (bool allow_quote)
 		    adjust_refcount (chain->u.u_s.level, false);
 		  break;
 		case CHAIN_ARGV:
-		  // TODO - figure out how to pass multiple arguments to
-		  // macro.c at once
 		  if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
 		    {
 		      arg_adjust_refcount (chain->u.u_a.argv, false);
@@ -958,7 +962,6 @@ skip_line (const char *name)
   if (file != current_file || line != current_line)
     input_change = true;
 }
-
 
 /*-------------------------------------------------------------------.
 | When a MACRO token is seen, next_token () uses init_macro_token () |
@@ -985,20 +988,31 @@ append_quote_token (struct obstack *obs, token_data *td)
   token_chain *src_chain = isp->u.u_c.chain;
   token_chain *chain;
 
-  assert (isp->type == INPUT_CHAIN && obs && current_quote_age
-	  && src_chain->type == CHAIN_STR && src_chain->u.u_s.level >= 0);
+  assert (isp->type == INPUT_CHAIN && obs && current_quote_age);
   isp->u.u_c.chain = src_chain->next;
 
   /* Speed consideration - for short enough tokens, the speed and
      memory overhead of parsing another INPUT_CHAIN link outweighs the
      time to inline the token text.  */
-  if (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
+  if (src_chain->type == CHAIN_STR
+      && src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
     {
+      assert (src_chain->u.u_s.level >= 0);
       obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
       adjust_refcount (src_chain->u.u_s.level, false);
       return;
     }
 
+  // TODO preserve $@ through a quoted context, in case a later reference
+  // strips those quotes.
+  if (src_chain->type == CHAIN_ARGV)
+    {
+      arg_print (obs, src_chain->u.u_a.argv, src_chain->u.u_a.index,
+		 src_chain->u.u_a.quotes, NULL);
+      arg_adjust_refcount (src_chain->u.u_a.argv, false);
+      return;
+    }
+
   if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
     {
       TOKEN_DATA_TYPE (td) = TOKEN_COMP;
@@ -1015,6 +1029,65 @@ append_quote_token (struct obstack *obs, token_data *td)
   chain->next = NULL;
 }
 
+
+/*-------------------------------------------------------------------.
+| When an ARGV token is seen, convert TD to point to it via a	     |
+| composite token.  Use OBS for any additional allocations needed to |
+| store the token chain.					     |
+`-------------------------------------------------------------------*/
+static void
+init_argv_token (struct obstack *obs, token_data *td)
+{
+  token_chain *src_chain;  /* Link in the current input block.  */
+  token_chain *chain;  /* Copy of that link, stored in OBS.  */
+  int ch = next_char (true);  /* Expected to report CHAR_QUOTE.  */
+
+  assert (ch == CHAR_QUOTE && TOKEN_DATA_TYPE (td) == TOKEN_VOID
+	  && isp->type == INPUT_CHAIN && isp->u.u_c.chain->type == CHAIN_ARGV
+	  && obs && obstack_object_size (obs) == 0);
+
+  src_chain = isp->u.u_c.chain;
+  isp->u.u_c.chain = src_chain->next;  /* Unlink it from the input.  */
+  TOKEN_DATA_TYPE (td) = TOKEN_COMP;
+  /* Clone the link, since the input will be discarded soon.  */
+  chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain);
+  td->u.u_c.chain = td->u.u_c.end = chain;  /* One-link chain.  */
+  chain->next = NULL;
+
+  /* If the next character is not ',' or ')', then unlink the last
+     argument from argv and schedule it for reparsing.  This way,
+     expand_argument never has to deal with concatenation of argv with
+     arbitrary text.  Note that the implementation of safe_quotes
+     ensures peek_input won't return CHAR_ARGV if the user is perverse
+     enough to mix comment delimiters with argument separators:
+
+       define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c))
+       => 2 (not 3)
+
+     Therefore, we do not have to worry about calling MATCH, and thus
+     do not have to worry about pop_input being called and
+     invalidating the argv reference.
+
+     When the $@ ref is used unchanged, we completely bypass the
+     decrement of the argv refcount in next_char_1, since the ref is
+     still live via the current collect_arguments.  However, when the
+     last element of the $@ ref is reparsed, we must increase the argv
+     refcount here, to compensate for the fact that it will be
+     decreased once the final element is parsed.  */
+  assert (*curr_comm.str1 != ',' && *curr_comm.str1 != ')'
+	  && *curr_comm.str1 != *curr_quote.str1);
+  ch = peek_input (false);  /* Look past the reference.  */
+  if (ch != ',' && ch != ')')
+    {
+      isp->u.u_c.chain = src_chain;  /* Relink for reparsing.  */
+      src_chain->u.u_a.index = arg_argc (chain->u.u_a.argv) - 1;
+      src_chain->u.u_a.comma = true;  /* Emit a comma first.  */
+      chain->u.u_a.skip_last = true;  /* TD omits the last argument.  */
+      arg_adjust_refcount (chain->u.u_a.argv, true);
+    }
+}
+
+
 /*------------------------------------------------------------------.
 | This function is for matching a string against a prefix of the    |
 | input stream.  If the string S matches the input and CONSUME is   |
@@ -1031,7 +1104,7 @@ match_input (const char *s, bool consume)
   const char *t;
   bool result = false;
 
-  ch = peek_input ();
+  ch = peek_input (false);
   if (ch != to_uchar (*s))
     return false;			/* fail */
 
@@ -1043,7 +1116,7 @@ match_input (const char *s, bool consume)
     }
 
   next_char (false);
-  for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
+  for (n = 1, t = s++; (ch = peek_input (false)) == to_uchar (*s++); )
     {
       next_char (false);
       n++;
@@ -1324,18 +1397,20 @@ safe_quotes (void)
 
 
 /*--------------------------------------------------------------------.
-| Parse a single token from the input stream, set TD to its           |
-| contents, and return its type.  A token is TOKEN_EOF if the         |
+| Parse a single token from the input stream, set TD to its	      |
+| contents, and return its type.  A token is TOKEN_EOF if the	      |
 | input_stack is empty; TOKEN_STRING for a quoted string or comment;  |
-| TOKEN_WORD for something that is a potential macro name; and        |
+| TOKEN_WORD for something that is a potential macro name; and	      |
 | TOKEN_SIMPLE for any single character that is not a part of any of  |
 | the previous types.  If LINE is not NULL, set *LINE to the line     |
 | where the token starts.  If OBS is not NULL, expand TOKEN_STRING    |
 | directly into OBS rather than in token_stack temporary storage      |
-| area, and TD could be a TOKEN_COMP instead of the usual             |
-| TOKEN_TEXT.  Report errors (unterminated comments or strings) on    |
-| behalf of CALLER, if non-NULL.                                      |
-|                                                                     |
+| area, and TD could be a TOKEN_COMP instead of the usual	      |
+| TOKEN_TEXT.  If ALLOW_ARGV, OBS must be non-NULL, and an entire     |
+| series of arguments can be returned as TOKEN_ARGV when a $@	      |
+| reference is encountered.  Report errors (unterminated comments or  |
+| strings) on behalf of CALLER, if non-NULL.			      |
+|								      |
 | Next_token () returns the token type, and passes back a pointer to  |
 | the token data through TD.  Non-string token text is collected on   |
 | the obstack token_stack, which never contains more than one token   |
@@ -1344,7 +1419,8 @@ safe_quotes (void)
 `--------------------------------------------------------------------*/
 
 token_type
-next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
+next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
+	    const char *caller)
 {
   int ch;
   int quote_level;
@@ -1366,7 +1442,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
 
   /* Can't consume character until after CHAR_MACRO is handled.  */
   TOKEN_DATA_TYPE (td) = TOKEN_VOID;
-  ch = peek_input ();
+  ch = peek_input (allow_argv && current_quote_age);
   if (ch == CHAR_EOF)
     {
 #ifdef DEBUG_INPUT
@@ -1385,6 +1461,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
 #endif /* DEBUG_INPUT */
       return TOKEN_MACDEF;
     }
+  if (ch == CHAR_ARGV)
+    {
+      init_argv_token (obs, td);
+#ifdef DEBUG_INPUT
+      xfprintf (stderr, "next_token -> ARGV (%d args)\n",
+		(arg_argc (td->u.u_c.chain->u.u_a.argv)
+		 - td->u.u_c.chain->u.u_a.index
+		 - (td->u.u_c.chain->u.u_a.skip_last ? 1 : 0)));
+#endif
+      return TOKEN_ARGV;
+    }
 
   next_char (false); /* Consume character we already peeked at.  */
   file = current_file;
@@ -1413,7 +1500,8 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
   else if (default_word_regexp && (isalpha (ch) || ch == '_'))
     {
       obstack_1grow (&token_stack, ch);
-      while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_'))
+      while ((ch = peek_input (false)) < CHAR_EOF
+	     && (isalnum (ch) || ch == '_'))
 	{
 	  obstack_1grow (&token_stack, ch);
 	  next_char (false);
@@ -1428,7 +1516,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       obstack_1grow (&token_stack, ch);
       while (1)
 	{
-	  ch = peek_input ();
+	  ch = peek_input (false);
 	  if (ch >= CHAR_EOF)
 	    break;
 	  obstack_1grow (&token_stack, ch);
@@ -1551,9 +1639,19 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
 		  token_type_string (type));
 	while (chain)
 	  {
-	    assert (chain->type == CHAIN_STR);
-	    xfprintf (stderr, "%s", chain->u.u_s.str);
-	    len += chain->u.u_s.len;
+	    switch (chain->type)
+	      {
+	      case CHAIN_STR:
+		xfprintf (stderr, "%s", chain->u.u_s.str);
+		len += chain->u.u_s.len;
+		break;
+	      case CHAIN_ARGV:
+		xfprintf (stderr, "{$@}");
+		break;
+	      default:
+		assert (!"next_token");
+		abort ();
+	      }
 	    links++;
 	    chain = chain->next;
 	  }
@@ -1573,7 +1671,7 @@ token_type
 peek_token (void)
 {
   token_type result;
-  int ch = peek_input ();
+  int ch = peek_input (false);
 
   if (ch == CHAR_EOF)
     {
@@ -1688,7 +1786,7 @@ lex_debug (void)
   token_type t;
   token_data td;
 
-  while ((t = next_token (&td, NULL, NULL, "<debug>")) != TOKEN_EOF)
+  while ((t = next_token (&td, NULL, NULL, false, "<debug>")) != TOKEN_EOF)
     print_token ("lex", t, &td);
 }
 #endif /* DEBUG_INPUT */
diff --git a/src/m4.c b/src/m4.c
index 2cfed194..a6bc92ad 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -1,7 +1,7 @@
 /* GNU m4 -- A simple macro processor
 
-   Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007
-   Free Software Foundation, Inc.
+   Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006,
+   2007, 2008 Free Software Foundation, Inc.
 
    This file is part of GNU M4.
 
@@ -98,18 +98,37 @@ m4_verror_at_line (bool warn, int status, int errnum, const char *file,
 		   va_list args)
 {
   char *full = NULL;
+  char *safe_macro = NULL;
+
+  /* Sanitize MACRO by doubling each `%', since we are turning around
+     and using it in a format string.  The allocation is overly
+     conservative, but such names only occur via indir or changeword.  */
+  if (macro && strchr (macro, '%'))
+    {
+      char *p = safe_macro = xcharalloc (2 * strlen (macro) + 1);
+      for (; *macro; macro++)
+	{
+	  if (*macro == '%')
+	    *p++ = '%';
+	  *p++ = *macro;
+	}
+      *p = '\0';  /* The copy is later read as a C string.  */
+    }
   /* Prepend warning and the macro name, as needed.  But if that fails
      for non-memory reasons (unlikely), then still use the original
      format.  */
   if (warn && macro)
-    full = xasprintf (_("Warning: %s: %s"), macro, format);
+    full = xasprintf (_("Warning: %s: %s"),
+		      quotearg (safe_macro ? safe_macro : macro), format);
   else if (warn)
     full = xasprintf (_("Warning: %s"), format);
   else if (macro)
-    full = xasprintf (_("%s: %s"), macro, format);
+    full = xasprintf (_("%s: %s"),
+		      quotearg (safe_macro ? safe_macro : macro), format);
   verror_at_line (status, errnum, line ? file : NULL, line,
 		  full ? full : format, args);
   free (full);
+  free (safe_macro);
   if ((!warn || fatal_warnings) && !retcode)
     retcode = EXIT_FAILURE;
 }
@@ -435,6 +454,8 @@ main (int argc, char *const *argv, char *const *envp)
 
   include_init ();
   debug_init ();
+  set_quoting_style (NULL, escape_quoting_style);
+  set_char_quoting (NULL, ':', 1);
 #ifdef USE_STACKOVF
   setup_stackovf_trap (argv, envp, stackovf_handler);
 #endif
@@ -687,6 +708,7 @@ main (int argc, char *const *argv, char *const *envp)
     }
   output_exit ();
   free_regex ();
+  quotearg_free ();
 #ifdef DEBUG_REGEX
   if (trace_file)
     fclose (trace_file);
diff --git a/src/m4.h b/src/m4.h
index 93a023f7..e4ff44ac 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -43,6 +43,7 @@
 #include "exitfail.h"
 #include "intprops.h"
 #include "obstack.h"
+#include "quotearg.h"
 #include "stdio--.h"
 #include "stdlib--.h"
 #include "unistd--.h"
@@ -265,7 +266,8 @@ enum token_type
   TOKEN_COMMA,	/* Active character `,', TOKEN_TEXT.  */
   TOKEN_CLOSE,	/* Active character `)', TOKEN_TEXT.  */
   TOKEN_SIMPLE,	/* Any other single character, TOKEN_TEXT.  */
-  TOKEN_MACDEF	/* A macro's definition (see "defn"), TOKEN_FUNC.  */
+  TOKEN_MACDEF,	/* A macro's definition (see "defn"), TOKEN_FUNC.  */
+  TOKEN_ARGV	/* A series of parameters, TOKEN_COMP.  */
 };
 
 /* The data for a token, a macro argument, and a macro definition.  */
@@ -308,6 +310,7 @@ struct token_chain
 	  unsigned int index;		/* Argument index within argv.  */
 	  bool_bitfield flatten : 1;	/* True to treat builtins as text.  */
 	  bool_bitfield comma : 1;	/* True when `,' is next input.  */
+	  bool_bitfield skip_last : 1;	/* True if last argument omitted.  */
 	  const string_pair *quotes;	/* NULL for $*, quotes for $@.  */
 	}
       u_a;
@@ -372,7 +375,8 @@ typedef enum token_data_type token_data_type;
 
 void input_init (void);
 token_type peek_token (void);
-token_type next_token (token_data *, int *, struct obstack *, const char *);
+token_type next_token (token_data *, int *, struct obstack *, bool,
+		       const char *);
 void skip_line (const char *);
 
 /* push back input */
diff --git a/src/macro.c b/src/macro.c
index 0cc42225..8341dd2f 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -216,7 +216,7 @@ expand_input (void)
   TOKEN_DATA_ORIG_TEXT (&empty_token) = "";
 #endif
 
-  while ((t = next_token (&td, &line, NULL, NULL)) != TOKEN_EOF)
+  while ((t = next_token (&td, &line, NULL, false, NULL)) != TOKEN_EOF)
     expand_token (NULL, t, &td, line, true);
 
   for (i = 0; i < stacks_count; i++)
@@ -364,7 +364,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
   /* Skip leading white space.  */
   do
     {
-      t = next_token (&td, NULL, obs, caller);
+      t = next_token (&td, NULL, obs, true, caller);
     }
   while (t == TOKEN_SIMPLE && isspace (to_uchar (*TOKEN_DATA_TEXT (&td))));
 
@@ -455,6 +455,20 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
 	    }
 	  break;
 
+	case TOKEN_ARGV:
+	  assert (paren_level == 0 && TOKEN_DATA_TYPE (argp) == TOKEN_VOID
+		  && obstack_object_size (obs) == 0
+		  && td.u.u_c.chain == td.u.u_c.end
+		  && td.u.u_c.chain->type == CHAIN_ARGV);
+	  TOKEN_DATA_TYPE (argp) = TOKEN_COMP;
+	  argp->u.u_c.chain = argp->u.u_c.end = td.u.u_c.chain;
+	  t = next_token (&td, NULL, NULL, false, caller);
+	  if (argp->u.u_c.chain->u.u_a.skip_last)
+	    assert (t == TOKEN_COMMA);
+	  else
+	    assert (t == TOKEN_COMMA || t == TOKEN_CLOSE);
+	  return t == TOKEN_COMMA;
+
 	default:
 	  assert (!"expand_argument");
 	  abort ();
@@ -462,7 +476,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
 
       if (TOKEN_DATA_TYPE (argp) != TOKEN_VOID || obstack_object_size (obs))
 	first = false;
-      t = next_token (&td, NULL, obs, caller);
+      t = next_token (&td, NULL, obs, first, caller);
     }
 }
 
@@ -496,7 +510,8 @@ collect_arguments (symbol *sym, struct obstack *arguments,
 
   if (peek_token () == TOKEN_OPEN)
     {
-      next_token (&td, NULL, NULL, SYMBOL_NAME (sym)); /* gobble parenthesis */
+      /* gobble parenthesis */
+      next_token (&td, NULL, NULL, false, SYMBOL_NAME (sym));
       do
 	{
 	  tdp = (token_data *) obstack_alloc (arguments, sizeof *tdp);
@@ -519,12 +534,22 @@ collect_arguments (symbol *sym, struct obstack *arguments,
 	      && TOKEN_DATA_QUOTE_AGE (tdp) != args.quote_age)
 	    args.quote_age = 0;
 	  else if (TOKEN_DATA_TYPE (tdp) == TOKEN_COMP)
-	    args.has_ref = true;
+	    {
+	      args.has_ref = true;
+	      if (tdp->u.u_c.chain->type == CHAIN_ARGV)
+		{
+		  args.argc += (tdp->u.u_c.chain->u.u_a.argv->argc
+				- tdp->u.u_c.chain->u.u_a.index
+				- tdp->u.u_c.chain->u.u_a.skip_last - 1);
+		  args.wrapper = true;
+		}
+	    }
 	}
       while (more_args);
     }
   argv = (macro_arguments *) obstack_finish (argv_stack);
   argv->argc = args.argc;
+  argv->wrapper = args.wrapper;
   argv->has_ref = args.has_ref;
   if (args.quote_age != quote_age ())
     argv->quote_age = 0;
@@ -734,9 +759,20 @@ arg_adjust_refcount (macro_arguments *argv, bool increase)
 	  chain = argv->array[i]->u.u_c.chain;
 	  while (chain)
 	    {
-	      assert (chain->type == CHAIN_STR);
-	      if (chain->u.u_s.level >= 0)
-		adjust_refcount (chain->u.u_s.level, increase);
+	      switch (chain->type)
+		{
+		case CHAIN_STR:
+		  if (chain->u.u_s.level >= 0)
+		    adjust_refcount (chain->u.u_s.level, increase);
+		  break;
+		case CHAIN_ARGV:
+		  assert (chain->u.u_a.argv->inuse);
+		  arg_adjust_refcount (chain->u.u_a.argv, increase);
+		  break;
+		default:
+		  assert (!"arg_adjust_refcount");
+		  abort ();
+		}
 	      chain = chain->next;
 	    }
 	}
@@ -766,12 +802,14 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
   for (i = 0; i < argv->arraylen; i++)
     {
       token = argv->array[i];
-      if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
+      if (TOKEN_DATA_TYPE (token) == TOKEN_COMP
+	  && token->u.u_c.chain->type == CHAIN_ARGV)
 	{
 	  token_chain *chain = token->u.u_c.chain;
 	  // TODO for now we support only a single-length $@ chain...
-	  assert (!chain->next && chain->type == CHAIN_ARGV);
-	  if (index < chain->u.u_a.argv->argc - (chain->u.u_a.index - 1))
+	  assert (!chain->next);
+	  if (index <= (chain->u.u_a.argv->argc - chain->u.u_a.index
+			- chain->u.u_a.skip_last))
 	    {
 	      token = arg_token (chain->u.u_a.argv,
 				 chain->u.u_a.index - 1 + index, level);
@@ -780,7 +818,8 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
 		token = &empty_token;
 	      break;
 	    }
-	  index -= chain->u.u_a.argv->argc - chain->u.u_a.index;
+	  index -= (chain->u.u_a.argv->argc - chain->u.u_a.index
+		    - chain->u.u_a.skip_last);
 	}
       else if (--index == 0)
 	break;
@@ -793,18 +832,24 @@ arg_token (macro_arguments *argv, unsigned int index, int *level)
 static void
 arg_mark (macro_arguments *argv)
 {
+  unsigned int i;
+  token_chain *chain;
+
   if (argv->inuse)
     return;
   argv->inuse = true;
   if (argv->wrapper)
-    {
-      // TODO for now we support only a single-length $@ chain...
-      assert (argv->arraylen == 1
-	      && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP
-	      && !argv->array[0]->u.u_c.chain->next
-	      && argv->array[0]->u.u_c.chain->type == CHAIN_ARGV);
-      argv->array[0]->u.u_c.chain->u.u_a.argv->inuse = true;
-    }
+    for (i = 0; i < argv->arraylen; i++)  /* Visit every argument slot.  */
+      if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP)
+	{
+	  chain = argv->array[i]->u.u_c.chain;
+	  while (chain)
+	    {
+	      if (chain->type == CHAIN_ARGV && !chain->u.u_a.argv->inuse)
+		arg_mark (chain->u.u_a.argv);  /* Mark referenced argv.  */
+	      chain = chain->next;
+	    }
+	}
 }
 
 /* Given ARGV, return how many arguments it refers to.  */
@@ -854,14 +899,24 @@ arg_text (macro_arguments *argv, unsigned int index)
     case TOKEN_TEXT:
       return TOKEN_DATA_TEXT (token);
     case TOKEN_COMP:
-      // TODO - concatenate argv refs, or even functions?  For now, we assume
-      // all chain elements are text.
+      // TODO - concatenate functions?
       chain = token->u.u_c.chain;
       obs = arg_scratch ();
       while (chain)
 	{
-	  assert (chain->type == CHAIN_STR);
-	  obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
+	  switch (chain->type)
+	    {
+	    case CHAIN_STR:
+	      obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
+	      break;
+	    case CHAIN_ARGV:
+	      arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
+			 chain->u.u_a.quotes, NULL);
+	      break;
+	    default:
+	      assert (!"arg_text");
+	      abort ();
+	    }
 	  chain = chain->next;
 	}
       obstack_1grow (obs, '\0');
@@ -1122,13 +1177,13 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level,
   token_chain *chain;
 
   assert (obstack_object_size (obs) == 0);
-  if (argv->wrapper)
+  if (argv->wrapper && argv->arraylen == 1)
     {
       // TODO for now we support only a single-length $@ chain...
-      assert (argv->arraylen == 1
-	      && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP);
+      assert (TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP);
       chain = argv->array[0]->u.u_c.chain;
-      assert (!chain->next && chain->type == CHAIN_ARGV);
+      assert (!chain->next && chain->type == CHAIN_ARGV
+	      && !chain->u.u_a.skip_last);
       argv = chain->u.u_a.argv;
       index += chain->u.u_a.index - 1;
     }
@@ -1145,6 +1200,7 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level,
   chain->u.u_a.index = index;
   chain->u.u_a.flatten = flatten;
   chain->u.u_a.comma = false;
+  chain->u.u_a.skip_last = false;
   if (quotes)
     {
       /* Clone the quotes into the obstack, since a subsequent
diff --git a/src/symtab.c b/src/symtab.c
index 277a79f4..dac49d7c 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -350,7 +350,7 @@ symtab_debug (void)
   int delete;
   static int i;
 
-  while (next_token (&td, NULL, NULL, "<debug>") == TOKEN_WORD)
+  while (next_token (&td, NULL, NULL, false, "<debug>") == TOKEN_WORD)
     {
       text = TOKEN_DATA_TEXT (&td);
       if (*text == '_')
author	Eric Blake <ebb9@byu.net>	2007-11-01 09:28:46 -0600
committer	Eric Blake <ebb9@byu.net>	2008-02-14 14:10:50 -0700
commit	1fecefc8b990254aa667a01d12c6c7a2d716df06 (patch)
tree	d49eb7931fb7b2c3f48ff914cbf5a4de4fd16daa
parent	9d08c0c8685fdd749b20062e03c061275dc8afbc (diff)
download	m4-1fecefc8b990254aa667a01d12c6c7a2d716df06.tar.gz