Stage16: cache quotes and improve arg_print

author: Eric Blake <ebb9@byu.net> 2007-11-13 06:55:27 -0700
committer: Eric Blake <ebb9@byu.net> 2008-02-20 19:48:16 -0700
commit: 44740d89961c48b712562dfc650dc0cb57898aa0 (patch)
tree: e4ee391b2a73fffac48ff427df40882bc89a53d3
parent: 1fecefc8b990254aa667a01d12c6c7a2d716df06 (diff)
download: m4-44740d89961c48b712562dfc650dc0cb57898aa0.tar.gz
13 files changed, 302 insertions, 159 deletions
diff --git a/checks/get-them b/checks/get-them
index e034962c..803f413c 100755
--- a/checks/get-them
+++ b/checks/get-them
@@ -1,11 +1,13 @@
 #!/bin/sh
 # -*- AWK -*-
 # Extract all examples from the manual source.
-# Copyright (C) 1992, 2005, 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 1992, 2005, 2006, 2007, 2008 Free Software Foundation,
+# Inc.
 
 # This script is for use with GNU awk.
 
 FILE=${1-/dev/null}
+: ${AWK=awk}
 
 $AWK '
 
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index b2599c98..2c6079bb 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -2900,6 +2900,7 @@ An actual implementation of these three macros is distributed as
 @file{m4-@value{VERSION}/@/examples/@/quote.m4} in this package.  First,
 let's examine their usage:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`quote.m4')
@@ -2932,6 +2933,7 @@ other hand, results in a string no matter what, since it is still
 possible to tell whether it was invoked without arguments based on the
 resulting string.
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 undivert(`quote.m4')dnl
@@ -2993,6 +2995,7 @@ invocation is restored.
 
 It can, for example, be used for simple counting:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`forloop.m4')
@@ -3003,6 +3006,7 @@ forloop(`i', `1', `8', `i ')
 
 For-loops can be nested, like:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`forloop.m4')
@@ -3030,6 +3034,7 @@ not finished, it increments the iterator (using the predefined macro
 Here is an actual implementation of @code{forloop}, distributed as
 @file{m4-@value{VERSION}/@/examples/@/forloop.m4} in this package:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 undivert(`forloop.m4')dnl
@@ -3076,6 +3081,7 @@ using an implementation of @code{foreach} distributed as
 @file{m4-@value{VERSION}/@/examples/@/foreach.m4}, and @code{foreachq}
 in @file{m4-@value{VERSION}/@/examples/@/foreachq.m4}.
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`foreach.m4')
@@ -3098,6 +3104,7 @@ It is possible to be more complex; each element of the @var{paren-list}
 or @var{quote-list} can itself be a list, to pass as further arguments
 to a helper macro.  This example generates a shell case statement:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`foreach.m4')
@@ -3127,6 +3134,7 @@ needed to grab the first element of a list.  Second,
 through the original list.  Here is a simple implementation of
 @code{foreach}:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 undivert(`foreach.m4')dnl
@@ -3153,6 +3161,7 @@ expecting the macro name on output after one layer of quotes is removed
 during list iteration and the final layer removed during the final
 rescan:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 define(`a', `1')define(`b', `2')define(`c', `3')
@@ -3177,6 +3186,7 @@ foreachq(`x', ```a'', ``(b'', ``c)''', `x
 
 Obviously, @code{foreachq} did a better job; here is its implementation:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 undivert(`foreachq.m4')dnl
@@ -3422,7 +3432,8 @@ following:
 In trace output, show the actual arguments that were collected before
 invoking the macro.  This applies to all macro calls if the @samp{t}
 flag is used, otherwise only the macros covered by calls of
-@code{traceon}.
+@code{traceon}.  Arguments are subject to length truncation specified by
+the command line option @option{--arglength} (or @option{-l}).
 
 @item c
 In trace output, show several trace lines for each macro call.  A line
@@ -3433,7 +3444,9 @@ after the call has completed.
 @item e
 In trace output, show the expansion of each macro call, if it is not
 void.  This applies to all macro calls if the @samp{t} flag is used,
-otherwise only the macros covered by calls of @code{traceon}.
+otherwise only the macros covered by calls of @code{traceon}.  The
+expansion is subject to length truncation specified by the command line
+option @option{--arglength} (or @option{-l}).
 
 @item f
 In debug and trace output, include the name of the current input file in
@@ -3513,6 +3526,25 @@ foo
 @result{}FOO
 @end example
 
+The following example demonstrates the behavior of length truncation,
+when specified on the command line.  Note that each argument and the
+final result are individually truncated.  Also, the special tokens for
+builtin functions are not truncated.
+
+@comment options: -l6
+@example
+$ @kbd{m4 -d -l 6}
+define(`echo', `$@@')debugmode(`+t')
+@result{}
+echo(`1', `long string')
+@error{}m4trace: -1- echo(`1', `long s...') -> ``1',`l...'
+@result{}1,long string
+indir(`echo', defn(`changequote'))
+@error{}m4trace: -2- defn(`change...')
+@error{}m4trace: -1- indir(`echo', <changequote>) -> ``''
+@result{}
+@end example
+
 @node Debug Output
 @section Saving debugging output
 
@@ -4417,6 +4449,7 @@ Normally file inclusion is used to insert the contents of a file
 into the input stream.  The contents of the file will be read by
 @code{m4} and macro calls in the file will be expanded:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 define(`foo', `FOO')
@@ -4433,6 +4466,7 @@ of the file can be used to define macros that operate on entire files.
 Here is an example, which defines @samp{bar} to expand to the contents
 of @file{incl.m4}:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 define(`bar', include(`incl.m4'))
@@ -5217,6 +5251,7 @@ word to upper case and the remaining characters to lower case.
 First, an example of their usage, using implementations distributed in
 @file{m4-@value{VERSION}/@/examples/@/capitalize.m4}.
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`capitalize.m4')
@@ -5236,6 +5271,7 @@ merely parses out the words, and replaces them with an invocation of
 some subtle flaws.  You should try to see if you can find and correct
 them; or @pxref{Improved capitalize, , Answers}).
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 undivert(`capitalize.m4')dnl
@@ -5327,6 +5363,7 @@ ifelse(format(`%.1A', `1.999'), `0X1.0P+1', `success',
 Using the @code{forloop} macro defined earlier (@pxref{Forloop}), this
 example shows how @code{format} can be used to produce tabular output.
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`forloop.m4')
@@ -6186,6 +6223,7 @@ message output.
 This example reuses the file @file{incl.m4} mentioned earlier
 (@pxref{Include}):
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 define(`foo', ``$0' called at __file__:__line__')
@@ -6944,6 +6982,7 @@ shipped as @file{m4-@value{VERSION}/@/examples/@/forloop2.m4}; this
 version also optimizes based on the fact that the starting bound does
 not need to be passed to the helper @code{@w{_forloop}}.
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 undivert(`forloop2.m4')dnl
@@ -6953,7 +6992,7 @@ undivert(`forloop2.m4')dnl
 @result{}#   performs sanity check that FROM is larger than TO
 @result{}#   allows complex numerical expressions in TO and FROM
 @result{}define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
-@result{}  `pushdef(`$1', eval(`$2'))_forloop(`$1',
+@result{}  `pushdef(`$1', eval(`$2'))_$0(`$1',
 @result{}    eval(`$3'), `$4')popdef(`$1')')')
 @result{}define(`_forloop',
 @result{}  `$3`'ifelse(indir(`$1'), `$2', `',
@@ -6972,6 +7011,48 @@ forloop(`i', `a', `b', `non-numeric bounds')
 @result{}
 @end example
 
+One other change to notice is that the improved version used @samp{_$0}
+rather than @samp{_forloop} to invoke the helper routine.  In general,
+this is a good practice to follow, because then the set of macros can be
+uniformly transformed.  The following example shows a transformation
+that doubles the current quoting and appends a suffix @samp{2} to each
+transformed macro.  If @code{forloop} refers to the literal
+@samp{_forloop}, then @code{forloop2} invokes @code{_forloop} instead of
+the intended @code{_forloop2}, and the mixing of quoting paradigms leads
+to an infinite recursion loop in this example.
+
+@comment options: -L9
+@comment status: 1
+@comment examples
+@example
+$ @kbd{m4 -d -L 9 -I examples}
+define(`arg1', `$1')include(`forloop2.m4')include(`quote.m4')
+@result{}
+define(`double', `define(`$1'`2',
+  arg1(patsubst(dquote(defn(`$1')), `[`']', `\&\&')))')
+@result{}
+double(`forloop')double(`_forloop')defn(`forloop2')
+@result{}ifelse(eval(``($3) >= ($2)''), ``1'',
+@result{}  ``pushdef(``$1'', eval(``$2''))_$0(``$1'',
+@result{}    eval(``$3''), ``$4'')popdef(``$1'')'')
+forloop(i, 1, 5, `ifelse(')forloop(i, 1, 5, `)')
+@result{}
+changequote(`[', `]')changequote([``], [''])
+@result{}
+forloop2(i, 1, 5, ``ifelse('')forloop2(i, 1, 5, ``)'')
+@result{}
+changequote`'include(`forloop.m4')
+@result{}
+double(`forloop')double(`_forloop')defn(`forloop2')
+@result{}pushdef(``$1'', ``$2'')_forloop($@@)popdef(``$1'')
+forloop(i, 1, 5, `ifelse(')forloop(i, 1, 5, `)')
+@result{}
+changequote(`[', `]')changequote([``], [''])
+@result{}
+forloop2(i, 1, 5, ``ifelse('')forloop2(i, 1, 5, ``)'')
+@error{}m4:stdin:12: recursion limit of 9 exceeded, use -L<N> to change it
+@end example
+
 Of course, it is possible to make even more improvements, such as
 adding an optional step argument, or allowing iteration through
 descending sequences.  @acronym{GNU} Autoconf provides some of these
@@ -6984,6 +7065,7 @@ The @code{foreach} and @code{foreachq} macros (@pxref{Foreach}) as
 presented earlier each have flaws.  First, we will examine and fix the
 quadratic behavior of @code{foreachq}:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`foreachq.m4')
@@ -7025,6 +7107,7 @@ fewer macros, is less likely to run into machine limits, and most
 importantly, performs faster.  The fixed version of @code{foreachq} can
 be found in @file{m4-@value{VERSION}/@/examples/@/foreachq2.m4}:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`foreachq2.m4')
@@ -7034,7 +7117,7 @@ undivert(`foreachq2.m4')dnl
 @result{}divert(`-1')
 @result{}# foreachq(x, `item_1, item_2, ..., item_n', stmt)
 @result{}#   quoted list, improved version
-@result{}define(`foreachq', `pushdef(`$1')_foreachq($@@)popdef(`$1')')
+@result{}define(`foreachq', `pushdef(`$1')_$0($@@)popdef(`$1')')
 @result{}define(`_arg1q', ``$1'')
 @result{}define(`_rest', `ifelse(`$#', `1', `', `dquote(shift($@@))')')
 @result{}define(`_foreachq', `ifelse(`$2', `', `',
@@ -7073,6 +7156,7 @@ instead of an arbitrary length list as the key to end recursion.  This
 alternative approach is available as
 @file{m4-@value{VERSION}/@/examples/@/foreach3.m4}:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`foreachq3.m4')
@@ -7120,6 +7204,7 @@ overquotes the arguments to @code{@w{_foreach}} to begin with, using
 @code{@w{_arg1}} to remove the extra layer of quoting that was added up
 front:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`foreach2.m4')
@@ -7129,7 +7214,7 @@ undivert(`foreach2.m4')dnl
 @result{}divert(`-1')
 @result{}# foreach(x, (item_1, item_2, ..., item_n), stmt)
 @result{}#   parenthesized list, improved version
-@result{}define(`foreach', `pushdef(`$1')_foreach(`$1',
+@result{}define(`foreach', `pushdef(`$1')_$0(`$1',
 @result{}  (dquote(dquote_elt$2)), `$3')popdef(`$1')')
 @result{}define(`_arg1', `$1')
 @result{}define(`_foreach', `ifelse(`$2', `(`')', `',
@@ -7167,6 +7252,7 @@ deciding which list style to use, one must take into account whether
 repeating the side effects of unquoted list elements will have any
 detrimental effects.
 
+@comment examples
 @example
 $ @kbd{m4 -d -I examples}
 include(`foreach2.m4')
@@ -7279,6 +7365,7 @@ difference between calling @code{capitalize} with the expansion of a
 macro, expanding the result of a case change, and changing the case of a
 double-quoted string:
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`capitalize.m4')dnl
@@ -7355,6 +7442,7 @@ must be redefined as @code{_upcase_alt} and @code{_downcase_alt}, since
 they contain nested quotes but are invoked with the alternate quoting
 scheme in effect.
 
+@comment examples
 @example
 $ @kbd{m4 -I examples}
 include(`capitalize2.m4')dnl
diff --git a/examples/foreach2.m4 b/examples/foreach2.m4
index 4acf0c26..74d00fb6 100644
--- a/examples/foreach2.m4
+++ b/examples/foreach2.m4
@@ -2,7 +2,7 @@ include(`quote.m4')dnl
 divert(`-1')
 # foreach(x, (item_1, item_2, ..., item_n), stmt)
 #   parenthesized list, improved version
-define(`foreach', `pushdef(`$1')_foreach(`$1',
+define(`foreach', `pushdef(`$1')_$0(`$1',
   (dquote(dquote_elt$2)), `$3')popdef(`$1')')
 define(`_arg1', `$1')
 define(`_foreach', `ifelse(`$2', `(`')', `',
diff --git a/examples/foreachq2.m4 b/examples/foreachq2.m4
index 345ddfed..f57d3edf 100644
--- a/examples/foreachq2.m4
+++ b/examples/foreachq2.m4
@@ -2,7 +2,7 @@ include(`quote.m4')dnl
 divert(`-1')
 # foreachq(x, `item_1, item_2, ..., item_n', stmt)
 #   quoted list, improved version
-define(`foreachq', `pushdef(`$1')_foreachq($@)popdef(`$1')')
+define(`foreachq', `pushdef(`$1')_$0($@)popdef(`$1')')
 define(`_arg1q', ``$1'')
 define(`_rest', `ifelse(`$#', `1', `', `dquote(shift($@))')')
 define(`_foreachq', `ifelse(`$2', `', `',
diff --git a/examples/forloop2.m4 b/examples/forloop2.m4
index f1bdf0ef..41e0e165 100644
--- a/examples/forloop2.m4
+++ b/examples/forloop2.m4
@@ -4,7 +4,7 @@ divert(`-1')
 #   performs sanity check that FROM is larger than TO
 #   allows complex numerical expressions in TO and FROM
 define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
-  `pushdef(`$1', eval(`$2'))_forloop(`$1',
+  `pushdef(`$1', eval(`$2'))_$0(`$1',
     eval(`$3'), `$4')popdef(`$1')')')
 define(`_forloop',
   `$3`'ifelse(indir(`$1'), `$2', `',
diff --git a/examples/null.m4 b/examples/null.m4
index 2632522c..79f4715f 100644
--- a/examples/null.m4
+++ b/examples/null.m4
diff --git a/examples/null.out b/examples/null.out
index c42e03c1..aca4b785 100644
--- a/examples/null.out
+++ b/examples/null.out
diff --git a/src/builtin.c b/src/builtin.c
index c89ad44e..beb8e350 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -198,6 +198,28 @@ find_builtin_by_name (const char *name)
       return bp;
   return bp + 1;
 }
+
+/*------------------------------------------------------------------.
+| Print a representation of FUNC to OBS.  If FLATTEN, output QUOTES |
+| around an empty string instead (nothing if QUOTES is NULL).       |
+`------------------------------------------------------------------*/
+void
+func_print (struct obstack *obs, const builtin *func, bool flatten,
+	    const string_pair *quotes)
+{
+  assert (func);
+  if (flatten && quotes)
+    {
+      obstack_grow (obs, quotes->str1, quotes->len1);
+      obstack_grow (obs, quotes->str2, quotes->len2);
+    }
+  else if (!flatten)
+    {
+      obstack_1grow (obs, '<');
+      obstack_grow (obs, func->name, strlen (func->name));
+      obstack_1grow (obs, '>');
+    }
+}
 
 /*-------------------------------------------------------------------------.
 | Install a builtin macro with name NAME, bound to the C function given in |
@@ -396,14 +418,15 @@ free_regex (void)
       }
 }
 
-/*-------------------------------------------------------------------------.
-| Define a predefined or user-defined macro, with name NAME, and expansion |
-| TEXT.  MODE destinguishes between the "define" and the "pushdef" case.   |
-| It is also used from main ().						   |
-`-------------------------------------------------------------------------*/
+/*-----------------------------------------------------------------.
+| Define a predefined or user-defined macro, with name NAME of     |
+| length NAME_LEN, and expansion TEXT.  MODE is SYMBOL_INSERT for  |
+| "define" or SYMBOL_PUSHDEF for "pushdef".  This function is also |
+| used from main ().                                               |
+`-----------------------------------------------------------------*/
 
 void
-define_user_macro (const char *name, size_t len, const char *text,
+define_user_macro (const char *name, size_t name_len, const char *text,
 		   symbol_lookup mode)
 {
   symbol *s;
@@ -420,24 +443,23 @@ define_user_macro (const char *name, size_t len, const char *text,
   if (macro_sequence_inuse && text)
     {
       regoff_t offset = 0;
-      len = strlen (defn);
+      struct re_registers *regs = &macro_sequence_regs;
+      size_t len = strlen (defn);
 
       while (offset < len
 	     && (offset = re_search (&macro_sequence_buf, defn, len, offset,
-				     len - offset, &macro_sequence_regs)) >= 0)
+				     len - offset, regs)) >= 0)
 	{
 	  /* Skip empty matches.  */
-	  if (macro_sequence_regs.start[0] == macro_sequence_regs.end[0])
+	  if (regs->start[0] == regs->end[0])
 	    offset++;
 	  else
 	    {
-	      char tmp;
-	      offset = macro_sequence_regs.end[0];
-	      tmp = defn[offset];
-	      defn[offset] = '\0';
-	      m4_warn (0, NULL, _("definition of `%s' contains sequence `%s'"),
-		       name, defn + macro_sequence_regs.start[0]);
-	      defn[offset] = tmp;
+	      offset = regs->end[0];
+	      m4_warn (0, NULL,
+		       _("definition of `%s' contains sequence `%.*s'"),
+		       name, regs->end[0] - regs->start[0],
+		       defn + regs->start[0]);
 	    }
 	}
       if (offset == -2)
@@ -597,34 +619,6 @@ shipout_int (struct obstack *obs, int val)
   obstack_grow (obs, s, strlen (s));
 }
 
-/*------------------------------------------------------------------.
-| Print arguments from the table ARGV to obstack OBS, starting with |
-| START, separated by SEP, and quoted by the current quotes if	    |
-| QUOTED is true.						    |
-`------------------------------------------------------------------*/
-
-static void
-dump_args (struct obstack *obs, int start, macro_arguments *argv,
-	   const char *sep, bool quoted)
-{
-  unsigned int i;
-  bool dump_sep = false;
-  size_t len = strlen (sep);
-  unsigned int argc = arg_argc (argv);
-
-  for (i = start; i < argc; i++)
-    {
-      if (dump_sep)
-	obstack_grow (obs, sep, len);
-      else
-	dump_sep = true;
-      if (quoted)
-	obstack_grow (obs, curr_quote.str1, curr_quote.len1);
-      obstack_grow (obs, ARG (i), ARG_LEN (i));
-      if (quoted)
-	obstack_grow (obs, curr_quote.str2, curr_quote.len2);
-    }
-}
 
 /* The rest of this file is code for builtins and expansion of user
    defined macros.  All the functions for builtins have a prototype as:
@@ -1516,7 +1510,7 @@ m4_errprint (struct obstack *obs, int argc, macro_arguments *argv)
 
   if (bad_argc (ARG (0), argc, 1, -1))
     return;
-  dump_args (obs, 1, argv, " ", false);
+  arg_print (obs, argv, 1, NULL, true, " ", NULL, false);
   debug_flush_files ();
   len = obstack_object_size (obs);
   /* The close_stdin module makes it safe to skip checking the return
@@ -1597,12 +1591,13 @@ m4_m4wrap (struct obstack *obs, int argc, macro_arguments *argv)
 {
   if (bad_argc (ARG (0), argc, 1, -1))
     return;
+  obs = push_wrapup_init ();
   if (no_gnu_extensions)
     obstack_grow (obs, ARG (1), ARG_LEN (1));
   else
-    dump_args (obs, 1, argv, " ", false);
-  obstack_1grow (obs, '\0');
-  push_wrapup ((char *) obstack_finish (obs));
+    /* TODO - allow builtins, rather than always flattening.  */
+    arg_print (obs, argv, 1, NULL, true, " ", NULL, false);
+  push_wrapup_finish ();
 }
 
 /* Enable tracing of all specified macros, or all, if none is specified.
diff --git a/src/debug.c b/src/debug.c
index d6b2ddc9..737ee524 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -359,44 +359,16 @@ trace_prepre (const char *name, int id)
 void
 trace_pre (const char *name, int id, macro_arguments *argv)
 {
-  int i;
-  const builtin *bp;
-  int argc = arg_argc (argv);
-
   trace_header (id);
   trace_format ("%s", name);
 
-  if (argc > 1 && (debug_level & DEBUG_TRACE_ARGS))
+  if (arg_argc (argv) > 1 && (debug_level & DEBUG_TRACE_ARGS))
     {
+      int len = max_debug_argument_length;
       trace_format ("(");
-
-      for (i = 1; i < argc; i++)
-	{
-	  if (i != 1)
-	    trace_format (", ");
-
-	  switch (arg_type (argv, i))
-	    {
-	    case TOKEN_TEXT:
-	      trace_format ("%l%S%r", ARG (i));
-	      break;
-
-	    case TOKEN_FUNC:
-	      bp = find_builtin_by_addr (arg_func (argv, i));
-	      if (bp == NULL)
-		{
-		  assert (!"trace_pre");
-		  abort ();
-		}
-	      trace_format ("<%s>", bp->name);
-	      break;
-
-	    default:
-	      assert (!"trace_pre");
-	      abort ();
-	    }
-
-	}
+      arg_print (&trace, argv, 1,
+		 (debug_level & DEBUG_TRACE_QUOTE) ? &curr_quote : NULL,
+		 false, ", ", &len, true);
       trace_format (")");
     }
 
diff --git a/src/input.c b/src/input.c
index 5bbaf088..063186a4 100644
--- a/src/input.c
+++ b/src/input.c
@@ -42,14 +42,14 @@
    loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
 
    Pushing new input on the input stack is done by push_file (),
-   push_string (), push_wrapup () (for wrapup text), and push_macro ()
-   (for macro definitions).  Because macro expansion needs direct
-   access to the current input obstack (for optimization), push_string
-   () is split in two functions, push_string_init (), which returns a
-   pointer to the current input stack, and push_string_finish (),
-   which returns a pointer to the final text.  The input_block *next
-   is used to manage the coordination between the different push
-   routines.
+   push_string (), push_wrapup_init/push_wrapup_finish () (for wrapup
+   text), and push_macro () (for macro definitions).  Because macro
+   expansion needs direct access to the current input obstack (for
+   optimization), push_string () is split in two functions,
+   push_string_init (), which returns a pointer to the current input
+   stack, and push_string_finish (), which returns a pointer to the
+   final text.  The input_block *next is used to manage the
+   coordination between the different push routines.
 
    The current file and line number are stored in two global
    variables, for use by the error handling functions in m4.c.  Macro
@@ -188,6 +188,9 @@ static struct re_registers regs;
    context.  */
 static unsigned int current_quote_age;
 
+/* Cache a quote pair.  See quote_cache.  */
+static string_pair *cached_quote;
+
 static bool pop_input (bool);
 static void set_quote_age (void);
 
@@ -503,17 +506,14 @@ push_string_finish (void)
   return ret;
 }
 
-/*------------------------------------------------------------------.
-| The function push_wrapup () pushes a string on the wrapup stack.  |
-| When the normal input stack gets empty, the wrapup stack will     |
-| become the input stack, and push_string () and push_file () will  |
-| operate on wrapup_stack.  Push_wrapup should be done as           |
-| push_string (), but this will suffice, as long as arguments to    |
-| m4_m4wrap () are moderate in size.                                |
-`------------------------------------------------------------------*/
+/*--------------------------------------------------------------.
+| The function push_wrapup_init () returns an obstack ready for |
+| direct expansion of wrapup text, and should be followed by    |
+| push_wrapup_finish ().                                        |
+`--------------------------------------------------------------*/
 
-void
-push_wrapup (const char *s)
+struct obstack *
+push_wrapup_init (void)
 {
   input_block *i;
   i = (input_block *) obstack_alloc (wrapup_stack, sizeof *i);
@@ -521,9 +521,28 @@ push_wrapup (const char *s)
   i->type = INPUT_STRING;
   i->file = current_file;
   i->line = current_line;
-  i->u.u_s.len = strlen (s);
-  i->u.u_s.str = (char *) obstack_copy (wrapup_stack, s, i->u.u_s.len);
   wsp = i;
+  return wrapup_stack;
+}
+
+/*---------------------------------------------------------------.
+| After pushing wrapup text, push_wrapup_finish () completes the |
+| bookkeeping.                                                   |
+`---------------------------------------------------------------*/
+void
+push_wrapup_finish (void)
+{
+  input_block *i = wsp;
+  if (obstack_object_size (wrapup_stack) == 0)
+    {
+      wsp = i->prev;
+      obstack_free (wrapup_stack, i);
+    }
+  else
+    {
+      i->u.u_s.len = obstack_object_size (wrapup_stack);
+      i->u.u_s.str = (char *) obstack_finish (wrapup_stack);
+    }
 }
 
 
@@ -610,6 +629,7 @@ pop_input (bool cleanup)
       abort ();
     }
   obstack_free (current_input, isp);
+  cached_quote = NULL;
   next = NULL;			/* might be set in push_string_init () */
 
   isp = tmp;
@@ -674,13 +694,7 @@ input_print (struct obstack *obs, const input_block *input)
       obstack_1grow (obs, '>');
       break;
     case INPUT_MACRO:
-      {
-	const builtin *bp = find_builtin_by_addr (input->u.func);
-	assert (bp);
-	obstack_1grow (obs, '<');
-	obstack_grow (obs, bp->name, strlen (bp->name));
-	obstack_1grow (obs, '>');
-      }
+      func_print (obs, find_builtin_by_addr (input->u.func), false, NULL);
       break;
     case INPUT_CHAIN:
       chain = input->u.u_c.chain;
@@ -696,7 +710,9 @@ input_print (struct obstack *obs, const input_block *input)
 	    case CHAIN_ARGV:
 	      assert (!chain->u.u_a.comma);
 	      if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
-			     chain->u.u_a.quotes, &maxlen))
+			     quote_cache (NULL, chain->quote_age,
+					  chain->u.u_a.quotes),
+			     chain->u.u_a.flatten, NULL, &maxlen, false))
 		return;
 	      break;
 	    default:
@@ -783,7 +799,9 @@ peek_input (bool allow_argv)
 		     argument from argv.  */
 		  push_string_init ();
 		  push_arg_quote (current_input, chain->u.u_a.argv,
-				  chain->u.u_a.index, chain->u.u_a.quotes);
+				  chain->u.u_a.index,
+				  quote_cache (NULL, chain->quote_age,
+					       chain->u.u_a.quotes));
 		  chain->u.u_a.index++;
 		  chain->u.u_a.comma = true;
 		  push_string_finish ();
@@ -911,7 +929,9 @@ next_char_1 (bool allow_quote)
 		     argument from argv.  */
 		  push_string_init ();
 		  push_arg_quote (current_input, chain->u.u_a.argv,
-				  chain->u.u_a.index, chain->u.u_a.quotes);
+				  chain->u.u_a.index,
+				  quote_cache (NULL, chain->quote_age,
+					       chain->u.u_a.quotes));
 		  chain->u.u_a.index++;
 		  chain->u.u_a.comma = true;
 		  push_string_finish ();
@@ -1008,7 +1028,9 @@ append_quote_token (struct obstack *obs, token_data *td)
   if (src_chain->type == CHAIN_ARGV)
     {
       arg_print (obs, src_chain->u.u_a.argv, src_chain->u.u_a.index,
-		 src_chain->u.u_a.quotes, NULL);
+		 quote_cache (NULL, src_chain->quote_age,
+			      src_chain->u.u_a.quotes),
+		 src_chain->u.u_a.flatten, NULL, NULL, false);
       arg_adjust_refcount (src_chain->u.u_a.argv, false);
       return;
     }
@@ -1369,6 +1391,7 @@ set_quote_age (void)
 			 | (*curr_quote.str2 & 0xff));
   else
     current_quote_age = 0;
+  cached_quote = NULL;
 }
 
 /* Return the current quote age.  Each non-trivial changequote alters
@@ -1394,6 +1417,53 @@ safe_quotes (void)
 {
   return current_quote_age != 0;
 }
+
+/* Interface for caching frequently used quote pairs, using AGE for
+   optimization.  If QUOTES is NULL, don't use quoting.  If OBS is
+   non-NULL, AGE should be the current quote age, and QUOTES should be
+   &curr_quote; the return value will be a cached quote pair, where
+   the pointer is valid at least as long as OBS is not reset, but
+   whose contents are only guaranteed until the next changequote or
+   quote_cache.  Otherwise, OBS is NULL, AGE should be the same as
+   before, and QUOTES should be a previously returned cache value;
+   used to refresh the contents of the result.  */
+const string_pair *
+quote_cache (struct obstack *obs, unsigned int age, const string_pair *quotes)
+{
+  static char lquote[2];
+  static char rquote[2];
+  static string_pair simple = {lquote, 1, rquote, 1};
+
+  /* Implementation - if AGE is non-zero, then the implementation of
+     set_quote_age guarantees that we can recreate the return value on
+     the fly; so we use static storage, and the contents must be used
+     immediately.  If AGE is zero, then we must copy QUOTES onto OBS
+     (since changequote will invalidate the original), but we might as
+     well cache that copy (in case the current expansion contains more
+     than one instance of $@).  */
+  if (!quotes)
+    return NULL;
+  if (age)
+    {
+      *lquote = (age >> 8) & 0xff;
+      *rquote = age & 0xff;
+      return &simple;
+    }
+  if (!obs)
+    return quotes;
+  assert (next && quotes == &curr_quote);
+  if (!cached_quote)
+    {
+      assert (obs == current_input && obstack_object_size (obs) == 0);
+      cached_quote = (string_pair *) obstack_copy (obs, quotes,
+						   sizeof *quotes);
+      cached_quote->str1 = (char *) obstack_copy0 (obs, quotes->str1,
+						   quotes->len1);
+      cached_quote->str2 = (char *) obstack_copy0 (obs, quotes->str2,
+						   quotes->len2);
+    }
+  return cached_quote;
+}
 
 
 /*--------------------------------------------------------------------.
diff --git a/src/m4.c b/src/m4.c
index a6bc92ad..af4991f9 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -113,6 +113,7 @@ m4_verror_at_line (bool warn, int status, int errnum, const char *file,
 	  *p++ = *macro++;
 	}
       while (*macro);
+      *p = '\0';
     }
   /* Prepend warning and the macro name, as needed.  But if that fails
      for non-memory reasons (unlikely), then still use the original
diff --git a/src/m4.h b/src/m4.h
index e4ff44ac..7a1364b8 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -386,7 +386,8 @@ void push_macro (builtin_func *);
 struct obstack *push_string_init (void);
 bool push_token (token_data *, int, bool);
 const input_block *push_string_finish (void);
-void push_wrapup (const char *);
+struct obstack *push_wrapup_init (void);
+void push_wrapup_finish (void);
 bool pop_wrapup (void);
 void input_print (struct obstack *, const input_block *);
 
@@ -410,6 +411,8 @@ void set_word_regexp (const char *, const char *);
 #endif
 unsigned int quote_age (void);
 bool safe_quotes (void);
+const string_pair *quote_cache (struct obstack *, unsigned int,
+				const string_pair *);
 
 /* File: output.c --- output functions.  */
 extern int current_diversion;
@@ -494,7 +497,7 @@ size_t arg_len (macro_arguments *, unsigned int);
 builtin_func *arg_func (macro_arguments *, unsigned int);
 struct obstack *arg_scratch (void);
 bool arg_print (struct obstack *, macro_arguments *, unsigned int,
-		const string_pair *, int *);
+		const string_pair *, bool, const char *, int *, bool);
 macro_arguments *make_argv_ref (macro_arguments *, const char *, size_t,
 				bool, bool);
 void push_arg (struct obstack *, macro_arguments *, unsigned int);
@@ -553,6 +556,7 @@ const char *ntoa (int32_t, int);
 
 const builtin *find_builtin_by_addr (builtin_func *);
 const builtin *find_builtin_by_name (const char *);
+void func_print (struct obstack *, const builtin *, bool, const string_pair *);
 
 /* File: path.c  --- path search for include files.  */
 
diff --git a/src/macro.c b/src/macro.c
index 8341dd2f..7f817b39 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -911,7 +911,9 @@ arg_text (macro_arguments *argv, unsigned int index)
 	      break;
 	    case CHAIN_ARGV:
 	      arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
-			 chain->u.u_a.quotes, NULL);
+			 quote_cache (NULL, chain->quote_age,
+				      chain->u.u_a.quotes),
+			 chain->u.u_a.flatten, NULL, NULL, false);
 	      break;
 	    default:
 	      assert (!"arg_text");
@@ -1097,50 +1099,70 @@ arg_scratch (void)
 
 /* Dump a representation of ARGV to the obstack OBS, starting with
    argument INDEX.  If QUOTES is non-NULL, each argument is displayed
-   with those quotes.  If MAX_LEN is non-NULL, truncate the output
-   after *MAX_LEN bytes are output and return true; otherwise, return
-   false, and reduce *MAX_LEN by the number of bytes output.  */
+   with those quotes.  If FLATTEN, builtins are ignored.  Separate
+   arguments with SEP, which defaults to a comma.  If MAX_LEN is
+   non-NULL, truncate the output after *MAX_LEN bytes are output and
+   return true; otherwise, return false, and reduce *MAX_LEN by the
+   number of bytes output.  If QUOTE_EACH, the truncation length is
+   reset for each argument, quotes do not count against length, and
+   all arguments are printed; otherwise, quotes count against the
+   length and trailing arguments may be discarded.  */
 bool
 arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index,
-	   const string_pair *quotes, int *max_len)
+	   const string_pair *quotes, bool flatten, const char *sep,
+	   int *max_len, bool quote_each)
 {
   int len = max_len ? *max_len : INT_MAX;
   unsigned int i;
   token_data *token;
   token_chain *chain;
-  bool comma = false;
-
+  bool use_sep = false;
+  bool done;
+  size_t sep_len;
+  int *plen = quote_each ? NULL : &len; /* Budget for quotes and SEP.  */
+
+  if (!sep)
+    sep = ",";
+  sep_len = strlen (sep);
   for (i = index; i < argv->argc; i++)
     {
-      if (comma && obstack_print (obs, ",", 1, &len))
+      if (quote_each && max_len)
+	len = *max_len;
+      if (use_sep && obstack_print (obs, sep, sep_len, plen))
 	return true;
-      else
-	comma = true;
+      use_sep = true;
       token = arg_token (argv, i, NULL);
-      if (quotes && obstack_print (obs, quotes->str1, quotes->len1, &len))
-	return true;
       switch (TOKEN_DATA_TYPE (token))
 	{
 	case TOKEN_TEXT:
+	  if (quotes && obstack_print (obs, quotes->str1, quotes->len1, plen))
+	    return true;
 	  if (obstack_print (obs, TOKEN_DATA_TEXT (token),
-			     TOKEN_DATA_LEN (token), &len))
+			     TOKEN_DATA_LEN (token), &len) && !quote_each)
+	    return true;
+	  if (quotes && obstack_print (obs, quotes->str2, quotes->len2, plen))
 	    return true;
 	  break;
 	case TOKEN_COMP:
+	  if (quotes && obstack_print (obs, quotes->str1, quotes->len1, plen))
+	    return true;
 	  chain = token->u.u_c.chain;
-	  while (chain)
+	  done = false;
+	  while (chain && !done)
 	    {
 	      switch (chain->type)
 		{
 		case CHAIN_STR:
 		  if (obstack_print (obs, chain->u.u_s.str, chain->u.u_s.len,
 				     &len))
-		    return true;
+		    done = true;
 		  break;
 		case CHAIN_ARGV:
 		  if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
-				 chain->u.u_a.quotes, &len))
-		    return true;
+				 quote_cache (NULL, chain->quote_age,
+					      chain->u.u_a.quotes),
+				 flatten, NULL, &len, false))
+		    done = true;
 		  break;
 		default:
 		  assert (!"arg_print");
@@ -1148,16 +1170,19 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index,
 		}
 	      chain = chain->next;
 	    }
+	  if (done && !quote_each)
+	    return true;
+	  if (quotes && obstack_print (obs, quotes->str2, quotes->len2, plen))
+	    return true;
 	  break;
 	case TOKEN_FUNC:
-	  // TODO - support func?
+	  func_print (obs, find_builtin_by_addr (TOKEN_DATA_FUNC (token)),
+		      flatten, quotes);
+	  break;
 	default:
 	  assert (!"arg_print");
 	  abort ();
 	}
-      if (quotes && obstack_print (obs, quotes->str2, quotes->len2,
-				   &len))
-	return true;
     }
   if (max_len)
     *max_len = len;
@@ -1201,21 +1226,7 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level,
   chain->u.u_a.flatten = flatten;
   chain->u.u_a.comma = false;
   chain->u.u_a.skip_last = false;
-  if (quotes)
-    {
-      /* Clone the quotes into the obstack, since a subsequent
-	 changequote may take effect before the $@ ref is
-	 rescanned.  */
-      // TODO - optimize when quote_age is nonzero?  Cache in argv in case
-      // user macro expands to multiple refs?
-      string_pair *tmp = (string_pair *) obstack_copy (obs, quotes,
-						       sizeof *quotes);
-      tmp->str1 = (char *) obstack_copy0 (obs, quotes->str1, quotes->len1);
-      tmp->str2 = (char *) obstack_copy0 (obs, quotes->str2, quotes->len2);
-      chain->u.u_a.quotes = tmp;
-    }
-  else
-    chain->u.u_a.quotes = NULL;
+  chain->u.u_a.quotes = quote_cache (obs, chain->quote_age, quotes);
   return token;
 }
author	Eric Blake <ebb9@byu.net>	2007-11-13 06:55:27 -0700
committer	Eric Blake <ebb9@byu.net>	2008-02-20 19:48:16 -0700
commit	44740d89961c48b712562dfc650dc0cb57898aa0 (patch)
tree	e4ee391b2a73fffac48ff427df40882bc89a53d3
parent	1fecefc8b990254aa667a01d12c6c7a2d716df06 (diff)
download	m4-44740d89961c48b712562dfc650dc0cb57898aa0.tar.gz