summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Blake <ebb9@byu.net> 2007-11-13 06:55:27 -0700
committer Eric Blake <ebb9@byu.net> 2008-02-20 19:48:16 -0700
commit 44740d89961c48b712562dfc650dc0cb57898aa0 (patch)
tree e4ee391b2a73fffac48ff427df40882bc89a53d3
parent 1fecefc8b990254aa667a01d12c6c7a2d716df06 (diff)
download m4-44740d89961c48b712562dfc650dc0cb57898aa0.tar.gz
Stage16: cache quotes and improve arg_print
-rwxr-xr-x checks/get-them 4
-rw-r--r-- doc/m4.texinfo 98
-rw-r--r-- examples/foreach2.m4 2
-rw-r--r-- examples/foreachq2.m4 2
-rw-r--r-- examples/forloop2.m4 2
-rw-r--r-- examples/null.m4 bin 5764 -> 5747 bytes
-rw-r--r-- examples/null.out bin 400 -> 402 bytes
-rw-r--r-- src/builtin.c 91
-rw-r--r-- src/debug.c 38
-rw-r--r-- src/input.c 132
-rw-r--r-- src/m4.c 1
-rw-r--r-- src/m4.h 8
-rw-r--r-- src/macro.c 83
13 files changed, 302 insertions, 159 deletions
diff --git a/checks/get-them b/checks/get-them
index e034962c..803f413c 100755
--- a/checks/get-them
+++ b/checks/get-them
@@ -1,11 +1,13 @@
#!/bin/sh
# -*- AWK -*-
# Extract all examples from the manual source.
-# Copyright (C) 1992, 2005, 2006, 2007 Free Software Foundation, Inc.
+# Copyright (C) 1992, 2005, 2006, 2007, 2008 Free Software Foundation,
+# Inc.
# This script is for use with GNU awk.
FILE=${1-/dev/null}
+: ${AWK=awk}
$AWK '
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index b2599c98..2c6079bb 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -2900,6 +2900,7 @@ An actual implementation of these three macros is distributed as
@file{m4-@value{VERSION}/@/examples/@/quote.m4} in this package. First,
let's examine their usage:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`quote.m4')
@@ -2932,6 +2933,7 @@ other hand, results in a string no matter what, since it is still
possible to tell whether it was invoked without arguments based on the
resulting string.
+@comment examples
@example
$ @kbd{m4 -I examples}
undivert(`quote.m4')dnl
@@ -2993,6 +2995,7 @@ invocation is restored.
It can, for example, be used for simple counting:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`forloop.m4')
@@ -3003,6 +3006,7 @@ forloop(`i', `1', `8', `i ')
For-loops can be nested, like:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`forloop.m4')
@@ -3030,6 +3034,7 @@ not finished, it increments the iterator (using the predefined macro
Here is an actual implementation of @code{forloop}, distributed as
@file{m4-@value{VERSION}/@/examples/@/forloop.m4} in this package:
+@comment examples
@example
$ @kbd{m4 -I examples}
undivert(`forloop.m4')dnl
@@ -3076,6 +3081,7 @@ using an implementation of @code{foreach} distributed as
@file{m4-@value{VERSION}/@/examples/@/foreach.m4}, and @code{foreachq}
in @file{m4-@value{VERSION}/@/examples/@/foreachq.m4}.
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`foreach.m4')
@@ -3098,6 +3104,7 @@ It is possible to be more complex; each element of the @var{paren-list}
or @var{quote-list} can itself be a list, to pass as further arguments
to a helper macro. This example generates a shell case statement:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`foreach.m4')
@@ -3127,6 +3134,7 @@ needed to grab the first element of a list. Second,
through the original list. Here is a simple implementation of
@code{foreach}:
+@comment examples
@example
$ @kbd{m4 -I examples}
undivert(`foreach.m4')dnl
@@ -3153,6 +3161,7 @@ expecting the macro name on output after one layer of quotes is removed
during list iteration and the final layer removed during the final
rescan:
+@comment examples
@example
$ @kbd{m4 -I examples}
define(`a', `1')define(`b', `2')define(`c', `3')
@@ -3177,6 +3186,7 @@ foreachq(`x', ```a'', ``(b'', ``c)''', `x
Obviously, @code{foreachq} did a better job; here is its implementation:
+@comment examples
@example
$ @kbd{m4 -I examples}
undivert(`foreachq.m4')dnl
@@ -3422,7 +3432,8 @@ following:
In trace output, show the actual arguments that were collected before
invoking the macro. This applies to all macro calls if the @samp{t}
flag is used, otherwise only the macros covered by calls of
-@code{traceon}.
+@code{traceon}. Arguments are subject to length truncation specified by
+the command line option @option{--arglength} (or @option{-l}).
@item c
In trace output, show several trace lines for each macro call. A line
@@ -3433,7 +3444,9 @@ after the call has completed.
@item e
In trace output, show the expansion of each macro call, if it is not
void. This applies to all macro calls if the @samp{t} flag is used,
-otherwise only the macros covered by calls of @code{traceon}.
+otherwise only the macros covered by calls of @code{traceon}. The
+expansion is subject to length truncation specified by the command line
+option @option{--arglength} (or @option{-l}).
@item f
In debug and trace output, include the name of the current input file in
@@ -3513,6 +3526,25 @@ foo
@result{}FOO
@end example
+The following example demonstrates the behavior of length truncation,
+when specified on the command line. Note that each argument and the
+final result are individually truncated. Also, the special tokens for
+builtin functions are not truncated.
+
+@comment options: -l6
+@example
+$ @kbd{m4 -d -l 6}
+define(`echo', `$@@')debugmode(`+t')
+@result{}
+echo(`1', `long string')
+@error{}m4trace: -1- echo(`1', `long s...') -> ``1',`l...'
+@result{}1,long string
+indir(`echo', defn(`changequote'))
+@error{}m4trace: -2- defn(`change...')
+@error{}m4trace: -1- indir(`echo', <changequote>) -> ``''
+@result{}
+@end example
+
@node Debug Output
@section Saving debugging output
@@ -4417,6 +4449,7 @@ Normally file inclusion is used to insert the contents of a file
into the input stream. The contents of the file will be read by
@code{m4} and macro calls in the file will be expanded:
+@comment examples
@example
$ @kbd{m4 -I examples}
define(`foo', `FOO')
@@ -4433,6 +4466,7 @@ of the file can be used to define macros that operate on entire files.
Here is an example, which defines @samp{bar} to expand to the contents
of @file{incl.m4}:
+@comment examples
@example
$ @kbd{m4 -I examples}
define(`bar', include(`incl.m4'))
@@ -5217,6 +5251,7 @@ word to upper case and the remaining characters to lower case.
First, an example of their usage, using implementations distributed in
@file{m4-@value{VERSION}/@/examples/@/capitalize.m4}.
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`capitalize.m4')
@@ -5236,6 +5271,7 @@ merely parses out the words, and replaces them with an invocation of
some subtle flaws. You should try to see if you can find and correct
them; or @pxref{Improved capitalize, , Answers}).
+@comment examples
@example
$ @kbd{m4 -I examples}
undivert(`capitalize.m4')dnl
@@ -5327,6 +5363,7 @@ ifelse(format(`%.1A', `1.999'), `0X1.0P+1', `success',
Using the @code{forloop} macro defined earlier (@pxref{Forloop}), this
example shows how @code{format} can be used to produce tabular output.
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`forloop.m4')
@@ -6186,6 +6223,7 @@ message output.
This example reuses the file @file{incl.m4} mentioned earlier
(@pxref{Include}):
+@comment examples
@example
$ @kbd{m4 -I examples}
define(`foo', ``$0' called at __file__:__line__')
@@ -6944,6 +6982,7 @@ shipped as @file{m4-@value{VERSION}/@/examples/@/forloop2.m4}; this
version also optimizes based on the fact that the starting bound does
not need to be passed to the helper @code{@w{_forloop}}.
+@comment examples
@example
$ @kbd{m4 -I examples}
undivert(`forloop2.m4')dnl
@@ -6953,7 +6992,7 @@ undivert(`forloop2.m4')dnl
@result{}# performs sanity check that FROM is larger than TO
@result{}# allows complex numerical expressions in TO and FROM
@result{}define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
-@result{} `pushdef(`$1', eval(`$2'))_forloop(`$1',
+@result{} `pushdef(`$1', eval(`$2'))_$0(`$1',
@result{} eval(`$3'), `$4')popdef(`$1')')')
@result{}define(`_forloop',
@result{} `$3`'ifelse(indir(`$1'), `$2', `',
@@ -6972,6 +7011,48 @@ forloop(`i', `a', `b', `non-numeric bounds')
@result{}
@end example
+One other change to notice is that the improved version used @samp{_$0}
+rather than @samp{_forloop} to invoke the helper routine. In general,
+this is a good practice to follow, because then the set of macros can be
+uniformly transformed. The following example shows a transformation
+that doubles the current quoting and appends a suffix @samp{2} to each
+transformed macro. If @code{forloop} refers to the literal
+@samp{_forloop}, then @code{forloop2} invokes @code{_forloop} instead of
+the intended @code{_forloop2}, and the mixing of quoting paradigms leads
+to an infinite recursion loop in this example.
+
+@comment options: -L9
+@comment status: 1
+@comment examples
+@example
+$ @kbd{m4 -d -L 9 -I examples}
+define(`arg1', `$1')include(`forloop2.m4')include(`quote.m4')
+@result{}
+define(`double', `define(`$1'`2',
+ arg1(patsubst(dquote(defn(`$1')), `[`']', `\&\&')))')
+@result{}
+double(`forloop')double(`_forloop')defn(`forloop2')
+@result{}ifelse(eval(``($3) >= ($2)''), ``1'',
+@result{} ``pushdef(``$1'', eval(``$2''))_$0(``$1'',
+@result{} eval(``$3''), ``$4'')popdef(``$1'')'')
+forloop(i, 1, 5, `ifelse(')forloop(i, 1, 5, `)')
+@result{}
+changequote(`[', `]')changequote([``], [''])
+@result{}
+forloop2(i, 1, 5, ``ifelse('')forloop2(i, 1, 5, ``)'')
+@result{}
+changequote`'include(`forloop.m4')
+@result{}
+double(`forloop')double(`_forloop')defn(`forloop2')
+@result{}pushdef(``$1'', ``$2'')_forloop($@@)popdef(``$1'')
+forloop(i, 1, 5, `ifelse(')forloop(i, 1, 5, `)')
+@result{}
+changequote(`[', `]')changequote([``], [''])
+@result{}
+forloop2(i, 1, 5, ``ifelse('')forloop2(i, 1, 5, ``)'')
+@error{}m4:stdin:12: recursion limit of 9 exceeded, use -L<N> to change it
+@end example
+
Of course, it is possible to make even more improvements, such as
adding an optional step argument, or allowing iteration through
descending sequences. @acronym{GNU} Autoconf provides some of these
@@ -6984,6 +7065,7 @@ The @code{foreach} and @code{foreachq} macros (@pxref{Foreach}) as
presented earlier each have flaws. First, we will examine and fix the
quadratic behavior of @code{foreachq}:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`foreachq.m4')
@@ -7025,6 +7107,7 @@ fewer macros, is less likely to run into machine limits, and most
importantly, performs faster. The fixed version of @code{foreachq} can
be found in @file{m4-@value{VERSION}/@/examples/@/foreachq2.m4}:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`foreachq2.m4')
@@ -7034,7 +7117,7 @@ undivert(`foreachq2.m4')dnl
@result{}divert(`-1')
@result{}# foreachq(x, `item_1, item_2, ..., item_n', stmt)
@result{}# quoted list, improved version
-@result{}define(`foreachq', `pushdef(`$1')_foreachq($@@)popdef(`$1')')
+@result{}define(`foreachq', `pushdef(`$1')_$0($@@)popdef(`$1')')
@result{}define(`_arg1q', ``$1'')
@result{}define(`_rest', `ifelse(`$#', `1', `', `dquote(shift($@@))')')
@result{}define(`_foreachq', `ifelse(`$2', `', `',
@@ -7073,6 +7156,7 @@ instead of an arbitrary length list as the key to end recursion. This
alternative approach is available as
@file{m4-@value{VERSION}/@/examples/@/foreachq3.m4}:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`foreachq3.m4')
@@ -7120,6 +7204,7 @@ overquotes the arguments to @code{@w{_foreach}} to begin with, using
@code{@w{_arg1}} to remove the extra layer of quoting that was added up
front:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`foreach2.m4')
@@ -7129,7 +7214,7 @@ undivert(`foreach2.m4')dnl
@result{}divert(`-1')
@result{}# foreach(x, (item_1, item_2, ..., item_n), stmt)
@result{}# parenthesized list, improved version
-@result{}define(`foreach', `pushdef(`$1')_foreach(`$1',
+@result{}define(`foreach', `pushdef(`$1')_$0(`$1',
@result{} (dquote(dquote_elt$2)), `$3')popdef(`$1')')
@result{}define(`_arg1', `$1')
@result{}define(`_foreach', `ifelse(`$2', `(`')', `',
@@ -7167,6 +7252,7 @@ deciding which list style to use, one must take into account whether
repeating the side effects of unquoted list elements will have any
detrimental effects.
+@comment examples
@example
$ @kbd{m4 -d -I examples}
include(`foreach2.m4')
@@ -7279,6 +7365,7 @@ difference between calling @code{capitalize} with the expansion of a
macro, expanding the result of a case change, and changing the case of a
double-quoted string:
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`capitalize.m4')dnl
@@ -7355,6 +7442,7 @@ must be redefined as @code{_upcase_alt} and @code{_downcase_alt}, since
they contain nested quotes but are invoked with the alternate quoting
scheme in effect.
+@comment examples
@example
$ @kbd{m4 -I examples}
include(`capitalize2.m4')dnl
diff --git a/examples/foreach2.m4 b/examples/foreach2.m4
index 4acf0c26..74d00fb6 100644
--- a/examples/foreach2.m4
+++ b/examples/foreach2.m4
@@ -2,7 +2,7 @@ include(`quote.m4')dnl
divert(`-1')
# foreach(x, (item_1, item_2, ..., item_n), stmt)
# parenthesized list, improved version
-define(`foreach', `pushdef(`$1')_foreach(`$1',
+define(`foreach', `pushdef(`$1')_$0(`$1',
(dquote(dquote_elt$2)), `$3')popdef(`$1')')
define(`_arg1', `$1')
define(`_foreach', `ifelse(`$2', `(`')', `',
diff --git a/examples/foreachq2.m4 b/examples/foreachq2.m4
index 345ddfed..f57d3edf 100644
--- a/examples/foreachq2.m4
+++ b/examples/foreachq2.m4
@@ -2,7 +2,7 @@ include(`quote.m4')dnl
divert(`-1')
# foreachq(x, `item_1, item_2, ..., item_n', stmt)
# quoted list, improved version
-define(`foreachq', `pushdef(`$1')_foreachq($@)popdef(`$1')')
+define(`foreachq', `pushdef(`$1')_$0($@)popdef(`$1')')
define(`_arg1q', ``$1'')
define(`_rest', `ifelse(`$#', `1', `', `dquote(shift($@))')')
define(`_foreachq', `ifelse(`$2', `', `',
diff --git a/examples/forloop2.m4 b/examples/forloop2.m4
index f1bdf0ef..41e0e165 100644
--- a/examples/forloop2.m4
+++ b/examples/forloop2.m4
@@ -4,7 +4,7 @@ divert(`-1')
# performs sanity check that FROM is larger than TO
# allows complex numerical expressions in TO and FROM
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
- `pushdef(`$1', eval(`$2'))_forloop(`$1',
+ `pushdef(`$1', eval(`$2'))_$0(`$1',
eval(`$3'), `$4')popdef(`$1')')')
define(`_forloop',
`$3`'ifelse(indir(`$1'), `$2', `',
diff --git a/examples/null.m4 b/examples/null.m4
index 2632522c..79f4715f 100644
--- a/examples/null.m4
+++ b/examples/null.m4
Binary files differ
diff --git a/examples/null.out b/examples/null.out
index c42e03c1..aca4b785 100644
--- a/examples/null.out
+++ b/examples/null.out
Binary files differ
diff --git a/src/builtin.c b/src/builtin.c
index c89ad44e..beb8e350 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -198,6 +198,28 @@ find_builtin_by_name (const char *name)
return bp;
return bp + 1;
}
+
+/*------------------------------------------------------------------.
+| Print a representation of FUNC to OBS. FUNC must be non-NULL. |
+| If FLATTEN is false, output the builtin's name wrapped in angle |
+| brackets, as in <name>, and QUOTES is not consulted. If FLATTEN |
+| is true, output QUOTES around an empty string instead (the open |
+| quote immediately followed by the close quote), or nothing at |
+| all when QUOTES is NULL. |
+`------------------------------------------------------------------*/
+void
+func_print (struct obstack *obs, const builtin *func, bool flatten,
+ const string_pair *quotes)
+{
+ assert (func); /* Callers must pass a valid builtin. */
+ if (flatten && quotes) /* Flattened and quoted: empty quoted string. */
+ {
+ obstack_grow (obs, quotes->str1, quotes->len1);
+ obstack_grow (obs, quotes->str2, quotes->len2);
+ }
+ else if (!flatten) /* Not flattened: show the builtin as <name>. */
+ {
+ obstack_1grow (obs, '<');
+ obstack_grow (obs, func->name, strlen (func->name));
+ obstack_1grow (obs, '>');
+ }
+} /* Flattened without quotes falls through with no output. */
/*-------------------------------------------------------------------------.
| Install a builtin macro with name NAME, bound to the C function given in |
@@ -396,14 +418,15 @@ free_regex (void)
}
}
-/*-------------------------------------------------------------------------.
-| Define a predefined or user-defined macro, with name NAME, and expansion |
-| TEXT. MODE destinguishes between the "define" and the "pushdef" case. |
-| It is also used from main (). |
-`-------------------------------------------------------------------------*/
+/*-----------------------------------------------------------------.
+| Define a predefined or user-defined macro, with name NAME of |
+| length NAME_LEN, and expansion TEXT. MODE is SYMBOL_INSERT for |
+| "define" or SYMBOL_PUSHDEF for "pushdef". This function is also |
+| used from main (). |
+`-----------------------------------------------------------------*/
void
-define_user_macro (const char *name, size_t len, const char *text,
+define_user_macro (const char *name, size_t name_len, const char *text,
symbol_lookup mode)
{
symbol *s;
@@ -420,24 +443,23 @@ define_user_macro (const char *name, size_t len, const char *text,
if (macro_sequence_inuse && text)
{
regoff_t offset = 0;
- len = strlen (defn);
+ struct re_registers *regs = &macro_sequence_regs;
+ size_t len = strlen (defn);
while (offset < len
&& (offset = re_search (&macro_sequence_buf, defn, len, offset,
- len - offset, &macro_sequence_regs)) >= 0)
+ len - offset, regs)) >= 0)
{
/* Skip empty matches. */
- if (macro_sequence_regs.start[0] == macro_sequence_regs.end[0])
+ if (regs->start[0] == regs->end[0])
offset++;
else
{
- char tmp;
- offset = macro_sequence_regs.end[0];
- tmp = defn[offset];
- defn[offset] = '\0';
- m4_warn (0, NULL, _("definition of `%s' contains sequence `%s'"),
- name, defn + macro_sequence_regs.start[0]);
- defn[offset] = tmp;
+ offset = regs->end[0];
+ m4_warn (0, NULL,
+ _("definition of `%s' contains sequence `%.*s'"),
+ name, regs->end[0] - regs->start[0],
+ defn + regs->start[0]);
}
}
if (offset == -2)
@@ -597,34 +619,6 @@ shipout_int (struct obstack *obs, int val)
obstack_grow (obs, s, strlen (s));
}
-/*------------------------------------------------------------------.
-| Print arguments from the table ARGV to obstack OBS, starting with |
-| START, separated by SEP, and quoted by the current quotes if |
-| QUOTED is true. |
-`------------------------------------------------------------------*/
-
-static void
-dump_args (struct obstack *obs, int start, macro_arguments *argv,
- const char *sep, bool quoted)
-{
- unsigned int i;
- bool dump_sep = false;
- size_t len = strlen (sep);
- unsigned int argc = arg_argc (argv);
-
- for (i = start; i < argc; i++)
- {
- if (dump_sep)
- obstack_grow (obs, sep, len);
- else
- dump_sep = true;
- if (quoted)
- obstack_grow (obs, curr_quote.str1, curr_quote.len1);
- obstack_grow (obs, ARG (i), ARG_LEN (i));
- if (quoted)
- obstack_grow (obs, curr_quote.str2, curr_quote.len2);
- }
-}
/* The rest of this file is code for builtins and expansion of user
defined macros. All the functions for builtins have a prototype as:
@@ -1516,7 +1510,7 @@ m4_errprint (struct obstack *obs, int argc, macro_arguments *argv)
if (bad_argc (ARG (0), argc, 1, -1))
return;
- dump_args (obs, 1, argv, " ", false);
+ arg_print (obs, argv, 1, NULL, true, " ", NULL, false);
debug_flush_files ();
len = obstack_object_size (obs);
/* The close_stdin module makes it safe to skip checking the return
@@ -1597,12 +1591,13 @@ m4_m4wrap (struct obstack *obs, int argc, macro_arguments *argv)
{
if (bad_argc (ARG (0), argc, 1, -1))
return;
+ obs = push_wrapup_init ();
if (no_gnu_extensions)
obstack_grow (obs, ARG (1), ARG_LEN (1));
else
- dump_args (obs, 1, argv, " ", false);
- obstack_1grow (obs, '\0');
- push_wrapup ((char *) obstack_finish (obs));
+ // TODO - allow builtins, rather than always flattening
+ arg_print (obs, argv, 1, NULL, true, " ", NULL, false);
+ push_wrapup_finish ();
}
/* Enable tracing of all specified macros, or all, if none is specified.
diff --git a/src/debug.c b/src/debug.c
index d6b2ddc9..737ee524 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -359,44 +359,16 @@ trace_prepre (const char *name, int id)
void
trace_pre (const char *name, int id, macro_arguments *argv)
{
- int i;
- const builtin *bp;
- int argc = arg_argc (argv);
-
trace_header (id);
trace_format ("%s", name);
- if (argc > 1 && (debug_level & DEBUG_TRACE_ARGS))
+ if (arg_argc (argv) > 1 && (debug_level & DEBUG_TRACE_ARGS))
{
+ int len = max_debug_argument_length;
trace_format ("(");
-
- for (i = 1; i < argc; i++)
- {
- if (i != 1)
- trace_format (", ");
-
- switch (arg_type (argv, i))
- {
- case TOKEN_TEXT:
- trace_format ("%l%S%r", ARG (i));
- break;
-
- case TOKEN_FUNC:
- bp = find_builtin_by_addr (arg_func (argv, i));
- if (bp == NULL)
- {
- assert (!"trace_pre");
- abort ();
- }
- trace_format ("<%s>", bp->name);
- break;
-
- default:
- assert (!"trace_pre");
- abort ();
- }
-
- }
+ arg_print (&trace, argv, 1,
+ (debug_level & DEBUG_TRACE_QUOTE) ? &curr_quote : NULL,
+ false, ", ", &len, true);
trace_format (")");
}
diff --git a/src/input.c b/src/input.c
index 5bbaf088..063186a4 100644
--- a/src/input.c
+++ b/src/input.c
@@ -42,14 +42,14 @@
loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
Pushing new input on the input stack is done by push_file (),
- push_string (), push_wrapup () (for wrapup text), and push_macro ()
- (for macro definitions). Because macro expansion needs direct
- access to the current input obstack (for optimization), push_string
- () is split in two functions, push_string_init (), which returns a
- pointer to the current input stack, and push_string_finish (),
- which returns a pointer to the final text. The input_block *next
- is used to manage the coordination between the different push
- routines.
+ push_string (), push_wrapup_init/push_wrapup_finish () (for wrapup
+ text), and push_macro () (for macro definitions). Because macro
+ expansion needs direct access to the current input obstack (for
+ optimization), push_string () is split in two functions,
+ push_string_init (), which returns a pointer to the current input
+ stack, and push_string_finish (), which returns a pointer to the
+ final text. The input_block *next is used to manage the
+ coordination between the different push routines.
The current file and line number are stored in two global
variables, for use by the error handling functions in m4.c. Macro
@@ -188,6 +188,9 @@ static struct re_registers regs;
context. */
static unsigned int current_quote_age;
+/* Cache a quote pair. See quote_cache. */
+static string_pair *cached_quote;
+
static bool pop_input (bool);
static void set_quote_age (void);
@@ -503,17 +506,14 @@ push_string_finish (void)
return ret;
}
-/*------------------------------------------------------------------.
-| The function push_wrapup () pushes a string on the wrapup stack. |
-| When the normal input stack gets empty, the wrapup stack will |
-| become the input stack, and push_string () and push_file () will |
-| operate on wrapup_stack. Push_wrapup should be done as |
-| push_string (), but this will suffice, as long as arguments to |
-| m4_m4wrap () are moderate in size. |
-`------------------------------------------------------------------*/
+/*--------------------------------------------------------------.
+| The function push_wrapup_init () returns an obstack ready for |
+| direct expansion of wrapup text, and should be followed by |
+| push_wrapup_finish (). |
+`--------------------------------------------------------------*/
-void
-push_wrapup (const char *s)
+struct obstack *
+push_wrapup_init (void)
{
input_block *i;
i = (input_block *) obstack_alloc (wrapup_stack, sizeof *i);
@@ -521,9 +521,28 @@ push_wrapup (const char *s)
i->type = INPUT_STRING;
i->file = current_file;
i->line = current_line;
- i->u.u_s.len = strlen (s);
- i->u.u_s.str = (char *) obstack_copy (wrapup_stack, s, i->u.u_s.len);
wsp = i;
+ return wrapup_stack;
+}
+
+/*---------------------------------------------------------------.
+| After pushing wrapup text, push_wrapup_finish () completes the |
+| bookkeeping for the block at the top of the wrapup stack (wsp). |
+| If no bytes were grown on wrapup_stack, the block is unlinked |
+| and freed; otherwise the grown bytes are finished and recorded |
+| as the block's string and length. |
+`---------------------------------------------------------------*/
+void
+push_wrapup_finish (void)
+{
+ input_block *i = wsp; /* Presumably the block set up by push_wrapup_init (). */
+ if (obstack_object_size (wrapup_stack) == 0) /* Nothing was added. */
+ {
+ wsp = i->prev; /* Unlink the empty block before releasing it. */
+ obstack_free (wrapup_stack, i);
+ }
+ else
+ {
+ i->u.u_s.len = obstack_object_size (wrapup_stack); /* Read size before finishing. */
+ i->u.u_s.str = (char *) obstack_finish (wrapup_stack);
+ }
}
@@ -610,6 +629,7 @@ pop_input (bool cleanup)
abort ();
}
obstack_free (current_input, isp);
+ cached_quote = NULL;
next = NULL; /* might be set in push_string_init () */
isp = tmp;
@@ -674,13 +694,7 @@ input_print (struct obstack *obs, const input_block *input)
obstack_1grow (obs, '>');
break;
case INPUT_MACRO:
- {
- const builtin *bp = find_builtin_by_addr (input->u.func);
- assert (bp);
- obstack_1grow (obs, '<');
- obstack_grow (obs, bp->name, strlen (bp->name));
- obstack_1grow (obs, '>');
- }
+ func_print (obs, find_builtin_by_addr (input->u.func), false, NULL);
break;
case INPUT_CHAIN:
chain = input->u.u_c.chain;
@@ -696,7 +710,9 @@ input_print (struct obstack *obs, const input_block *input)
case CHAIN_ARGV:
assert (!chain->u.u_a.comma);
if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
- chain->u.u_a.quotes, &maxlen))
+ quote_cache (NULL, chain->quote_age,
+ chain->u.u_a.quotes),
+ chain->u.u_a.flatten, NULL, &maxlen, false))
return;
break;
default:
@@ -783,7 +799,9 @@ peek_input (bool allow_argv)
argument from argv. */
push_string_init ();
push_arg_quote (current_input, chain->u.u_a.argv,
- chain->u.u_a.index, chain->u.u_a.quotes);
+ chain->u.u_a.index,
+ quote_cache (NULL, chain->quote_age,
+ chain->u.u_a.quotes));
chain->u.u_a.index++;
chain->u.u_a.comma = true;
push_string_finish ();
@@ -911,7 +929,9 @@ next_char_1 (bool allow_quote)
argument from argv. */
push_string_init ();
push_arg_quote (current_input, chain->u.u_a.argv,
- chain->u.u_a.index, chain->u.u_a.quotes);
+ chain->u.u_a.index,
+ quote_cache (NULL, chain->quote_age,
+ chain->u.u_a.quotes));
chain->u.u_a.index++;
chain->u.u_a.comma = true;
push_string_finish ();
@@ -1008,7 +1028,9 @@ append_quote_token (struct obstack *obs, token_data *td)
if (src_chain->type == CHAIN_ARGV)
{
arg_print (obs, src_chain->u.u_a.argv, src_chain->u.u_a.index,
- src_chain->u.u_a.quotes, NULL);
+ quote_cache (NULL, src_chain->quote_age,
+ src_chain->u.u_a.quotes),
+ src_chain->u.u_a.flatten, NULL, NULL, false);
arg_adjust_refcount (src_chain->u.u_a.argv, false);
return;
}
@@ -1369,6 +1391,7 @@ set_quote_age (void)
| (*curr_quote.str2 & 0xff));
else
current_quote_age = 0;
+ cached_quote = NULL;
}
/* Return the current quote age. Each non-trivial changequote alters
@@ -1394,6 +1417,53 @@ safe_quotes (void)
{
return current_quote_age != 0;
}
+
+/* Interface for caching frequently used quote pairs, using AGE for
+ optimization. If QUOTES is NULL, don't use quoting. If OBS is
+ non-NULL, AGE should be the current quote age, and QUOTES should be
+ &curr_quote; the return value will be a cached quote pair, where
+ the pointer is valid at least as long as OBS is not reset, but
+ whose contents are only guaranteed until the next changequote or
+ quote_cache. Otherwise, OBS is NULL, AGE should be the same as
+ before, and QUOTES should be a previously returned cache value;
+ used to refresh the contents of the result.
+
+ Summary of return values: NULL when QUOTES is NULL; a pointer to
+ function-static storage rebuilt from AGE when AGE is non-zero;
+ QUOTES itself when AGE is zero and OBS is NULL; otherwise the
+ copy of QUOTES stored on OBS and remembered in cached_quote. */
+const string_pair *
+quote_cache (struct obstack *obs, unsigned int age, const string_pair *quotes)
+{
+ static char lquote[2]; /* Only byte 0 is written; byte 1 stays NUL. */
+ static char rquote[2]; /* Likewise for the right quote. */
+ static string_pair simple = {lquote, 1, rquote, 1}; /* One-byte pair over the two buffers. */
+
+ /* Implementation - if AGE is non-zero, then the implementation of
+ set_quote_age guarantees that we can recreate the return value on
+ the fly; so we use static storage, and the contents must be used
+ immediately. If AGE is zero, then we must copy QUOTES onto OBS
+ (since changequote will invalidate the original), but we might as
+ well cache that copy (in case the current expansion contains more
+ than one instance of $@). */
+ if (!quotes)
+ return NULL;
+ if (age)
+ {
+ *lquote = (age >> 8) & 0xff; /* Bits 8-15 of AGE give the left quote byte. */
+ *rquote = age & 0xff; /* Bits 0-7 of AGE give the right quote byte. */
+ return &simple;
+ }
+ if (!obs) /* Refresh request with age 0: QUOTES is returned as is. */
+ return quotes;
+ assert (next && quotes == &curr_quote); /* Presumably next is set while a push_string_init () is pending - confirm. */
+ if (!cached_quote) /* First request since the cache was last cleared. */
+ {
+ assert (obs == current_input && obstack_object_size (obs) == 0);
+ cached_quote = (string_pair *) obstack_copy (obs, quotes,
+ sizeof *quotes);
+ cached_quote->str1 = (char *) obstack_copy0 (obs, quotes->str1,
+ quotes->len1);
+ cached_quote->str2 = (char *) obstack_copy0 (obs, quotes->str2,
+ quotes->len2);
+ }
+ return cached_quote;
+}
/*--------------------------------------------------------------------.
diff --git a/src/m4.c b/src/m4.c
index a6bc92ad..af4991f9 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -113,6 +113,7 @@ m4_verror_at_line (bool warn, int status, int errnum, const char *file,
*p++ = *macro++;
}
while (*macro);
+ *p = '\0';
}
/* Prepend warning and the macro name, as needed. But if that fails
for non-memory reasons (unlikely), then still use the original
diff --git a/src/m4.h b/src/m4.h
index e4ff44ac..7a1364b8 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -386,7 +386,8 @@ void push_macro (builtin_func *);
struct obstack *push_string_init (void);
bool push_token (token_data *, int, bool);
const input_block *push_string_finish (void);
-void push_wrapup (const char *);
+struct obstack *push_wrapup_init (void);
+void push_wrapup_finish (void);
bool pop_wrapup (void);
void input_print (struct obstack *, const input_block *);
@@ -410,6 +411,8 @@ void set_word_regexp (const char *, const char *);
#endif
unsigned int quote_age (void);
bool safe_quotes (void);
+const string_pair *quote_cache (struct obstack *, unsigned int,
+ const string_pair *);
/* File: output.c --- output functions. */
extern int current_diversion;
@@ -494,7 +497,7 @@ size_t arg_len (macro_arguments *, unsigned int);
builtin_func *arg_func (macro_arguments *, unsigned int);
struct obstack *arg_scratch (void);
bool arg_print (struct obstack *, macro_arguments *, unsigned int,
- const string_pair *, int *);
+ const string_pair *, bool, const char *, int *, bool);
macro_arguments *make_argv_ref (macro_arguments *, const char *, size_t,
bool, bool);
void push_arg (struct obstack *, macro_arguments *, unsigned int);
@@ -553,6 +556,7 @@ const char *ntoa (int32_t, int);
const builtin *find_builtin_by_addr (builtin_func *);
const builtin *find_builtin_by_name (const char *);
+void func_print (struct obstack *, const builtin *, bool, const string_pair *);
/* File: path.c --- path search for include files. */
diff --git a/src/macro.c b/src/macro.c
index 8341dd2f..7f817b39 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -911,7 +911,9 @@ arg_text (macro_arguments *argv, unsigned int index)
break;
case CHAIN_ARGV:
arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
- chain->u.u_a.quotes, NULL);
+ quote_cache (NULL, chain->quote_age,
+ chain->u.u_a.quotes),
+ chain->u.u_a.flatten, NULL, NULL, false);
break;
default:
assert (!"arg_text");
@@ -1097,50 +1099,70 @@ arg_scratch (void)
/* Dump a representation of ARGV to the obstack OBS, starting with
argument INDEX. If QUOTES is non-NULL, each argument is displayed
- with those quotes. If MAX_LEN is non-NULL, truncate the output
- after *MAX_LEN bytes are output and return true; otherwise, return
- false, and reduce *MAX_LEN by the number of bytes output. */
+ with those quotes. If FLATTEN, builtins are ignored. Separate
+ arguments with SEP, which defaults to a comma. If MAX_LEN is
+ non-NULL, truncate the output after *MAX_LEN bytes are output and
+ return true; otherwise, return false, and reduce *MAX_LEN by the
+ number of bytes output. If QUOTE_EACH, the truncation length is
+ reset for each argument, quotes do not count against length, and
+ all arguments are printed; otherwise, quotes count against the
+ length and trailing arguments may be discarded. */
bool
arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index,
- const string_pair *quotes, int *max_len)
+ const string_pair *quotes, bool flatten, const char *sep,
+ int *max_len, bool quote_each)
{
int len = max_len ? *max_len : INT_MAX;
unsigned int i;
token_data *token;
token_chain *chain;
- bool comma = false;
-
+ bool use_sep = false;
+ bool done;
+ size_t sep_len;
+ size_t *plen = quote_each ? NULL : &len;
+
+ if (!sep)
+ sep = ",";
+ sep_len = strlen (sep);
for (i = index; i < argv->argc; i++)
{
- if (comma && obstack_print (obs, ",", 1, &len))
+ if (quote_each && max_len)
+ len = *max_len;
+ if (use_sep && obstack_print (obs, sep, sep_len, plen))
return true;
- else
- comma = true;
+ use_sep = true;
token = arg_token (argv, i, NULL);
- if (quotes && obstack_print (obs, quotes->str1, quotes->len1, &len))
- return true;
switch (TOKEN_DATA_TYPE (token))
{
case TOKEN_TEXT:
+ if (quotes && obstack_print (obs, quotes->str1, quotes->len1, plen))
+ return true;
if (obstack_print (obs, TOKEN_DATA_TEXT (token),
- TOKEN_DATA_LEN (token), &len))
+ TOKEN_DATA_LEN (token), &len) && !quote_each)
+ return true;
+ if (quotes && obstack_print (obs, quotes->str2, quotes->len2, plen))
return true;
break;
case TOKEN_COMP:
+ if (quotes && obstack_print (obs, quotes->str1, quotes->len1, plen))
+ return true;
chain = token->u.u_c.chain;
- while (chain)
+ done = false;
+ while (chain && !done)
{
switch (chain->type)
{
case CHAIN_STR:
if (obstack_print (obs, chain->u.u_s.str, chain->u.u_s.len,
&len))
- return true;
+ done = true;
break;
case CHAIN_ARGV:
if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
- chain->u.u_a.quotes, &len))
- return true;
+ quote_cache (NULL, chain->quote_age,
+ chain->u.u_a.quotes),
+ flatten, NULL, &len, false))
+ done = true;
break;
default:
assert (!"arg_print");
@@ -1148,16 +1170,19 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index,
}
chain = chain->next;
}
+ if (done && !quote_each)
+ return true;
+ if (quotes && obstack_print (obs, quotes->str2, quotes->len2, plen))
+ return true;
break;
case TOKEN_FUNC:
- // TODO - support func?
+ func_print (obs, find_builtin_by_addr (TOKEN_DATA_FUNC (token)),
+ flatten, quotes);
+ break;
default:
assert (!"arg_print");
abort ();
}
- if (quotes && obstack_print (obs, quotes->str2, quotes->len2,
- &len))
- return true;
}
if (max_len)
*max_len = len;
@@ -1201,21 +1226,7 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level,
chain->u.u_a.flatten = flatten;
chain->u.u_a.comma = false;
chain->u.u_a.skip_last = false;
- if (quotes)
- {
- /* Clone the quotes into the obstack, since a subsequent
- changequote may take effect before the $@ ref is
- rescanned. */
- // TODO - optimize when quote_age is nonzero? Cache in argv in case
- // user macro expands to multiple refs?
- string_pair *tmp = (string_pair *) obstack_copy (obs, quotes,
- sizeof *quotes);
- tmp->str1 = (char *) obstack_copy0 (obs, quotes->str1, quotes->len1);
- tmp->str2 = (char *) obstack_copy0 (obs, quotes->str2, quotes->len2);
- chain->u.u_a.quotes = tmp;
- }
- else
- chain->u.u_a.quotes = NULL;
+ chain->u.u_a.quotes = quote_cache (obs, chain->quote_age, quotes);
return token;
}