summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Blake <ebb9@byu.net>2007-12-03 11:53:45 -0700
committerEric Blake <ebb9@byu.net>2008-03-17 07:06:17 -0600
commitf7f45337fa1bfba9512841e8d3d2251359944681 (patch)
treead5eaad82fe1e706ced0ab6d25173e83686ad28d
parent434656c96d6486cf959c3050aa85aecb72d948a0 (diff)
downloadm4-f7f45337fa1bfba9512841e8d3d2251359944681.tar.gz
Stage20: make m4wrap obey POSIX fifo ordering
-rw-r--r--NEWS4
-rw-r--r--doc/m4.texinfo342
-rw-r--r--examples/Makefile.am6
-rw-r--r--examples/join.m415
-rw-r--r--examples/wraplifo2.m49
-rw-r--r--m4/gnulib-cache.m42
-rw-r--r--src/builtin.c25
-rw-r--r--src/input.c126
-rw-r--r--src/m4.h10
-rw-r--r--src/macro.c68
10 files changed, 495 insertions, 112 deletions
diff --git a/NEWS b/NEWS
index b858fa84..73adcf26 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,10 @@ Foundation, Inc.
** Fix regression introduced in 1.4.10b where using `builtin' or `indir'
to perform nested `shift' calls triggered an assertion failure.
+** Fix the `m4wrap' builtin to accumulate wrapped text in FIFO order, as
+ required by POSIX. The manual mentions a way to restore the LIFO order
+ present in earlier GNU M4 versions.
+
** Enhance the `ifdef', `ifelse', and `shift' builtins, as well as all
user macros, to transparently handle builtin tokens generated by `defn'.
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index b271e92a..00cff97a 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -269,6 +269,7 @@ Correct version of some examples
* Improved exch:: Solution for @code{exch}
* Improved forloop:: Solution for @code{forloop}
* Improved foreach:: Solution for @code{foreach}
+* Improved m4wrap:: Solution for @code{m4wrap}
* Improved cleardivert:: Solution for @code{cleardivert}
* Improved capitalize:: Solution for @code{capitalize}
* Improved fatal_error:: Solution for @code{fatal_error}
@@ -2283,25 +2284,40 @@ builtin token is preserved only when it occurs in isolation. A future
version of @acronym{GNU} M4 may lift these restrictions.
@example
+$ @kbd{m4 -d}
define(`a', `A')define(`AA', `b')
@result{}
+traceon(`defn', `define')
+@result{}
defn(`a', `divnum', `a')
-@error{}m4:stdin:2: Warning: defn: cannot concatenate builtin `divnum'
+@error{}m4:stdin:3: Warning: defn: cannot concatenate builtin `divnum'
+@error{}m4trace: -1- defn(`a', `divnum', `a') -> ``A'`A''
@result{}AA
define(`mydivnum', defn(`divnum', `divnum'))mydivnum
-@error{}m4:stdin:3: Warning: defn: cannot concatenate builtin `divnum'
-@error{}m4:stdin:3: Warning: defn: cannot concatenate builtin `divnum'
+@error{}m4:stdin:4: Warning: defn: cannot concatenate builtin `divnum'
+@error{}m4:stdin:4: Warning: defn: cannot concatenate builtin `divnum'
+@error{}m4trace: -2- defn(`divnum', `divnum')
+@error{}m4trace: -1- define(`mydivnum', `')
+@result{}
+traceoff(`defn', `define')
@result{}
define(`mydivnum', defn(`divnum')defn(`divnum'))mydivnum
-@error{}m4:stdin:4: Warning: define: cannot concatenate builtin `divnum'
-@error{}m4:stdin:4: Warning: define: cannot concatenate builtin `divnum'
+@error{}m4:stdin:6: Warning: define: cannot concatenate builtin `divnum'
+@error{}m4:stdin:6: Warning: define: cannot concatenate builtin `divnum'
@result{}
define(`mydivnum', defn(`divnum')`a')mydivnum
-@error{}m4:stdin:5: Warning: define: cannot concatenate builtin `divnum'
+@error{}m4:stdin:7: Warning: define: cannot concatenate builtin `divnum'
@result{}A
define(`mydivnum', `a'defn(`divnum'))mydivnum
-@error{}m4:stdin:6: Warning: define: cannot concatenate builtin `divnum'
+@error{}m4:stdin:8: Warning: define: cannot concatenate builtin `divnum'
@result{}A
+define(`q', ``$@@'')
+@result{}
+define(`foo', q(`a', defn(`divnum')))foo
+@error{}m4:stdin:10: Warning: define: cannot quote builtin
+@result{}a,
+ifdef(`foo', `yes', `no')
+@result{}yes
@end example
@node Pushdef
@@ -2930,6 +2946,8 @@ shift(`foo', `bar', `baz')
An example of the use of @code{shift} is this macro:
+@cindex reversing arguments
+@cindex arguments, reversing
@deffn Composite reverse (@dots{})
Takes any number of arguments, and reverses their order.
@end deffn
@@ -3007,6 +3025,113 @@ example2(`feeling rather indecisive today')
@result{}default answer: 4
@end example
+@cindex joining arguments
+@cindex arguments, joining
+@cindex concatenating arguments
+Another common task that requires iteration is joining a list of
+arguments into a single string.
+
+@deffn Composite join (@ovar{separator}, @ovar{args@dots{}})
+@deffnx Composite joinall (@ovar{separator}, @ovar{args@dots{}})
+Generate a single-quoted string, consisting of each @var{arg} separated
+by @var{separator}. While @code{joinall} always outputs a
+@var{separator} between arguments, @code{join} avoids the
+@var{separator} for an empty @var{arg}.
+@end deffn
+
+Here are some examples of their usage, based on the implementation
+@file{m4-@value{VERSION}/@/examples/@/join.m4} distributed in this
+package:
+
+@comment examples
+@example
+$ @kbd{m4 -I examples}
+include(`join.m4')
+@result{}
+join,join(`-'),join(`-', `'),join(`-', `', `')
+@result{},,,
+joinall,joinall(`-'),joinall(`-', `'),joinall(`-', `', `')
+@result{},,,-
+join(`-', `1')
+@result{}1
+join(`-', `1', `2', `3')
+@result{}1-2-3
+join(`', `1', `2', `3')
+@result{}123
+join(`-', `', `1', `', `', `2', `')
+@result{}1-2
+joinall(`-', `', `1', `', `', `2', `')
+@result{}-1---2-
+join(`,', `1', `2', `3')
+@result{}1,2,3
+define(`nargs', `$#')dnl
+nargs(join(`,', `1', `2', `3'))
+@result{}1
+@end example
+
+Examining the implementation shows some interesting points about several
+m4 programming idioms.
+
+@comment examples
+@example
+$ @kbd{m4 -I examples}
+undivert(`join.m4')dnl
+@result{}divert(`-1')
+@result{}# join(sep, args) - join each non-empty ARG into a single
+@result{}# string, with each element separated by SEP
+@result{}define(`join',
+@result{}`ifelse(`$#', `2', ``$2'',
+@result{} `ifelse(`$2', `', `', ``$2'_')$0(`$1', shift(shift($@@)))')')
+@result{}define(`_join',
+@result{}`ifelse(`$#$2', `2', `',
+@result{} `ifelse(`$2', `', `', ``$1$2'')$0(`$1', shift(shift($@@)))')')
+@result{}# joinall(sep, args) - join each ARG, including empty ones,
+@result{}# into a single string, with each element separated by SEP
+@result{}define(`joinall', ``$2'_$0(`$1', shift($@@))')
+@result{}define(`_joinall',
+@result{}`ifelse(`$#', `2', `', ``$1$3'$0(`$1', shift(shift($@@)))')')
+@result{}divert`'dnl
+@end example
+
+First, notice that this implementation creates helper macros
+@code{_join} and @code{_joinall}. This division of labor makes it
+easier to output the correct number of @var{separator} instances:
+@code{join} and @code{joinall} are responsible for the first argument,
+without a separator, while @code{_join} and @code{_joinall} are
+responsible for all remaining arguments, always outputting a separator
+when outputting an argument.
+
+Next, observe how @code{join} decides to iterate to itself, because the
+first @var{arg} was empty, or to output the argument and swap over to
+@code{_join}. If the argument is non-empty, then the nested
+@code{ifelse} results in an unquoted @samp{_}, which is concatenated
+with the @samp{$0} to form the next macro name to invoke. The
+@code{joinall} implementation is simpler since it does not have to
+suppress empty @var{arg}; it always executes once then defers to
+@code{_joinall}.
+
+Another important idiom is the idea that @var{separator} is reused for
+each iteration. Each iteration has one less argument, but rather than
+discarding @samp{$1} by iterating with @code{$0(shift($@@))}, the macro
+discards @samp{$2} by using @code{$0(`$1', shift(shift($@@)))}.
+
+Next, notice that it is possible to compare more than one condition in a
+single @code{ifelse} test. The test of @samp{$#$2} against @samp{2}
+allows @code{_join} to iterate for two separate reasons---either there
+are still more than two arguments, or there are exactly two arguments
+but the last argument is not empty.
+
+Finally, notice that these macros require exactly two arguments to
+terminate recursion, but that they still correctly result in empty
+output when given no @var{args} (i.e., zero or one macro argument). On
+the first pass when there are too few arguments, the @code{shift}
+results in no output, but leaves an empty string to serve as the
+required second argument for the second pass. Put another way,
+@samp{`$1', shift($@@)} is not the same as @samp{$@@}, since only the
+former guarantees at least two arguments.
+
+@cindex quote manipulation
+@cindex manipulating quotes
Sometimes, a recursive algorithm requires adding quotes to each element,
or treating multiple arguments as a single element:
@@ -3073,6 +3198,9 @@ undivert(`quote.m4')dnl
@result{}divert`'dnl
@end example
+It is worth pointing out that @samp{quote(@var{args})} is more efficient
+than @samp{joinall(`,', @var{args})} for producing the same output.
+
@cindex nine arguments, more than
@cindex more than nine arguments
@cindex arguments, more than nine
@@ -4449,6 +4577,9 @@ Stores @var{string} in a safe place, to be reread when end of input is
reached. As a @acronym{GNU} extension, additional arguments are
concatenated with a space to the @var{string}.
+Successive invocations of @code{m4wrap} accumulate saved text in
+first-in, first-out order, as required by @acronym{POSIX}.
+
The expansion of @code{m4wrap} is void.
The macro @code{m4wrap} is recognized only with parameters.
@end deffn
@@ -4468,16 +4599,83 @@ This is the first and last normal input line.
The saved input is only reread when the end of normal input is seen, and
not if @code{m4exit} is used to exit @code{m4}.
-@comment FIXME: this contradicts POSIX, which requires that "If the
-@comment m4wrap macro is used multiple times, the arguments specified
-@comment shall be processed in the order in which the m4wrap macros were
-@comment processed."
-It is safe to call @code{m4wrap} from saved text, but then the order in
-which the saved text is reread is undefined. If @code{m4wrap} is not used
-recursively, the saved pieces of text are reread in the opposite order
-in which they were saved (LIFO---last in, first out). However, this
-behavior is likely to change in a future release, to match
-@acronym{POSIX}, so you should not depend on this order.
+It is safe to call @code{m4wrap} from wrapped text, where all the
+recursively wrapped text is deferred until the current wrapped text is
+exhausted. As of M4 1.4.11, when @code{m4wrap} is not used recursively,
+the saved pieces of text are reread in the same order in which they were
+saved (FIFO---first in, first out), as required by @acronym{POSIX}.
+
+@example
+m4wrap(`1
+')
+@result{}
+m4wrap(`2', `3
+')
+@result{}
+^D
+@result{}1
+@result{}2 3
+@end example
+
+However, earlier versions had reverse ordering (LIFO---last in, first
+out), as this behavior is more like the semantics of the C function
+@code{atexit}. It is possible to emulate @acronym{POSIX} behavior even
+with older versions of @acronym{GNU} M4 by including the file
+@file{m4-@value{VERSION}/@/examples/@/wrapfifo.m4} from the
+distribution:
+
+@comment examples
+@example
+$ @kbd{m4 -I examples}
+undivert(`wrapfifo.m4')dnl
+@result{}dnl Redefine m4wrap to have FIFO semantics.
+@result{}define(`_m4wrap_level', `0')dnl
+@result{}define(`m4wrap',
+@result{}`ifdef(`m4wrap'_m4wrap_level,
+@result{} `define(`m4wrap'_m4wrap_level,
+@result{} defn(`m4wrap'_m4wrap_level)`$1')',
+@result{} `builtin(`m4wrap', `define(`_m4wrap_level',
+@result{} incr(_m4wrap_level))dnl
+@result{}m4wrap'_m4wrap_level)dnl
+@result{}define(`m4wrap'_m4wrap_level, `$1')')')dnl
+include(`wrapfifo.m4')
+@result{}
+m4wrap(`a`'m4wrap(`c
+', `d')')m4wrap(`b')
+@result{}
+^D
+@result{}abc
+@end example
+
+It is likewise possible to emulate LIFO behavior without resorting to
+the @acronym{GNU} M4 extension of @code{builtin}, by including the file
+@file{m4-@value{VERSION}/@/examples/@/wraplifo.m4} from the
+distribution. (Unfortunately, both examples shown here share some
+subtle bugs. See if you can find and correct them; or @pxref{Improved
+m4wrap, , Answers}).
+
+@comment examples
+@example
+$ @kbd{m4 -I examples}
+undivert(`wraplifo.m4')dnl
+@result{}dnl Redefine m4wrap to have LIFO semantics.
+@result{}define(`_m4wrap_level', `0')dnl
+@result{}define(`_m4wrap', defn(`m4wrap'))dnl
+@result{}define(`m4wrap',
+@result{}`ifdef(`m4wrap'_m4wrap_level,
+@result{} `define(`m4wrap'_m4wrap_level,
+@result{} `$1'defn(`m4wrap'_m4wrap_level))',
+@result{} `_m4wrap(`define(`_m4wrap_level', incr(_m4wrap_level))dnl
+@result{}m4wrap'_m4wrap_level)dnl
+@result{}define(`m4wrap'_m4wrap_level, `$1')')')dnl
+include(`wraplifo.m4')
+@result{}
+m4wrap(`a`'m4wrap(`c
+', `d')')m4wrap(`b')
+@result{}
+^D
+@result{}bac
+@end example
Here is an example of implementing a factorial function using
@code{m4wrap}:
@@ -4497,13 +4695,13 @@ Invocations of @code{m4wrap} at the same recursion level are
concatenated and rescanned as usual:
@example
-define(`aa', `AA
+define(`ab', `AB
')
@result{}
-m4wrap(`a')m4wrap(`a')
+m4wrap(`a')m4wrap(`b')
@result{}
^D
-@result{}AA
+@result{}AB
@end example
@noindent
@@ -6417,9 +6615,13 @@ __line__
@result{}8
__line__
@result{}11
+m4wrap(`__line__
+')
+@result{}
^D
@result{}6
@result{}6
+@result{}12
@end example
The @code{@w{__program__}} macro behaves like @samp{$0} in shell
@@ -6861,37 +7063,13 @@ of @samp{-} on the command line.
@item
@acronym{POSIX} requires @code{m4wrap} (@pxref{M4wrap}) to act in FIFO
-(first-in, first-out) order, but @acronym{GNU} @code{m4} currently uses
+(first-in, first-out) order, and most other implementations obey this.
+However, versions of @acronym{GNU} @code{m4} earlier than 1.4.11 used
LIFO order. Furthermore, @acronym{POSIX} states that only the first
argument to @code{m4wrap} is saved for later evaluation, but
@acronym{GNU} @code{m4} saves and processes all arguments, with output
separated by spaces.
-However, it is possible to emulate @acronym{POSIX} behavior by
-including the file @file{m4-@value{VERSION}/@/examples/@/wrapfifo.m4}
-from the distribution:
-
-@example
-undivert(`wrapfifo.m4')dnl
-@result{}dnl Redefine m4wrap to have FIFO semantics.
-@result{}define(`_m4wrap_level', `0')dnl
-@result{}define(`m4wrap',
-@result{}`ifdef(`m4wrap'_m4wrap_level,
-@result{} `define(`m4wrap'_m4wrap_level,
-@result{} defn(`m4wrap'_m4wrap_level)`$1')',
-@result{} `builtin(`m4wrap', `define(`_m4wrap_level',
-@result{} incr(_m4wrap_level))dnl
-@result{}m4wrap'_m4wrap_level)dnl
-@result{}define(`m4wrap'_m4wrap_level, `$1')')')dnl
-include(`wrapfifo.m4')
-@result{}
-m4wrap(`a`'m4wrap(`c
-', `d')')m4wrap(`b')
-@result{}
-^D
-@result{}abc
-@end example
-
@item
@acronym{POSIX} states that builtins that require arguments, but are
called without arguments, have undefined behavior. Traditional
@@ -7098,6 +7276,7 @@ presented here.
* Improved exch:: Solution for @code{exch}
* Improved forloop:: Solution for @code{forloop}
* Improved foreach:: Solution for @code{foreach}
+* Improved m4wrap:: Solution for @code{m4wrap}
* Improved cleardivert:: Solution for @code{cleardivert}
* Improved capitalize:: Solution for @code{capitalize}
* Improved fatal_error:: Solution for @code{fatal_error}
@@ -7549,6 +7728,77 @@ include(`loop.m4')dnl
@end ignore
+@node Improved m4wrap
+@section Solution for @code{m4wrap}
+
+The replacement @code{m4wrap} versions presented above, designed to
+guarantee FIFO or LIFO order regardless of the underlying M4
+implementation, share a bug when dealing with wrapped text that looks
+like parameter expansion. Note how the invocation of
+@code{m4wrap@var{n}} interprets these parameters, while using the
+builtin preserves them for their intended use.
+
+@comment examples
+@example
+$ @kbd{m4 -I examples}
+include(`wraplifo.m4')
+@result{}
+m4wrap(`define(`foo', ``$0:'-$1-$*-$#-')foo(`a', `b')
+')
+@result{}
+builtin(`m4wrap', ``'define(`bar', ``$0:'-$1-$*-$#-')bar(`a', `b')
+')
+@result{}
+^D
+@result{}m4wrap0:---0-
+@result{}bar:-a-a,b-2-
+@end example
+
+Additionally, the computation of @code{_m4wrap_level} and creation of
+multiple @code{m4wrap@var{n}} placeholders in the original examples is
+more expensive in time and memory than strictly necessary. Notice how
+the improved version grabs the wrapped text via @code{defn} to avoid
+parameter expansion, then undefines @code{_m4wrap_text}, before
+stripping a level of quotes with @code{_arg1} to expand the text. That
+way, each level of wrapping reuses the single placeholder, which starts
+each nesting level in an undefined state.
+
+Finally, it is worth emulating the @acronym{GNU} M4 extension of saving
+all arguments to @code{m4wrap}, separated by a space, rather than saving
+just the first argument. This is done with the @code{joinall} macro
+documented previously (@pxref{Shift}). The improved LIFO example is
+shipped as @file{m4-@value{VERSION}/@/examples/@/wraplifo2.m4}, and can
+easily be converted to a FIFO solution by swapping the adjacent
+invocations of @code{joinall} and @code{defn}.
+
+@comment examples
+@example
+$ @kbd{m4 -I examples}
+include(`wraplifo2.m4')
+@result{}
+undivert(`wraplifo2.m4')dnl
+@result{}dnl Redefine m4wrap to have LIFO semantics, improved example.
+@result{}include(`join.m4')dnl
+@result{}define(`_m4wrap', defn(`m4wrap'))dnl
+@result{}define(`_arg1', `$1')dnl
+@result{}define(`m4wrap',
+@result{}`ifdef(`_$0_text',
+@result{} `define(`_$0_text', joinall(` ', $@@)defn(`_$0_text'))',
+@result{} `_$0(`_arg1(defn(`_$0_text')undefine(`_$0_text'))')dnl
+@result{}define(`_$0_text', joinall(` ', $@@))')')dnl
+m4wrap(`define(`foo', ``$0:'-$1-$*-$#-')foo(`a', `b')
+')
+@result{}
+m4wrap(`lifo text
+m4wrap(`nested', `', `$@@
+')')
+@result{}
+^D
+@result{}lifo text
+@result{}foo:-a-a,b-2-
+@result{}nested $@@
+@end example
+
@node Improved cleardivert
@section Solution for @code{cleardivert}
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 3450eacb..254d2ab9 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -1,6 +1,6 @@
## Makefile.am - template for generating Makefile via Automake
##
-## Copyright (C) 2006, 2007 Free Software Foundation, Inc.
+## Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
##
## This file is part of GNU M4.
##
@@ -42,6 +42,7 @@ incl-test.m4 \
incl.m4 \
include.m4 \
indir.m4 \
+join.m4 \
loop.m4 \
misc.m4 \
multiquotes.m4 \
@@ -62,4 +63,5 @@ undivert.incl \
undivert.m4 \
wrap.m4 \
wrapfifo.m4 \
-wraplifo.m4
+wraplifo.m4 \
+wraplifo2.m4
diff --git a/examples/join.m4 b/examples/join.m4
new file mode 100644
index 00000000..8687ac70
--- /dev/null
+++ b/examples/join.m4
@@ -0,0 +1,15 @@
+divert(`-1')
+# join(sep, args) - join each non-empty ARG into a single
+# string, with each element separated by SEP
+define(`join',
+`ifelse(`$#', `2', ``$2'',
+ `ifelse(`$2', `', `', ``$2'_')$0(`$1', shift(shift($@)))')')
+define(`_join',
+`ifelse(`$#$2', `2', `',
+ `ifelse(`$2', `', `', ``$1$2'')$0(`$1', shift(shift($@)))')')
+# joinall(sep, args) - join each ARG, including empty ones,
+# into a single string, with each element separated by SEP
+define(`joinall', ``$2'_$0(`$1', shift($@))')
+define(`_joinall',
+`ifelse(`$#', `2', `', ``$1$3'$0(`$1', shift(shift($@)))')')
+divert`'dnl
diff --git a/examples/wraplifo2.m4 b/examples/wraplifo2.m4
new file mode 100644
index 00000000..5b450a76
--- /dev/null
+++ b/examples/wraplifo2.m4
@@ -0,0 +1,9 @@
+dnl Redefine m4wrap to have LIFO semantics, improved example.
+include(`join.m4')dnl
+define(`_m4wrap', defn(`m4wrap'))dnl
+define(`_arg1', `$1')dnl
+define(`m4wrap',
+`ifdef(`_$0_text',
+ `define(`_$0_text', joinall(` ', $@)defn(`_$0_text'))',
+ `_$0(`_arg1(defn(`_$0_text')undefine(`_$0_text'))')dnl
+define(`_$0_text', joinall(` ', $@))')')dnl
diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4
index 1a3434c0..be74a73c 100644
--- a/m4/gnulib-cache.m4
+++ b/m4/gnulib-cache.m4
@@ -1,4 +1,4 @@
-# Copyright (C) 2004-2007 Free Software Foundation, Inc.
+# Copyright (C) 2002-2008 Free Software Foundation, Inc.
#
# This file is free software, distributed under the terms of the GNU
# General Public License. As a special exception to the GNU General
diff --git a/src/builtin.c b/src/builtin.c
index 9dbb4693..c41aa544 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -667,7 +667,13 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode)
switch (arg_type (argv, 2))
{
+ case TOKEN_COMP:
+ m4_warn (0, me, _("cannot concatenate builtins"));
+ // TODO fall through instead
+ break;
+
case TOKEN_TEXT:
+ // TODO flatten TOKEN_COMP value, or support concatenation of builtins
define_user_macro (ARG (1), ARG_LEN (1), ARG (2), mode);
break;
@@ -1608,25 +1614,20 @@ m4_m4exit (struct obstack *obs, int argc, macro_arguments *argv)
exit (exit_code);
}
-/*-------------------------------------------------------------------------.
-| Save the argument text until EOF has been seen, allowing for user |
-| specified cleanup action. GNU version saves all arguments, the standard |
-| version only the first. |
-`-------------------------------------------------------------------------*/
+/*-----------------------------------------------------------------.
+| Save the argument text in FIFO order until EOF has been seen, |
+| allowing for user specified cleanup action. Extra arguments are |
+| saved when not in POSIX mode. |
+`-----------------------------------------------------------------*/
static void
m4_m4wrap (struct obstack *obs, int argc, macro_arguments *argv)
{
if (bad_argc (ARG (0), argc, 1, -1))
return;
- obs = push_wrapup_init ();
- if (no_gnu_extensions)
- obstack_grow (obs, ARG (1), ARG_LEN (1));
- else
- // TODO - allow builtins, rather than always flattening
- arg_print (obs, argv, 1, NULL, true, " ", NULL, false);
- push_wrapup_finish ();
+ wrap_args (argv);
}
+
/* Enable tracing of all specified macros, or all, if none is specified.
Tracing is disabled by default, when a macro is defined. This can be
diff --git a/src/input.c b/src/input.c
index cc40a587..a6fcc854 100644
--- a/src/input.c
+++ b/src/input.c
@@ -532,12 +532,36 @@ struct obstack *
push_wrapup_init (void)
{
input_block *i;
- i = (input_block *) obstack_alloc (wrapup_stack, sizeof *i);
- i->prev = wsp;
- i->type = INPUT_STRING;
- i->file = current_file;
- i->line = current_line;
- wsp = i;
+ token_chain *chain;
+
+ assert (obstack_object_size (wrapup_stack) == 0);
+ if (wsp)
+ {
+ i = wsp;
+ assert (i->type == INPUT_CHAIN && i->u.u_c.end
+ && i->u.u_c.end->type != CHAIN_LOC);
+ }
+ else
+ {
+ i = (input_block *) obstack_alloc (wrapup_stack, sizeof *i);
+ i->prev = wsp;
+ i->file = current_file;
+ i->line = current_line;
+ i->type = INPUT_CHAIN;
+ i->u.u_c.chain = i->u.u_c.end = NULL;
+ wsp = i;
+ }
+ chain = (token_chain *) obstack_alloc (wrapup_stack, sizeof *chain);
+ if (i->u.u_c.end)
+ i->u.u_c.end->next = chain;
+ else
+ i->u.u_c.chain = chain;
+ i->u.u_c.end = chain;
+ chain->next = NULL;
+ chain->type = CHAIN_LOC;
+ chain->quote_age = 0;
+ chain->u.u_l.file = current_file;
+ chain->u.u_l.line = current_line;
return wrapup_stack;
}
@@ -548,17 +572,7 @@ push_wrapup_init (void)
void
push_wrapup_finish (void)
{
- input_block *i = wsp;
- if (obstack_object_size (wrapup_stack) == 0)
- {
- wsp = i->prev;
- obstack_free (wrapup_stack, i);
- }
- else
- {
- i->u.u_s.len = obstack_object_size (wrapup_stack);
- i->u.u_s.str = (char *) obstack_finish (wrapup_stack);
- }
+ make_text_link (wrapup_stack, &wsp->u.u_c.chain, &wsp->u.u_c.end);
}
@@ -613,6 +627,8 @@ pop_input (bool cleanup)
return false;
arg_adjust_refcount (chain->u.u_a.argv, false);
break;
+ case CHAIN_LOC:
+ return false;
default:
assert (!"pop_input");
abort ();
@@ -837,6 +853,8 @@ peek_input (bool allow_argv)
chain->u.u_a.comma = true;
push_string_finish ();
return peek_input (allow_argv);
+ case CHAIN_LOC:
+ break;
default:
assert (!"peek_input");
abort ();
@@ -863,16 +881,18 @@ peek_input (bool allow_argv)
| string, so factor that out into a macro for speed. If |
| ALLOW_QUOTE, and the current input matches the current quote age, |
| return CHAR_QUOTE and leave consumption of data for |
-| append_quote_token. |
+| append_quote_token; otherwise, if ALLOW_ARGV and the current input |
+| matches an argv reference with the correct quoting, return |
+| CHAR_ARGV and leave consuption of data for init_argv_token. |
`-------------------------------------------------------------------*/
-#define next_char(AQ) \
+#define next_char(AQ, AA) \
(isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \
? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \
- : next_char_1 (AQ))
+ : next_char_1 (AQ, AA))
static int
-next_char_1 (bool allow_quote)
+next_char_1 (bool allow_quote, bool allow_argv)
{
int ch;
token_chain *chain;
@@ -929,6 +949,7 @@ next_char_1 (bool allow_quote)
chain = isp->u.u_c.chain;
while (chain)
{
+ unsigned int argc;
if (allow_quote && chain->quote_age == current_quote_age)
return CHAR_QUOTE;
switch (chain->type)
@@ -949,7 +970,8 @@ next_char_1 (bool allow_quote)
return CHAR_MACRO;
break;
case CHAIN_ARGV:
- if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
+ argc = arg_argc (chain->u.u_a.argv);
+ if (chain->u.u_a.index == argc)
{
arg_adjust_refcount (chain->u.u_a.argv, false);
break;
@@ -959,6 +981,12 @@ next_char_1 (bool allow_quote)
chain->u.u_a.comma = false;
return ',';
}
+ /* Only return a reference if the quoting is correct
+ and the reference has more than one argument
+ left. */
+ if (allow_argv && chain->quote_age == current_quote_age
+ && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
+ return CHAR_ARGV;
/* Rather than directly parse argv here, we push
another input block containing the next unparsed
argument from argv. */
@@ -970,7 +998,13 @@ next_char_1 (bool allow_quote)
chain->u.u_a.index++;
chain->u.u_a.comma = true;
push_string_finish ();
- return next_char_1 (allow_quote);
+ return next_char_1 (allow_quote, allow_argv);
+ case CHAIN_LOC:
+ isp->file = chain->u.u_l.file;
+ isp->line = chain->u.u_l.line;
+ input_change = true;
+ isp->u.u_c.chain = chain->next;
+ return next_char_1 (allow_quote, allow_argv);
default:
assert (!"next_char_1");
abort ();
@@ -1002,7 +1036,7 @@ skip_line (const char *name)
const char *file = current_file;
int line = current_line;
- while ((ch = next_char (false)) != CHAR_EOF && ch != '\n')
+ while ((ch = next_char (false, false)) != CHAR_EOF && ch != '\n')
;
if (ch == CHAR_EOF)
/* current_file changed to "" if we see CHAR_EOF, use the
@@ -1028,25 +1062,28 @@ skip_line (const char *name)
static void
init_macro_token (token_data *td)
{
- int ch = next_char (false);
- assert (ch == CHAR_MACRO);
- if (td)
- TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
+ token_chain *chain;
+
if (isp->type == INPUT_MACRO)
{
assert (isp->u.func);
if (td)
- TOKEN_DATA_FUNC (td) = isp->u.func;
+ {
+ TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
+ TOKEN_DATA_FUNC (td) = isp->u.func;
+ }
isp->u.func = NULL;
}
else
{
- token_chain *chain;
assert (isp->type == INPUT_CHAIN);
chain = isp->u.u_c.chain;
assert (!chain->quote_age && chain->type == CHAIN_FUNC && chain->u.func);
if (td)
- TOKEN_DATA_FUNC (td) = chain->u.func;
+ {
+ TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
+ TOKEN_DATA_FUNC (td) = chain->u.func;
+ }
chain->u.func = NULL;
}
}
@@ -1108,9 +1145,9 @@ init_argv_token (struct obstack *obs, token_data *td)
{
token_chain *src_chain;
token_chain *chain;
- int ch = next_char (true);
+ int ch;
- assert (ch == CHAR_QUOTE && TOKEN_DATA_TYPE (td) == TOKEN_VOID
+ assert (TOKEN_DATA_TYPE (td) == TOKEN_VOID
&& isp->type == INPUT_CHAIN && isp->u.u_c.chain->type == CHAIN_ARGV
&& obs && obstack_object_size (obs) == 0);
@@ -1146,7 +1183,7 @@ init_argv_token (struct obstack *obs, token_data *td)
decreased once the final element is parsed. */
assert (*curr_comm.str1 != ',' && *curr_comm.str1 != ')'
&& *curr_comm.str1 != *curr_quote.str1);
- ch = peek_input (false);
+ ch = peek_input (true);
if (ch != ',' && ch != ')')
{
isp->u.u_c.chain = src_chain;
@@ -1181,14 +1218,14 @@ match_input (const char *s, bool consume)
if (s[1] == '\0')
{
if (consume)
- next_char (false);
+ next_char (false, false);
return true; /* short match */
}
- next_char (false);
+ next_char (false, false);
for (n = 1, t = s++; (ch = peek_input (false)) == to_uchar (*s++); )
{
- next_char (false);
+ next_char (false, false);
n++;
if (*s == '\0') /* long match */
{
@@ -1558,15 +1595,13 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
if (!line)
line = &dummy;
- /* Can't consume character until after CHAR_MACRO is handled. */
TOKEN_DATA_TYPE (td) = TOKEN_VOID;
- ch = peek_input (allow_argv && current_quote_age);
+ ch = next_char (false, allow_argv && current_quote_age);
if (ch == CHAR_EOF)
{
#ifdef DEBUG_INPUT
xfprintf (stderr, "next_token -> EOF\n");
#endif /* DEBUG_INPUT */
- next_char (false);
return TOKEN_EOF;
}
if (ch == CHAR_MACRO)
@@ -1590,7 +1625,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
return TOKEN_ARGV;
}
- next_char (false); /* Consume character we already peeked at. */
file = current_file;
*line = current_line;
if (MATCH (ch, curr_comm.str1, true))
@@ -1600,7 +1634,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
obstack_grow (obs_td, curr_comm.str1, curr_comm.len1);
while (1)
{
- ch = next_char (false);
+ ch = next_char (false, false);
if (ch == CHAR_EOF)
/* Current_file changed to "" if we see CHAR_EOF, use the
previous value we stored earlier. */
@@ -1631,7 +1665,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
&& (isalnum (ch) || ch == '_'))
{
obstack_1grow (&token_stack, ch);
- next_char (false);
+ next_char (false, false);
}
type = TOKEN_WORD;
}
@@ -1654,7 +1688,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
obstack_blank (&token_stack, -1);
break;
}
- next_char (false);
+ next_char (false, false);
}
obstack_1grow (&token_stack, '\0');
@@ -1699,7 +1733,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
type = TOKEN_STRING;
while (1)
{
- ch = next_char (obs != NULL && current_quote_age);
+ ch = next_char (obs != NULL && current_quote_age, false);
if (ch == CHAR_EOF)
/* Current_file changed to "" if we see CHAR_EOF, use
the previous value we stored earlier. */
@@ -1723,7 +1757,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
xfprintf (stderr, "next_token -> MACDEF (%s)\n",
bp->name);
#endif
- ch = next_char (false);
+ ch = next_char (false, false);
MATCH (ch, curr_quote.str2, true);
return TOKEN_MACDEF;
}
diff --git a/src/m4.h b/src/m4.h
index aa92abab..a0204c82 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -284,7 +284,8 @@ enum token_chain_type
{
CHAIN_STR, /* Link contains a string, u.u_s is valid. */
CHAIN_FUNC, /* Builtin function definition, u.func is valid. */
- CHAIN_ARGV /* Link contains a $@ reference, u.u_a is valid. */
+ CHAIN_ARGV, /* Link contains a $@ reference, u.u_a is valid. */
+ CHAIN_LOC /* Link contains location of m4wrap, u.u_l is valid. */
};
/* Composite tokens are built of a linked list of chains. Each link
@@ -316,6 +317,12 @@ struct token_chain
const string_pair *quotes; /* NULL for $*, quotes for $@. */
}
u_a;
+ struct
+ {
+ const char *file; /* File where subsequent links originate. */
+ int line; /* Line where subsequent links originate. */
+ }
+ u_l;
}
u;
};
@@ -508,6 +515,7 @@ void push_arg (struct obstack *, macro_arguments *, unsigned int);
void push_arg_quote (struct obstack *, macro_arguments *, unsigned int,
const string_pair *);
void push_args (struct obstack *, macro_arguments *, bool, bool);
+void wrap_args (macro_arguments *);
/* Grab the text at argv index I. Assumes macro_argument *argv is in
scope, and aborts if the argument is not text. */
diff --git a/src/macro.c b/src/macro.c
index d6e92728..b6d8d3e4 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -946,11 +946,8 @@ arg_text (macro_arguments *argv, unsigned int index)
case CHAIN_STR:
obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
break;
- case CHAIN_FUNC:
- // TODO concatenate builtins
- assert (!"implemented");
- abort ();
case CHAIN_ARGV:
+ assert (!chain->u.u_a.has_func || argv->flatten);
arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
quote_cache (NULL, chain->quote_age,
chain->u.u_a.quotes),
@@ -1510,3 +1507,66 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
if (push_token (token, -1, argv->inuse))
arg_mark (argv);
}
+
+/* Push arguments from ARGV onto the wrap stack for later rescanning.
+   If GNU extensions are disabled, only the first argument is pushed;
+   otherwise, all arguments are pushed and separated with a space.
+   Nothing is pushed when the only argument to be saved is empty.
+   A bare builtin argument is not supported yet and aborts.  */
+void
+wrap_args (macro_arguments *argv)
+{
+  int i;
+  struct obstack *obs;
+  token_data *token;
+  token_chain *chain;
+
+  if ((argv->argc == 2 || no_gnu_extensions) && arg_empty (argv, 1))
+    return;
+
+  obs = push_wrapup_init ();
+  for (i = 1; i < (no_gnu_extensions ? 2 : argv->argc); i++)
+    {
+      if (i != 1)
+        obstack_1grow (obs, ' ');
+      token = arg_token (argv, i, NULL, false);
+      switch (TOKEN_DATA_TYPE (token))
+        {
+        case TOKEN_TEXT:
+          obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token));
+          break;
+        case TOKEN_FUNC:
+          assert (false); // TODO allow builtins through m4wrap
+          abort (); /* Never fall into TOKEN_COMP when NDEBUG is set.  */
+        case TOKEN_COMP:
+          chain = token->u.u_c.chain;
+          while (chain)
+            {
+              switch (chain->type)
+                {
+                case CHAIN_STR:
+                  obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
+                  break;
+                case CHAIN_FUNC:
+                  assert (false); // TODO allow builtins through m4wrap
+                  break;
+                case CHAIN_ARGV:
+                  arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index,
+                             quote_cache (NULL, chain->quote_age,
+                                          chain->u.u_a.quotes),
+                             chain->u.u_a.flatten, NULL, NULL, false);
+                  break;
+                default:
+                  assert (!"wrap_args");
+                  abort ();
+                }
+              chain = chain->next;
+            }
+          break;
+        default:
+          assert (!"wrap_args");
+          abort ();
+        }
+    }
+  push_wrapup_finish ();
+}