diff options
author | Eric Blake <ebb9@byu.net> | 2007-12-03 11:53:45 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-03-17 07:06:17 -0600 |
commit | f7f45337fa1bfba9512841e8d3d2251359944681 (patch) | |
tree | ad5eaad82fe1e706ced0ab6d25173e83686ad28d | |
parent | 434656c96d6486cf959c3050aa85aecb72d948a0 (diff) | |
download | m4-f7f45337fa1bfba9512841e8d3d2251359944681.tar.gz |
Stage20: make m4wrap obey POSIX fifo ordering
-rw-r--r-- | NEWS | 4 | ||||
-rw-r--r-- | doc/m4.texinfo | 342 | ||||
-rw-r--r-- | examples/Makefile.am | 6 | ||||
-rw-r--r-- | examples/join.m4 | 15 | ||||
-rw-r--r-- | examples/wraplifo2.m4 | 9 | ||||
-rw-r--r-- | m4/gnulib-cache.m4 | 2 | ||||
-rw-r--r-- | src/builtin.c | 25 | ||||
-rw-r--r-- | src/input.c | 126 | ||||
-rw-r--r-- | src/m4.h | 10 | ||||
-rw-r--r-- | src/macro.c | 68 |
10 files changed, 495 insertions, 112 deletions
@@ -8,6 +8,10 @@ Foundation, Inc. ** Fix regression introduced in 1.4.10b where using `builtin' or `indir' to perform nested `shift' calls triggered an assertion failure. +** Fix the `m4wrap' builtin to accumulate wrapped text in FIFO order, as + required by POSIX. The manual mentions a way to restore the LIFO order + present in earlier GNU M4 versions. + ** Enhance the `ifdef', `ifelse', and `shift' builtins, as well as all user macros, to transparently handle builtin tokens generated by `defn'. diff --git a/doc/m4.texinfo b/doc/m4.texinfo index b271e92a..00cff97a 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -269,6 +269,7 @@ Correct version of some examples * Improved exch:: Solution for @code{exch} * Improved forloop:: Solution for @code{forloop} * Improved foreach:: Solution for @code{foreach} +* Improved m4wrap:: Solution for @code{m4wrap} * Improved cleardivert:: Solution for @code{cleardivert} * Improved capitalize:: Solution for @code{capitalize} * Improved fatal_error:: Solution for @code{fatal_error} @@ -2283,25 +2284,40 @@ builtin token is preserved only when it occurs in isolation. A future version of @acronym{GNU} M4 may lift these restrictions. 
@example +$ @kbd{m4 -d} define(`a', `A')define(`AA', `b') @result{} +traceon(`defn', `define') +@result{} defn(`a', `divnum', `a') -@error{}m4:stdin:2: Warning: defn: cannot concatenate builtin `divnum' +@error{}m4:stdin:3: Warning: defn: cannot concatenate builtin `divnum' +@error{}m4trace: -1- defn(`a', `divnum', `a') -> ``A'`A'' @result{}AA define(`mydivnum', defn(`divnum', `divnum'))mydivnum -@error{}m4:stdin:3: Warning: defn: cannot concatenate builtin `divnum' -@error{}m4:stdin:3: Warning: defn: cannot concatenate builtin `divnum' +@error{}m4:stdin:4: Warning: defn: cannot concatenate builtin `divnum' +@error{}m4:stdin:4: Warning: defn: cannot concatenate builtin `divnum' +@error{}m4trace: -2- defn(`divnum', `divnum') +@error{}m4trace: -1- define(`mydivnum', `') +@result{} +traceoff(`defn', `define') @result{} define(`mydivnum', defn(`divnum')defn(`divnum'))mydivnum -@error{}m4:stdin:4: Warning: define: cannot concatenate builtin `divnum' -@error{}m4:stdin:4: Warning: define: cannot concatenate builtin `divnum' +@error{}m4:stdin:6: Warning: define: cannot concatenate builtin `divnum' +@error{}m4:stdin:6: Warning: define: cannot concatenate builtin `divnum' @result{} define(`mydivnum', defn(`divnum')`a')mydivnum -@error{}m4:stdin:5: Warning: define: cannot concatenate builtin `divnum' +@error{}m4:stdin:7: Warning: define: cannot concatenate builtin `divnum' @result{}A define(`mydivnum', `a'defn(`divnum'))mydivnum -@error{}m4:stdin:6: Warning: define: cannot concatenate builtin `divnum' +@error{}m4:stdin:8: Warning: define: cannot concatenate builtin `divnum' @result{}A +define(`q', ``$@@'') +@result{} +define(`foo', q(`a', defn(`divnum')))foo +@error{}m4:stdin:10: Warning: define: cannot quote builtin +@result{}a, +ifdef(`foo', `yes', `no') +@result{}yes @end example @node Pushdef @@ -2930,6 +2946,8 @@ shift(`foo', `bar', `baz') An example of the use of @code{shift} is this macro: +@cindex reversing arguments +@cindex arguments, reversing @deffn Composite 
reverse (@dots{}) Takes any number of arguments, and reverses their order. @end deffn @@ -3007,6 +3025,113 @@ example2(`feeling rather indecisive today') @result{}default answer: 4 @end example +@cindex joining arguments +@cindex arguments, joining +@cindex concatenating arguments +Another common task that requires iteration is joining a list of +arguments into a single string. + +@deffn Composite join (@ovar{separator}, @ovar{args@dots{}}) +@deffnx Composite joinall (@ovar{separator}, @ovar{args@dots{}}) +Generate a single-quoted string, consisting of each @var{arg} separated +by @var{separator}. While @code{joinall} always outputs a +@var{separator} between arguments, @code{join} avoids the +@var{separator} for an empty @var{arg}. +@end deffn + +Here are some examples of its usage, based on the implementation +@file{m4-@value{VERSION}/@/examples/@/join.m4} distributed in this +package: + +@comment examples +@example +$ @kbd{m4 -I examples} +include(`join.m4') +@result{} +join,join(`-'),join(`-', `'),join(`-', `', `') +@result{},,, +joinall,joinall(`-'),joinall(`-', `'),joinall(`-', `', `') +@result{},,,- +join(`-', `1') +@result{}1 +join(`-', `1', `2', `3') +@result{}1-2-3 +join(`', `1', `2', `3') +@result{}123 +join(`-', `', `1', `', `', `2', `') +@result{}1-2 +joinall(`-', `', `1', `', `', `2', `') +@result{}-1---2- +join(`,', `1', `2', `3') +@result{}1,2,3 +define(`nargs', `$#')dnl +nargs(join(`,', `1', `2', `3')) +@result{}1 +@end example + +Examining the implementation shows some interesting points about several +m4 programming idioms. 
+ +@comment examples +@example +$ @kbd{m4 -I examples} +undivert(`join.m4')dnl +@result{}divert(`-1') +@result{}# join(sep, args) - join each non-empty ARG into a single +@result{}# string, with each element separated by SEP +@result{}define(`join', +@result{}`ifelse(`$#', `2', ``$2'', +@result{} `ifelse(`$2', `', `', ``$2'_')$0(`$1', shift(shift($@@)))')') +@result{}define(`_join', +@result{}`ifelse(`$#$2', `2', `', +@result{} `ifelse(`$2', `', `', ``$1$2'')$0(`$1', shift(shift($@@)))')') +@result{}# joinall(sep, args) - join each ARG, including empty ones, +@result{}# into a single string, with each element separated by SEP +@result{}define(`joinall', ``$2'_$0(`$1', shift($@@))') +@result{}define(`_joinall', +@result{}`ifelse(`$#', `2', `', ``$1$3'$0(`$1', shift(shift($@@)))')') +@result{}divert`'dnl +@end example + +First, notice that this implementation creates helper macros +@code{_join} and @code{_joinall}. This division of labor makes it +easier to output the correct number of @var{separator} instances: +@code{join} and @code{joinall} are responsible for the first argument, +without a separator, while @code{_join} and @code{_joinall} are +responsible for all remaining arguments, always outputting a separator +when outputting an argument. + +Next, observe how @code{join} decides to iterate to itself, because the +first @var{arg} was empty, or to output the argument and swap over to +@code{_join}. If the argument is non-empty, then the nested +@code{ifelse} results in an unquoted @samp{_}, which is concatenated +with the @samp{$0} to form the next macro name to invoke. The +@code{joinall} implementation is simpler since it does not have to +suppress empty @var{arg}; it always executes once then defers to +@code{_joinall}. + +Another important idiom is the idea that @var{separator} is reused for +each iteration. 
Each iteration has one less argument, but rather than +discarding @samp{$1} by iterating with @code{$0(shift($@@))}, the macro +discards @samp{$2} by using @code{$0(`$1', shift(shift($@@)))}. + +Next, notice that it is possible to compare more than one condition in a +single @code{ifelse} test. The test of @samp{$#$2} against @samp{2} +allows @code{_join} to iterate for two separate reasons---either there +are still more than two arguments, or there are exactly two arguments +but the last argument is not empty. + +Finally, notice that these macros require exactly two arguments to +terminate recursion, but that they still correctly result in empty +output when given no @var{args} (i.e., zero or one macro argument). On +the first pass when there are too few arguments, the @code{shift} +results in no output, but leaves an empty string to serve as the +required second argument for the second pass. Put another way, +@samp{`$1', shift($@@)} is not the same as @samp{$@@}, since only the +former guarantees at least two arguments. + +@cindex quote manipulation +@cindex manipulating quotes Sometimes, a recursive algorithm requires adding quotes to each element, or treating multiple arguments as a single element: @@ -3073,6 +3198,9 @@ undivert(`quote.m4')dnl @result{}divert`'dnl @end example +It is worth pointing out that @samp{quote(@var{args})} is more efficient +than @samp{joinall(`,', @var{args})} for producing the same output. + @cindex nine arguments, more than @cindex more than nine arguments @cindex arguments, more than nine @@ -4449,6 +4577,9 @@ Stores @var{string} in a safe place, to be reread when end of input is reached. As a @acronym{GNU} extension, additional arguments are concatenated with a space to the @var{string}. +Successive invocations of @code{m4wrap} accumulate saved text in +first-in, first-out order, as required by @acronym{POSIX}. + The expansion of @code{m4wrap} is void. The macro @code{m4wrap} is recognized only with parameters. 
@end deffn @@ -4468,16 +4599,83 @@ This is the first and last normal input line. The saved input is only reread when the end of normal input is seen, and not if @code{m4exit} is used to exit @code{m4}. -@comment FIXME: this contradicts POSIX, which requires that "If the -@comment m4wrap macro is used multiple times, the arguments specified -@comment shall be processed in the order in which the m4wrap macros were -@comment processed." -It is safe to call @code{m4wrap} from saved text, but then the order in -which the saved text is reread is undefined. If @code{m4wrap} is not used -recursively, the saved pieces of text are reread in the opposite order -in which they were saved (LIFO---last in, first out). However, this -behavior is likely to change in a future release, to match -@acronym{POSIX}, so you should not depend on this order. +It is safe to call @code{m4wrap} from wrapped text, where all the +recursively wrapped text is deferred until the current wrapped text is +exhausted. As of M4 1.4.11, when @code{m4wrap} is not used recursively, +the saved pieces of text are reread in the same order in which they were +saved (FIFO---first in, first out), as required by @acronym{POSIX}. + +@example +m4wrap(`1 +') +@result{} +m4wrap(`2', `3 +') +@result{} +^D +@result{}1 +@result{}2 3 +@end example + +However, earlier versions had reverse ordering (LIFO---last in, first +out), as this behavior is more like the semantics of the C function +@code{atexit}. It is possible to emulate @acronym{POSIX} behavior even +with older versions of @acronym{GNU} M4 by including the file +@file{m4-@value{VERSION}/@/examples/@/wrapfifo.m4} from the +distribution: + +@comment examples +@example +$ @kbd{m4 -I examples} +undivert(`wrapfifo.m4')dnl +@result{}dnl Redefine m4wrap to have FIFO semantics. 
+@result{}define(`_m4wrap_level', `0')dnl +@result{}define(`m4wrap', +@result{}`ifdef(`m4wrap'_m4wrap_level, +@result{} `define(`m4wrap'_m4wrap_level, +@result{} defn(`m4wrap'_m4wrap_level)`$1')', +@result{} `builtin(`m4wrap', `define(`_m4wrap_level', +@result{} incr(_m4wrap_level))dnl +@result{}m4wrap'_m4wrap_level)dnl +@result{}define(`m4wrap'_m4wrap_level, `$1')')')dnl +include(`wrapfifo.m4') +@result{} +m4wrap(`a`'m4wrap(`c +', `d')')m4wrap(`b') +@result{} +^D +@result{}abc +@end example + +It is likewise possible to emulate LIFO behavior without resorting to +the @acronym{GNU} M4 extension of @code{builtin}, by including the file +@file{m4-@value{VERSION}/@/examples/@/wraplifo.m4} from the +distribution. (Unfortunately, both examples shown here share some +subtle bugs. See if you can find and correct them; or @pxref{Improved +m4wrap, , Answers}). + +@comment examples +@example +$ @kbd{m4 -I examples} +undivert(`wraplifo.m4')dnl +@result{}dnl Redefine m4wrap to have LIFO semantics. +@result{}define(`_m4wrap_level', `0')dnl +@result{}define(`_m4wrap', defn(`m4wrap'))dnl +@result{}define(`m4wrap', +@result{}`ifdef(`m4wrap'_m4wrap_level, +@result{} `define(`m4wrap'_m4wrap_level, +@result{} `$1'defn(`m4wrap'_m4wrap_level))', +@result{} `_m4wrap(`define(`_m4wrap_level', incr(_m4wrap_level))dnl +@result{}m4wrap'_m4wrap_level)dnl +@result{}define(`m4wrap'_m4wrap_level, `$1')')')dnl +include(`wraplifo.m4') +@result{} +m4wrap(`a`'m4wrap(`c +', `d')')m4wrap(`b') +@result{} +^D +@result{}bac +@end example Here is an example of implementing a factorial function using @code{m4wrap}: @@ -4497,13 +4695,13 @@ Invocations of @code{m4wrap} at the same recursion level are concatenated and rescanned as usual: @example -define(`aa', `AA +define(`ab', `AB ') @result{} -m4wrap(`a')m4wrap(`a') +m4wrap(`a')m4wrap(`b') @result{} ^D -@result{}AA +@result{}AB @end example @noindent @@ -6417,9 +6615,13 @@ __line__ @result{}8 __line__ @result{}11 +m4wrap(`__line__ +') +@result{} ^D 
@result{}6 @result{}6 +@result{}12 @end example The @code{@w{__program__}} macro behaves like @samp{$0} in shell @@ -6861,37 +7063,13 @@ of @samp{-} on the command line. @item @acronym{POSIX} requires @code{m4wrap} (@pxref{M4wrap}) to act in FIFO -(first-in, first-out) order, but @acronym{GNU} @code{m4} currently uses +(first-in, first-out) order, and most other implementations obey this. +However, versions of @acronym{GNU} @code{m4} earlier than 1.4.11 used LIFO order. Furthermore, @acronym{POSIX} states that only the first argument to @code{m4wrap} is saved for later evaluation, but @acronym{GNU} @code{m4} saves and processes all arguments, with output separated by spaces. -However, it is possible to emulate @acronym{POSIX} behavior by -including the file @file{m4-@value{VERSION}/@/examples/@/wrapfifo.m4} -from the distribution: - -@example -undivert(`wrapfifo.m4')dnl -@result{}dnl Redefine m4wrap to have FIFO semantics. -@result{}define(`_m4wrap_level', `0')dnl -@result{}define(`m4wrap', -@result{}`ifdef(`m4wrap'_m4wrap_level, -@result{} `define(`m4wrap'_m4wrap_level, -@result{} defn(`m4wrap'_m4wrap_level)`$1')', -@result{} `builtin(`m4wrap', `define(`_m4wrap_level', -@result{} incr(_m4wrap_level))dnl -@result{}m4wrap'_m4wrap_level)dnl -@result{}define(`m4wrap'_m4wrap_level, `$1')')')dnl -include(`wrapfifo.m4') -@result{} -m4wrap(`a`'m4wrap(`c -', `d')')m4wrap(`b') -@result{} -^D -@result{}abc -@end example - @item @acronym{POSIX} states that builtins that require arguments, but are called without arguments, have undefined behavior. Traditional @@ -7098,6 +7276,7 @@ presented here. 
* Improved exch:: Solution for @code{exch} * Improved forloop:: Solution for @code{forloop} * Improved foreach:: Solution for @code{foreach} +* Improved m4wrap:: Solution for @code{m4wrap} * Improved cleardivert:: Solution for @code{cleardivert} * Improved capitalize:: Solution for @code{capitalize} * Improved fatal_error:: Solution for @code{fatal_error} @@ -7549,6 +7728,77 @@ include(`loop.m4')dnl @end ignore +@node Improved m4wrap +@section Solution for @code{m4wrap} + +The replacement @code{m4wrap} versions presented above, designed to +guarantee FIFO or LIFO order regardless of the underlying M4 +implementation, share a bug when dealing with wrapped text that looks +like parameter expansion. Note how the invocation of +@code{m4wrap@var{n}} interprets these parameters, while using the +builtin preserves them for their intended use. + +@comment examples +@example +$ @kbd{m4 -I examples} +include(`wraplifo.m4') +@result{} +m4wrap(`define(`foo', ``$0:'-$1-$*-$#-')foo(`a', `b') +') +@result{} +builtin(`m4wrap', ``'define(`bar', ``$0:'-$1-$*-$#-')bar(`a', `b') +') +@result{} +^D +@result{}m4wrap0:---0- +@result{}bar:-a-a,b-2- +@end example + +Additionally, the computation of @code{_m4wrap_level} and creation of +multiple @code{m4wrap@var{n}} placeholders in the original examples is +more expensive in time and memory than strictly necessary. Notice how +the improved version grabs the wrapped text via @code{defn} to avoid +parameter expansion, then undefines @code{_m4wrap_text}, before +stripping a level of quotes with @code{_arg1} to expand the text. That +way, each level of wrapping reuses the single placeholder, which starts +each nesting level in an undefined state. + +Finally, it is worth emulating the @acronym{GNU} M4 extension of saving +all arguments to @code{m4wrap}, separated by a space, rather than saving +just the first argument. This is done with the @code{join} macro +documented previously (@pxref{Shift}). 
The improved LIFO example is +shipped as @file{m4-@value{VERSION}/@/examples/@/wraplifo2.m4}, and can +easily be converted to a FIFO solution by swapping the adjacent +invocations of @code{joinall} and @code{defn}. + +@comment examples +@example +$ @kbd{m4 -I examples} +include(`wraplifo2.m4') +@result{} +undivert(`wraplifo2.m4')dnl +@result{}dnl Redefine m4wrap to have LIFO semantics, improved example. +@result{}include(`join.m4')dnl +@result{}define(`_m4wrap', defn(`m4wrap'))dnl +@result{}define(`_arg1', `$1')dnl +@result{}define(`m4wrap', +@result{}`ifdef(`_$0_text', +@result{} `define(`_$0_text', joinall(` ', $@@)defn(`_$0_text'))', +@result{} `_$0(`_arg1(defn(`_$0_text')undefine(`_$0_text'))')dnl +@result{}define(`_$0_text', joinall(` ', $@@))')')dnl +m4wrap(`define(`foo', ``$0:'-$1-$*-$#-')foo(`a', `b') +') +@result{} +m4wrap(`lifo text +m4wrap(`nested', `', `$@@ +')') +@result{} +^D +@result{}lifo text +@result{}foo:-a-a,b-2- +@result{}nested $@@ +@end example + @node Improved cleardivert @section Solution for @code{cleardivert} diff --git a/examples/Makefile.am b/examples/Makefile.am index 3450eacb..254d2ab9 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -1,6 +1,6 @@ ## Makefile.am - template for generating Makefile via Automake ## -## Copyright (C) 2006, 2007 Free Software Foundation, Inc. +## Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc. ## ## This file is part of GNU M4. 
## @@ -42,6 +42,7 @@ incl-test.m4 \ incl.m4 \ include.m4 \ indir.m4 \ +join.m4 \ loop.m4 \ misc.m4 \ multiquotes.m4 \ @@ -62,4 +63,5 @@ undivert.incl \ undivert.m4 \ wrap.m4 \ wrapfifo.m4 \ -wraplifo.m4 +wraplifo.m4 \ +wraplifo2.m4 diff --git a/examples/join.m4 b/examples/join.m4 new file mode 100644 index 00000000..8687ac70 --- /dev/null +++ b/examples/join.m4 @@ -0,0 +1,15 @@ +divert(`-1') +# join(sep, args) - join each non-empty ARG into a single +# string, with each element separated by SEP +define(`join', +`ifelse(`$#', `2', ``$2'', + `ifelse(`$2', `', `', ``$2'_')$0(`$1', shift(shift($@)))')') +define(`_join', +`ifelse(`$#$2', `2', `', + `ifelse(`$2', `', `', ``$1$2'')$0(`$1', shift(shift($@)))')') +# joinall(sep, args) - join each ARG, including empty ones, +# into a single string, with each element separated by SEP +define(`joinall', ``$2'_$0(`$1', shift($@))') +define(`_joinall', +`ifelse(`$#', `2', `', ``$1$3'$0(`$1', shift(shift($@)))')') +divert`'dnl diff --git a/examples/wraplifo2.m4 b/examples/wraplifo2.m4 new file mode 100644 index 00000000..5b450a76 --- /dev/null +++ b/examples/wraplifo2.m4 @@ -0,0 +1,9 @@ +dnl Redefine m4wrap to have LIFO semantics, improved example. +include(`join.m4')dnl +define(`_m4wrap', defn(`m4wrap'))dnl +define(`_arg1', `$1')dnl +define(`m4wrap', +`ifdef(`_$0_text', + `define(`_$0_text', joinall(` ', $@)defn(`_$0_text'))', + `_$0(`_arg1(defn(`_$0_text')undefine(`_$0_text'))')dnl +define(`_$0_text', joinall(` ', $@))')')dnl diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index 1a3434c0..be74a73c 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -1,4 +1,4 @@ -# Copyright (C) 2004-2007 Free Software Foundation, Inc. +# Copyright (C) 2002-2008 Free Software Foundation, Inc. # # This file is free software, distributed under the terms of the GNU # General Public License. 
As a special exception to the GNU General diff --git a/src/builtin.c b/src/builtin.c index 9dbb4693..c41aa544 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -667,7 +667,13 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode) switch (arg_type (argv, 2)) { + case TOKEN_COMP: + m4_warn (0, me, _("cannot concatenate builtins")); + // TODO fall through instead + break; + case TOKEN_TEXT: + // TODO flatten TOKEN_COMP value, or support concatenation of builtins define_user_macro (ARG (1), ARG_LEN (1), ARG (2), mode); break; @@ -1608,25 +1614,20 @@ m4_m4exit (struct obstack *obs, int argc, macro_arguments *argv) exit (exit_code); } -/*-------------------------------------------------------------------------. -| Save the argument text until EOF has been seen, allowing for user | -| specified cleanup action. GNU version saves all arguments, the standard | -| version only the first. | -`-------------------------------------------------------------------------*/ +/*-----------------------------------------------------------------. +| Save the argument text in FIFO order until EOF has been seen, | +| allowing for user specified cleanup action. Extra arguments are | +| saved when not in POSIX mode. | +`-----------------------------------------------------------------*/ static void m4_m4wrap (struct obstack *obs, int argc, macro_arguments *argv) { if (bad_argc (ARG (0), argc, 1, -1)) return; - obs = push_wrapup_init (); - if (no_gnu_extensions) - obstack_grow (obs, ARG (1), ARG_LEN (1)); - else - // TODO - allow builtins, rather than always flattening - arg_print (obs, argv, 1, NULL, true, " ", NULL, false); - push_wrapup_finish (); + wrap_args (argv); } + /* Enable tracing of all specified macros, or all, if none is specified. Tracing is disabled by default, when a macro is defined. 
This can be diff --git a/src/input.c b/src/input.c index cc40a587..a6fcc854 100644 --- a/src/input.c +++ b/src/input.c @@ -532,12 +532,36 @@ struct obstack * push_wrapup_init (void) { input_block *i; - i = (input_block *) obstack_alloc (wrapup_stack, sizeof *i); - i->prev = wsp; - i->type = INPUT_STRING; - i->file = current_file; - i->line = current_line; - wsp = i; + token_chain *chain; + + assert (obstack_object_size (wrapup_stack) == 0); + if (wsp) + { + i = wsp; + assert (i->type == INPUT_CHAIN && i->u.u_c.end + && i->u.u_c.end->type != CHAIN_LOC); + } + else + { + i = (input_block *) obstack_alloc (wrapup_stack, sizeof *i); + i->prev = wsp; + i->file = current_file; + i->line = current_line; + i->type = INPUT_CHAIN; + i->u.u_c.chain = i->u.u_c.end = NULL; + wsp = i; + } + chain = (token_chain *) obstack_alloc (wrapup_stack, sizeof *chain); + if (i->u.u_c.end) + i->u.u_c.end->next = chain; + else + i->u.u_c.chain = chain; + i->u.u_c.end = chain; + chain->next = NULL; + chain->type = CHAIN_LOC; + chain->quote_age = 0; + chain->u.u_l.file = current_file; + chain->u.u_l.line = current_line; return wrapup_stack; } @@ -548,17 +572,7 @@ push_wrapup_init (void) void push_wrapup_finish (void) { - input_block *i = wsp; - if (obstack_object_size (wrapup_stack) == 0) - { - wsp = i->prev; - obstack_free (wrapup_stack, i); - } - else - { - i->u.u_s.len = obstack_object_size (wrapup_stack); - i->u.u_s.str = (char *) obstack_finish (wrapup_stack); - } + make_text_link (wrapup_stack, &wsp->u.u_c.chain, &wsp->u.u_c.end); } @@ -613,6 +627,8 @@ pop_input (bool cleanup) return false; arg_adjust_refcount (chain->u.u_a.argv, false); break; + case CHAIN_LOC: + return false; default: assert (!"pop_input"); abort (); @@ -837,6 +853,8 @@ peek_input (bool allow_argv) chain->u.u_a.comma = true; push_string_finish (); return peek_input (allow_argv); + case CHAIN_LOC: + break; default: assert (!"peek_input"); abort (); @@ -863,16 +881,18 @@ peek_input (bool allow_argv) | string, so factor 
that out into a macro for speed. If | | ALLOW_QUOTE, and the current input matches the current quote age, | | return CHAR_QUOTE and leave consumption of data for | -| append_quote_token. | +| append_quote_token; otherwise, if ALLOW_ARGV and the current input | +| matches an argv reference with the correct quoting, return | +| CHAR_ARGV and leave consumption of data for init_argv_token. | `-------------------------------------------------------------------*/ -#define next_char(AQ) \ +#define next_char(AQ, AA) \ (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \ ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \ - : next_char_1 (AQ)) + : next_char_1 (AQ, AA)) static int -next_char_1 (bool allow_quote) +next_char_1 (bool allow_quote, bool allow_argv) { int ch; token_chain *chain; @@ -929,6 +949,7 @@ next_char_1 (bool allow_quote) chain = isp->u.u_c.chain; while (chain) { + unsigned int argc; if (allow_quote && chain->quote_age == current_quote_age) return CHAR_QUOTE; switch (chain->type) @@ -949,7 +970,8 @@ next_char_1 (bool allow_quote) return CHAR_MACRO; break; case CHAIN_ARGV: - if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv)) + argc = arg_argc (chain->u.u_a.argv); + if (chain->u.u_a.index == argc) { arg_adjust_refcount (chain->u.u_a.argv, false); break; @@ -959,6 +981,12 @@ next_char_1 (bool allow_quote) chain->u.u_a.comma = false; return ','; } + /* Only return a reference if the quoting is correct + and the reference has more than one argument + left. */ + if (allow_argv && chain->quote_age == current_quote_age + && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc) + return CHAR_ARGV; /* Rather than directly parse argv here, we push another input block containing the next unparsed argument from argv. 
*/ @@ -970,7 +998,13 @@ next_char_1 (bool allow_quote) chain->u.u_a.index++; chain->u.u_a.comma = true; push_string_finish (); - return next_char_1 (allow_quote); + return next_char_1 (allow_quote, allow_argv); + case CHAIN_LOC: + isp->file = chain->u.u_l.file; + isp->line = chain->u.u_l.line; + input_change = true; + isp->u.u_c.chain = chain->next; + return next_char_1 (allow_quote, allow_argv); default: assert (!"next_char_1"); abort (); @@ -1002,7 +1036,7 @@ skip_line (const char *name) const char *file = current_file; int line = current_line; - while ((ch = next_char (false)) != CHAR_EOF && ch != '\n') + while ((ch = next_char (false, false)) != CHAR_EOF && ch != '\n') ; if (ch == CHAR_EOF) /* current_file changed to "" if we see CHAR_EOF, use the @@ -1028,25 +1062,28 @@ skip_line (const char *name) static void init_macro_token (token_data *td) { - int ch = next_char (false); - assert (ch == CHAR_MACRO); - if (td) - TOKEN_DATA_TYPE (td) = TOKEN_FUNC; + token_chain *chain; + if (isp->type == INPUT_MACRO) { assert (isp->u.func); if (td) - TOKEN_DATA_FUNC (td) = isp->u.func; + { + TOKEN_DATA_TYPE (td) = TOKEN_FUNC; + TOKEN_DATA_FUNC (td) = isp->u.func; + } isp->u.func = NULL; } else { - token_chain *chain; assert (isp->type == INPUT_CHAIN); chain = isp->u.u_c.chain; assert (!chain->quote_age && chain->type == CHAIN_FUNC && chain->u.func); if (td) - TOKEN_DATA_FUNC (td) = chain->u.func; + { + TOKEN_DATA_TYPE (td) = TOKEN_FUNC; + TOKEN_DATA_FUNC (td) = chain->u.func; + } chain->u.func = NULL; } } @@ -1108,9 +1145,9 @@ init_argv_token (struct obstack *obs, token_data *td) { token_chain *src_chain; token_chain *chain; - int ch = next_char (true); + int ch; - assert (ch == CHAR_QUOTE && TOKEN_DATA_TYPE (td) == TOKEN_VOID + assert (TOKEN_DATA_TYPE (td) == TOKEN_VOID && isp->type == INPUT_CHAIN && isp->u.u_c.chain->type == CHAIN_ARGV && obs && obstack_object_size (obs) == 0); @@ -1146,7 +1183,7 @@ init_argv_token (struct obstack *obs, token_data *td) decreased once the 
final element is parsed. */ assert (*curr_comm.str1 != ',' && *curr_comm.str1 != ')' && *curr_comm.str1 != *curr_quote.str1); - ch = peek_input (false); + ch = peek_input (true); if (ch != ',' && ch != ')') { isp->u.u_c.chain = src_chain; @@ -1181,14 +1218,14 @@ match_input (const char *s, bool consume) if (s[1] == '\0') { if (consume) - next_char (false); + next_char (false, false); return true; /* short match */ } - next_char (false); + next_char (false, false); for (n = 1, t = s++; (ch = peek_input (false)) == to_uchar (*s++); ) { - next_char (false); + next_char (false, false); n++; if (*s == '\0') /* long match */ { @@ -1558,15 +1595,13 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, if (!line) line = &dummy; - /* Can't consume character until after CHAR_MACRO is handled. */ TOKEN_DATA_TYPE (td) = TOKEN_VOID; - ch = peek_input (allow_argv && current_quote_age); + ch = next_char (false, allow_argv && current_quote_age); if (ch == CHAR_EOF) { #ifdef DEBUG_INPUT xfprintf (stderr, "next_token -> EOF\n"); #endif /* DEBUG_INPUT */ - next_char (false); return TOKEN_EOF; } if (ch == CHAR_MACRO) @@ -1590,7 +1625,6 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, return TOKEN_ARGV; } - next_char (false); /* Consume character we already peeked at. */ file = current_file; *line = current_line; if (MATCH (ch, curr_comm.str1, true)) @@ -1600,7 +1634,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, obstack_grow (obs_td, curr_comm.str1, curr_comm.len1); while (1) { - ch = next_char (false); + ch = next_char (false, false); if (ch == CHAR_EOF) /* Current_file changed to "" if we see CHAR_EOF, use the previous value we stored earlier. 
*/ @@ -1631,7 +1665,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, && (isalnum (ch) || ch == '_')) { obstack_1grow (&token_stack, ch); - next_char (false); + next_char (false, false); } type = TOKEN_WORD; } @@ -1654,7 +1688,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, obstack_blank (&token_stack, -1); break; } - next_char (false); + next_char (false, false); } obstack_1grow (&token_stack, '\0'); @@ -1699,7 +1733,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, type = TOKEN_STRING; while (1) { - ch = next_char (obs != NULL && current_quote_age); + ch = next_char (obs != NULL && current_quote_age, false); if (ch == CHAR_EOF) /* Current_file changed to "" if we see CHAR_EOF, use the previous value we stored earlier. */ @@ -1723,7 +1757,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, xfprintf (stderr, "next_token -> MACDEF (%s)\n", bp->name); #endif - ch = next_char (false); + ch = next_char (false, false); MATCH (ch, curr_quote.str2, true); return TOKEN_MACDEF; } @@ -284,7 +284,8 @@ enum token_chain_type { CHAIN_STR, /* Link contains a string, u.u_s is valid. */ CHAIN_FUNC, /* Builtin function definition, u.func is valid. */ - CHAIN_ARGV /* Link contains a $@ reference, u.u_a is valid. */ + CHAIN_ARGV, /* Link contains a $@ reference, u.u_a is valid. */ + CHAIN_LOC /* Link contains location of m4wrap, u.u_l is valid. */ }; /* Composite tokens are built of a linked list of chains. Each link @@ -316,6 +317,12 @@ struct token_chain const string_pair *quotes; /* NULL for $*, quotes for $@. */ } u_a; + struct + { + const char *file; /* File where subsequent links originate. */ + int line; /* Line where subsequent links originate. 
*/ + } + u_l; } u; }; @@ -508,6 +515,7 @@ void push_arg (struct obstack *, macro_arguments *, unsigned int); void push_arg_quote (struct obstack *, macro_arguments *, unsigned int, const string_pair *); void push_args (struct obstack *, macro_arguments *, bool, bool); +void wrap_args (macro_arguments *); /* Grab the text at argv index I. Assumes macro_argument *argv is in scope, and aborts if the argument is not text. */ diff --git a/src/macro.c b/src/macro.c index d6e92728..b6d8d3e4 100644 --- a/src/macro.c +++ b/src/macro.c @@ -946,11 +946,8 @@ arg_text (macro_arguments *argv, unsigned int index) case CHAIN_STR: obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); break; - case CHAIN_FUNC: - // TODO concatenate builtins - assert (!"implemented"); - abort (); case CHAIN_ARGV: + assert (!chain->u.u_a.has_func || argv->flatten); arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, quote_cache (NULL, chain->quote_age, chain->u.u_a.quotes), @@ -1510,3 +1507,66 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) if (push_token (token, -1, argv->inuse)) arg_mark (argv); } + +/* Push arguments from ARGV, which can include builtins, onto the wrap + stack for later rescanning. If GNU extensions are disabled, only + the first argument is pushed; otherwise, all arguments are pushed + and separated with a space. */ +void +wrap_args (macro_arguments *argv) +{ + int i; + struct obstack *obs; + token_data *token; + token_chain *chain; + + if ((argv->argc == 2 || no_gnu_extensions) && arg_empty (argv, 1)) + return; + + obs = push_wrapup_init (); + for (i = 1; i < (no_gnu_extensions ? 
2 : argv->argc); i++) + { + if (i != 1) + obstack_1grow (obs, ' '); + token = arg_token (argv, i, NULL, false); + switch (TOKEN_DATA_TYPE (token)) + { + case TOKEN_TEXT: + obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token)); + break; + case TOKEN_FUNC: + // TODO allow builtins through m4wrap + assert (false); + case TOKEN_COMP: + chain = token->u.u_c.chain; + while (chain) + { + switch (chain->type) + { + case CHAIN_STR: + obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); + break; + case CHAIN_FUNC: + // TODO allow builtins through m4wrap + assert (false); + break; + case CHAIN_ARGV: + arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, + quote_cache (NULL, chain->quote_age, + chain->u.u_a.quotes), + chain->u.u_a.flatten, NULL, NULL, false); + break; + default: + assert (!"wrap_args"); + abort (); + } + chain = chain->next; + } + break; + default: + assert (!"wrap_args"); + abort (); + } + } + push_wrapup_finish (); +} |