diff options
author | Eric Blake <ebb9@byu.net> | 2007-11-13 06:55:27 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-02-20 19:48:16 -0700 |
commit | 44740d89961c48b712562dfc650dc0cb57898aa0 (patch) | |
tree | e4ee391b2a73fffac48ff427df40882bc89a53d3 | |
parent | 1fecefc8b990254aa667a01d12c6c7a2d716df06 (diff) | |
download | m4-44740d89961c48b712562dfc650dc0cb57898aa0.tar.gz |
Stage16: cache quotes and improve arg_print
-rwxr-xr-x | checks/get-them | 4 | ||||
-rw-r--r-- | doc/m4.texinfo | 98 | ||||
-rw-r--r-- | examples/foreach2.m4 | 2 | ||||
-rw-r--r-- | examples/foreachq2.m4 | 2 | ||||
-rw-r--r-- | examples/forloop2.m4 | 2 | ||||
-rw-r--r-- | examples/null.m4 | bin | 5764 -> 5747 bytes | |||
-rw-r--r-- | examples/null.out | bin | 400 -> 402 bytes | |||
-rw-r--r-- | src/builtin.c | 91 | ||||
-rw-r--r-- | src/debug.c | 38 | ||||
-rw-r--r-- | src/input.c | 132 | ||||
-rw-r--r-- | src/m4.c | 1 | ||||
-rw-r--r-- | src/m4.h | 8 | ||||
-rw-r--r-- | src/macro.c | 83 |
13 files changed, 302 insertions, 159 deletions
diff --git a/checks/get-them b/checks/get-them index e034962c..803f413c 100755 --- a/checks/get-them +++ b/checks/get-them @@ -1,11 +1,13 @@ #!/bin/sh # -*- AWK -*- # Extract all examples from the manual source. -# Copyright (C) 1992, 2005, 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 1992, 2005, 2006, 2007, 2008 Free Software Foundation, +# Inc. # This script is for use with GNU awk. FILE=${1-/dev/null} +: ${AWK=awk} $AWK ' diff --git a/doc/m4.texinfo b/doc/m4.texinfo index b2599c98..2c6079bb 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -2900,6 +2900,7 @@ An actual implementation of these three macros is distributed as @file{m4-@value{VERSION}/@/examples/@/quote.m4} in this package. First, let's examine their usage: +@comment examples @example $ @kbd{m4 -I examples} include(`quote.m4') @@ -2932,6 +2933,7 @@ other hand, results in a string no matter what, since it is still possible to tell whether it was invoked without arguments based on the resulting string. +@comment examples @example $ @kbd{m4 -I examples} undivert(`quote.m4')dnl @@ -2993,6 +2995,7 @@ invocation is restored. It can, for example, be used for simple counting: +@comment examples @example $ @kbd{m4 -I examples} include(`forloop.m4') @@ -3003,6 +3006,7 @@ forloop(`i', `1', `8', `i ') For-loops can be nested, like: +@comment examples @example $ @kbd{m4 -I examples} include(`forloop.m4') @@ -3030,6 +3034,7 @@ not finished, it increments the iterator (using the predefined macro Here is an actual implementation of @code{forloop}, distributed as @file{m4-@value{VERSION}/@/examples/@/forloop.m4} in this package: +@comment examples @example $ @kbd{m4 -I examples} undivert(`forloop.m4')dnl @@ -3076,6 +3081,7 @@ using an implementation of @code{foreach} distributed as @file{m4-@value{VERSION}/@/examples/@/foreach.m4}, and @code{foreachq} in @file{m4-@value{VERSION}/@/examples/@/foreachq.m4}. 
+@comment examples @example $ @kbd{m4 -I examples} include(`foreach.m4') @@ -3098,6 +3104,7 @@ It is possible to be more complex; each element of the @var{paren-list} or @var{quote-list} can itself be a list, to pass as further arguments to a helper macro. This example generates a shell case statement: +@comment examples @example $ @kbd{m4 -I examples} include(`foreach.m4') @@ -3127,6 +3134,7 @@ needed to grab the first element of a list. Second, through the original list. Here is a simple implementation of @code{foreach}: +@comment examples @example $ @kbd{m4 -I examples} undivert(`foreach.m4')dnl @@ -3153,6 +3161,7 @@ expecting the macro name on output after one layer of quotes is removed during list iteration and the final layer removed during the final rescan: +@comment examples @example $ @kbd{m4 -I examples} define(`a', `1')define(`b', `2')define(`c', `3') @@ -3177,6 +3186,7 @@ foreachq(`x', ```a'', ``(b'', ``c)''', `x Obviously, @code{foreachq} did a better job; here is its implementation: +@comment examples @example $ @kbd{m4 -I examples} undivert(`foreachq.m4')dnl @@ -3422,7 +3432,8 @@ following: In trace output, show the actual arguments that were collected before invoking the macro. This applies to all macro calls if the @samp{t} flag is used, otherwise only the macros covered by calls of -@code{traceon}. +@code{traceon}. Arguments are subject to length truncation specified by +the command line option @option{--arglength} (or @option{-l}). @item c In trace output, show several trace lines for each macro call. A line @@ -3433,7 +3444,9 @@ after the call has completed. @item e In trace output, show the expansion of each macro call, if it is not void. This applies to all macro calls if the @samp{t} flag is used, -otherwise only the macros covered by calls of @code{traceon}. +otherwise only the macros covered by calls of @code{traceon}. The +expansion is subject to length truncation specified by the command line +option @option{--arglength} (or @option{-l}). 
@item f In debug and trace output, include the name of the current input file in @@ -3513,6 +3526,25 @@ foo @result{}FOO @end example +The following example demonstrates the behavior of length truncation, +when specified on the command line. Note that each argument and the +final result are individually truncated. Also, the special tokens for +builtin functions are not truncated. + +@comment options: -l6 +@example +$ @kbd{m4 -d -l 6} +define(`echo', `$@@')debugmode(`+t') +@result{} +echo(`1', `long string') +@error{}m4trace: -1- echo(`1', `long s...') -> ``1',`l...' +@result{}1,long string +indir(`echo', defn(`changequote')) +@error{}m4trace: -2- defn(`change...') +@error{}m4trace: -1- indir(`echo', <changequote>) -> ``'' +@result{} +@end example + @node Debug Output @section Saving debugging output @@ -4417,6 +4449,7 @@ Normally file inclusion is used to insert the contents of a file into the input stream. The contents of the file will be read by @code{m4} and macro calls in the file will be expanded: +@comment examples @example $ @kbd{m4 -I examples} define(`foo', `FOO') @@ -4433,6 +4466,7 @@ of the file can be used to define macros that operate on entire files. Here is an example, which defines @samp{bar} to expand to the contents of @file{incl.m4}: +@comment examples @example $ @kbd{m4 -I examples} define(`bar', include(`incl.m4')) @@ -5217,6 +5251,7 @@ word to upper case and the remaining characters to lower case. First, an example of their usage, using implementations distributed in @file{m4-@value{VERSION}/@/examples/@/capitalize.m4}. +@comment examples @example $ @kbd{m4 -I examples} include(`capitalize.m4') @@ -5236,6 +5271,7 @@ merely parses out the words, and replaces them with an invocation of some subtle flaws. You should try to see if you can find and correct them; or @pxref{Improved capitalize, , Answers}). 
+@comment examples @example $ @kbd{m4 -I examples} undivert(`capitalize.m4')dnl @@ -5327,6 +5363,7 @@ ifelse(format(`%.1A', `1.999'), `0X1.0P+1', `success', Using the @code{forloop} macro defined earlier (@pxref{Forloop}), this example shows how @code{format} can be used to produce tabular output. +@comment examples @example $ @kbd{m4 -I examples} include(`forloop.m4') @@ -6186,6 +6223,7 @@ message output. This example reuses the file @file{incl.m4} mentioned earlier (@pxref{Include}): +@comment examples @example $ @kbd{m4 -I examples} define(`foo', ``$0' called at __file__:__line__') @@ -6944,6 +6982,7 @@ shipped as @file{m4-@value{VERSION}/@/examples/@/forloop2.m4}; this version also optimizes based on the fact that the starting bound does not need to be passed to the helper @code{@w{_forloop}}. +@comment examples @example $ @kbd{m4 -I examples} undivert(`forloop2.m4')dnl @@ -6953,7 +6992,7 @@ undivert(`forloop2.m4')dnl @result{}# performs sanity check that FROM is larger than TO @result{}# allows complex numerical expressions in TO and FROM @result{}define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1', -@result{} `pushdef(`$1', eval(`$2'))_forloop(`$1', +@result{} `pushdef(`$1', eval(`$2'))_$0(`$1', @result{} eval(`$3'), `$4')popdef(`$1')')') @result{}define(`_forloop', @result{} `$3`'ifelse(indir(`$1'), `$2', `', @@ -6972,6 +7011,48 @@ forloop(`i', `a', `b', `non-numeric bounds') @result{} @end example +One other change to notice is that the improved version used @samp{_$0} +rather than @samp{_forloop} to invoke the helper routine. In general, +this is a good practice to follow, because then the set of macros can be +uniformly transformed. The following example shows a transformation +that doubles the current quoting and appends a suffix @samp{2} to each +transformed macro. 
If @code{forloop} refers to the literal +@samp{_forloop}, then @code{forloop2} invokes @code{_forloop} instead of +the intended @code{_forloop2}, and the mixing of quoting paradigms leads +to an infinite recursion loop in this example. + +@comment options: -L9 +@comment status: 1 +@comment examples +@example +$ @kbd{m4 -d -L 9 -I examples} +define(`arg1', `$1')include(`forloop2.m4')include(`quote.m4') +@result{} +define(`double', `define(`$1'`2', + arg1(patsubst(dquote(defn(`$1')), `[`']', `\&\&')))') +@result{} +double(`forloop')double(`_forloop')defn(`forloop2') +@result{}ifelse(eval(``($3) >= ($2)''), ``1'', +@result{} ``pushdef(``$1'', eval(``$2''))_$0(``$1'', +@result{} eval(``$3''), ``$4'')popdef(``$1'')'') +forloop(i, 1, 5, `ifelse(')forloop(i, 1, 5, `)') +@result{} +changequote(`[', `]')changequote([``], ['']) +@result{} +forloop2(i, 1, 5, ``ifelse('')forloop2(i, 1, 5, ``)'') +@result{} +changequote`'include(`forloop.m4') +@result{} +double(`forloop')double(`_forloop')defn(`forloop2') +@result{}pushdef(``$1'', ``$2'')_forloop($@@)popdef(``$1'') +forloop(i, 1, 5, `ifelse(')forloop(i, 1, 5, `)') +@result{} +changequote(`[', `]')changequote([``], ['']) +@result{} +forloop2(i, 1, 5, ``ifelse('')forloop2(i, 1, 5, ``)'') +@error{}m4:stdin:12: recursion limit of 9 exceeded, use -L<N> to change it +@end example + Of course, it is possible to make even more improvements, such as adding an optional step argument, or allowing iteration through descending sequences. @acronym{GNU} Autoconf provides some of these @@ -6984,6 +7065,7 @@ The @code{foreach} and @code{foreachq} macros (@pxref{Foreach}) as presented earlier each have flaws. First, we will examine and fix the quadratic behavior of @code{foreachq}: +@comment examples @example $ @kbd{m4 -I examples} include(`foreachq.m4') @@ -7025,6 +7107,7 @@ fewer macros, is less likely to run into machine limits, and most importantly, performs faster. 
The fixed version of @code{foreachq} can be found in @file{m4-@value{VERSION}/@/examples/@/foreachq2.m4}: +@comment examples @example $ @kbd{m4 -I examples} include(`foreachq2.m4') @@ -7034,7 +7117,7 @@ undivert(`foreachq2.m4')dnl @result{}divert(`-1') @result{}# foreachq(x, `item_1, item_2, ..., item_n', stmt) @result{}# quoted list, improved version -@result{}define(`foreachq', `pushdef(`$1')_foreachq($@@)popdef(`$1')') +@result{}define(`foreachq', `pushdef(`$1')_$0($@@)popdef(`$1')') @result{}define(`_arg1q', ``$1'') @result{}define(`_rest', `ifelse(`$#', `1', `', `dquote(shift($@@))')') @result{}define(`_foreachq', `ifelse(`$2', `', `', @@ -7073,6 +7156,7 @@ instead of an arbitrary length list as the key to end recursion. This alternative approach is available as @file{m4-@value{VERSION}/@/examples/@/foreach3.m4}: +@comment examples @example $ @kbd{m4 -I examples} include(`foreachq3.m4') @@ -7120,6 +7204,7 @@ overquotes the arguments to @code{@w{_foreach}} to begin with, using @code{@w{_arg1}} to remove the extra layer of quoting that was added up front: +@comment examples @example $ @kbd{m4 -I examples} include(`foreach2.m4') @@ -7129,7 +7214,7 @@ undivert(`foreach2.m4')dnl @result{}divert(`-1') @result{}# foreach(x, (item_1, item_2, ..., item_n), stmt) @result{}# parenthesized list, improved version -@result{}define(`foreach', `pushdef(`$1')_foreach(`$1', +@result{}define(`foreach', `pushdef(`$1')_$0(`$1', @result{} (dquote(dquote_elt$2)), `$3')popdef(`$1')') @result{}define(`_arg1', `$1') @result{}define(`_foreach', `ifelse(`$2', `(`')', `', @@ -7167,6 +7252,7 @@ deciding which list style to use, one must take into account whether repeating the side effects of unquoted list elements will have any detrimental effects. 
+@comment examples @example $ @kbd{m4 -d -I examples} include(`foreach2.m4') @@ -7279,6 +7365,7 @@ difference between calling @code{capitalize} with the expansion of a macro, expanding the result of a case change, and changing the case of a double-quoted string: +@comment examples @example $ @kbd{m4 -I examples} include(`capitalize.m4')dnl @@ -7355,6 +7442,7 @@ must be redefined as @code{_upcase_alt} and @code{_downcase_alt}, since they contain nested quotes but are invoked with the alternate quoting scheme in effect. +@comment examples @example $ @kbd{m4 -I examples} include(`capitalize2.m4')dnl diff --git a/examples/foreach2.m4 b/examples/foreach2.m4 index 4acf0c26..74d00fb6 100644 --- a/examples/foreach2.m4 +++ b/examples/foreach2.m4 @@ -2,7 +2,7 @@ include(`quote.m4')dnl divert(`-1') # foreach(x, (item_1, item_2, ..., item_n), stmt) # parenthesized list, improved version -define(`foreach', `pushdef(`$1')_foreach(`$1', +define(`foreach', `pushdef(`$1')_$0(`$1', (dquote(dquote_elt$2)), `$3')popdef(`$1')') define(`_arg1', `$1') define(`_foreach', `ifelse(`$2', `(`')', `', diff --git a/examples/foreachq2.m4 b/examples/foreachq2.m4 index 345ddfed..f57d3edf 100644 --- a/examples/foreachq2.m4 +++ b/examples/foreachq2.m4 @@ -2,7 +2,7 @@ include(`quote.m4')dnl divert(`-1') # foreachq(x, `item_1, item_2, ..., item_n', stmt) # quoted list, improved version -define(`foreachq', `pushdef(`$1')_foreachq($@)popdef(`$1')') +define(`foreachq', `pushdef(`$1')_$0($@)popdef(`$1')') define(`_arg1q', ``$1'') define(`_rest', `ifelse(`$#', `1', `', `dquote(shift($@))')') define(`_foreachq', `ifelse(`$2', `', `', diff --git a/examples/forloop2.m4 b/examples/forloop2.m4 index f1bdf0ef..41e0e165 100644 --- a/examples/forloop2.m4 +++ b/examples/forloop2.m4 @@ -4,7 +4,7 @@ divert(`-1') # performs sanity check that FROM is larger than TO # allows complex numerical expressions in TO and FROM define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1', - `pushdef(`$1', eval(`$2'))_forloop(`$1', + 
`pushdef(`$1', eval(`$2'))_$0(`$1', eval(`$3'), `$4')popdef(`$1')')') define(`_forloop', `$3`'ifelse(indir(`$1'), `$2', `', diff --git a/examples/null.m4 b/examples/null.m4 Binary files differindex 2632522c..79f4715f 100644 --- a/examples/null.m4 +++ b/examples/null.m4 diff --git a/examples/null.out b/examples/null.out Binary files differindex c42e03c1..aca4b785 100644 --- a/examples/null.out +++ b/examples/null.out diff --git a/src/builtin.c b/src/builtin.c index c89ad44e..beb8e350 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -198,6 +198,28 @@ find_builtin_by_name (const char *name) return bp; return bp + 1; } + +/*------------------------------------------------------------------. +| Print a representation of FUNC to OBS. If FLATTEN, output QUOTES | +| around an empty string instead. | +`------------------------------------------------------------------*/ +void +func_print (struct obstack *obs, const builtin *func, bool flatten, + const string_pair *quotes) +{ + assert (func); + if (flatten && quotes) + { + obstack_grow (obs, quotes->str1, quotes->len1); + obstack_grow (obs, quotes->str2, quotes->len2); + } + else if (!flatten) + { + obstack_1grow (obs, '<'); + obstack_grow (obs, func->name, strlen (func->name)); + obstack_1grow (obs, '>'); + } +} /*-------------------------------------------------------------------------. | Install a builtin macro with name NAME, bound to the C function given in | @@ -396,14 +418,15 @@ free_regex (void) } } -/*-------------------------------------------------------------------------. -| Define a predefined or user-defined macro, with name NAME, and expansion | -| TEXT. MODE destinguishes between the "define" and the "pushdef" case. | -| It is also used from main (). | -`-------------------------------------------------------------------------*/ +/*-----------------------------------------------------------------. +| Define a predefined or user-defined macro, with name NAME of | +| length NAME_LEN, and expansion TEXT. 
MODE is SYMBOL_INSERT for | +| "define" or SYMBOL_PUSHDEF for "pushdef". This function is also | +| used from main (). | +`-----------------------------------------------------------------*/ void -define_user_macro (const char *name, size_t len, const char *text, +define_user_macro (const char *name, size_t name_len, const char *text, symbol_lookup mode) { symbol *s; @@ -420,24 +443,23 @@ define_user_macro (const char *name, size_t len, const char *text, if (macro_sequence_inuse && text) { regoff_t offset = 0; - len = strlen (defn); + struct re_registers *regs = &macro_sequence_regs; + size_t len = strlen (defn); while (offset < len && (offset = re_search (&macro_sequence_buf, defn, len, offset, - len - offset, &macro_sequence_regs)) >= 0) + len - offset, regs)) >= 0) { /* Skip empty matches. */ - if (macro_sequence_regs.start[0] == macro_sequence_regs.end[0]) + if (regs->start[0] == regs->end[0]) offset++; else { - char tmp; - offset = macro_sequence_regs.end[0]; - tmp = defn[offset]; - defn[offset] = '\0'; - m4_warn (0, NULL, _("definition of `%s' contains sequence `%s'"), - name, defn + macro_sequence_regs.start[0]); - defn[offset] = tmp; + offset = regs->end[0]; + m4_warn (0, NULL, + _("definition of `%s' contains sequence `%.*s'"), + name, regs->end[0] - regs->start[0], + defn + regs->start[0]); } } if (offset == -2) @@ -597,34 +619,6 @@ shipout_int (struct obstack *obs, int val) obstack_grow (obs, s, strlen (s)); } -/*------------------------------------------------------------------. -| Print arguments from the table ARGV to obstack OBS, starting with | -| START, separated by SEP, and quoted by the current quotes if | -| QUOTED is true. 
| -`------------------------------------------------------------------*/ - -static void -dump_args (struct obstack *obs, int start, macro_arguments *argv, - const char *sep, bool quoted) -{ - unsigned int i; - bool dump_sep = false; - size_t len = strlen (sep); - unsigned int argc = arg_argc (argv); - - for (i = start; i < argc; i++) - { - if (dump_sep) - obstack_grow (obs, sep, len); - else - dump_sep = true; - if (quoted) - obstack_grow (obs, curr_quote.str1, curr_quote.len1); - obstack_grow (obs, ARG (i), ARG_LEN (i)); - if (quoted) - obstack_grow (obs, curr_quote.str2, curr_quote.len2); - } -} /* The rest of this file is code for builtins and expansion of user defined macros. All the functions for builtins have a prototype as: @@ -1516,7 +1510,7 @@ m4_errprint (struct obstack *obs, int argc, macro_arguments *argv) if (bad_argc (ARG (0), argc, 1, -1)) return; - dump_args (obs, 1, argv, " ", false); + arg_print (obs, argv, 1, NULL, true, " ", NULL, false); debug_flush_files (); len = obstack_object_size (obs); /* The close_stdin module makes it safe to skip checking the return @@ -1597,12 +1591,13 @@ m4_m4wrap (struct obstack *obs, int argc, macro_arguments *argv) { if (bad_argc (ARG (0), argc, 1, -1)) return; + obs = push_wrapup_init (); if (no_gnu_extensions) obstack_grow (obs, ARG (1), ARG_LEN (1)); else - dump_args (obs, 1, argv, " ", false); - obstack_1grow (obs, '\0'); - push_wrapup ((char *) obstack_finish (obs)); + // TODO - allow builtins, rather than always flattening + arg_print (obs, argv, 1, NULL, true, " ", NULL, false); + push_wrapup_finish (); } /* Enable tracing of all specified macros, or all, if none is specified. 
diff --git a/src/debug.c b/src/debug.c index d6b2ddc9..737ee524 100644 --- a/src/debug.c +++ b/src/debug.c @@ -359,44 +359,16 @@ trace_prepre (const char *name, int id) void trace_pre (const char *name, int id, macro_arguments *argv) { - int i; - const builtin *bp; - int argc = arg_argc (argv); - trace_header (id); trace_format ("%s", name); - if (argc > 1 && (debug_level & DEBUG_TRACE_ARGS)) + if (arg_argc (argv) > 1 && (debug_level & DEBUG_TRACE_ARGS)) { + int len = max_debug_argument_length; trace_format ("("); - - for (i = 1; i < argc; i++) - { - if (i != 1) - trace_format (", "); - - switch (arg_type (argv, i)) - { - case TOKEN_TEXT: - trace_format ("%l%S%r", ARG (i)); - break; - - case TOKEN_FUNC: - bp = find_builtin_by_addr (arg_func (argv, i)); - if (bp == NULL) - { - assert (!"trace_pre"); - abort (); - } - trace_format ("<%s>", bp->name); - break; - - default: - assert (!"trace_pre"); - abort (); - } - - } + arg_print (&trace, argv, 1, + (debug_level & DEBUG_TRACE_QUOTE) ? &curr_quote : NULL, + false, ", ", &len, true); trace_format (")"); } diff --git a/src/input.c b/src/input.c index 5bbaf088..063186a4 100644 --- a/src/input.c +++ b/src/input.c @@ -42,14 +42,14 @@ loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks. Pushing new input on the input stack is done by push_file (), - push_string (), push_wrapup () (for wrapup text), and push_macro () - (for macro definitions). Because macro expansion needs direct - access to the current input obstack (for optimization), push_string - () is split in two functions, push_string_init (), which returns a - pointer to the current input stack, and push_string_finish (), - which returns a pointer to the final text. The input_block *next - is used to manage the coordination between the different push - routines. + push_string (), push_wrapup_init/push_wrapup_finish () (for wrapup + text), and push_macro () (for macro definitions). 
Because macro + expansion needs direct access to the current input obstack (for + optimization), push_string () is split in two functions, + push_string_init (), which returns a pointer to the current input + stack, and push_string_finish (), which returns a pointer to the + final text. The input_block *next is used to manage the + coordination between the different push routines. The current file and line number are stored in two global variables, for use by the error handling functions in m4.c. Macro @@ -188,6 +188,9 @@ static struct re_registers regs; context. */ static unsigned int current_quote_age; +/* Cache a quote pair. See quote_cache. */ +static string_pair *cached_quote; + static bool pop_input (bool); static void set_quote_age (void); @@ -503,17 +506,14 @@ push_string_finish (void) return ret; } -/*------------------------------------------------------------------. -| The function push_wrapup () pushes a string on the wrapup stack. | -| When the normal input stack gets empty, the wrapup stack will | -| become the input stack, and push_string () and push_file () will | -| operate on wrapup_stack. Push_wrapup should be done as | -| push_string (), but this will suffice, as long as arguments to | -| m4_m4wrap () are moderate in size. | -`------------------------------------------------------------------*/ +/*--------------------------------------------------------------. +| The function push_wrapup_init () returns an obstack ready for | +| direct expansion of wrapup text, and should be followed by | +| push_wrapup_finish (). 
| +`--------------------------------------------------------------*/ -void -push_wrapup (const char *s) +struct obstack * +push_wrapup_init (void) { input_block *i; i = (input_block *) obstack_alloc (wrapup_stack, sizeof *i); @@ -521,9 +521,28 @@ push_wrapup (const char *s) i->type = INPUT_STRING; i->file = current_file; i->line = current_line; - i->u.u_s.len = strlen (s); - i->u.u_s.str = (char *) obstack_copy (wrapup_stack, s, i->u.u_s.len); wsp = i; + return wrapup_stack; +} + +/*---------------------------------------------------------------. +| After pushing wrapup text, push_wrapup_finish () completes the | +| bookkeeping. | +`---------------------------------------------------------------*/ +void +push_wrapup_finish (void) +{ + input_block *i = wsp; + if (obstack_object_size (wrapup_stack) == 0) + { + wsp = i->prev; + obstack_free (wrapup_stack, i); + } + else + { + i->u.u_s.len = obstack_object_size (wrapup_stack); + i->u.u_s.str = (char *) obstack_finish (wrapup_stack); + } } @@ -610,6 +629,7 @@ pop_input (bool cleanup) abort (); } obstack_free (current_input, isp); + cached_quote = NULL; next = NULL; /* might be set in push_string_init () */ isp = tmp; @@ -674,13 +694,7 @@ input_print (struct obstack *obs, const input_block *input) obstack_1grow (obs, '>'); break; case INPUT_MACRO: - { - const builtin *bp = find_builtin_by_addr (input->u.func); - assert (bp); - obstack_1grow (obs, '<'); - obstack_grow (obs, bp->name, strlen (bp->name)); - obstack_1grow (obs, '>'); - } + func_print (obs, find_builtin_by_addr (input->u.func), false, NULL); break; case INPUT_CHAIN: chain = input->u.u_c.chain; @@ -696,7 +710,9 @@ input_print (struct obstack *obs, const input_block *input) case CHAIN_ARGV: assert (!chain->u.u_a.comma); if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, - chain->u.u_a.quotes, &maxlen)) + quote_cache (NULL, chain->quote_age, + chain->u.u_a.quotes), + chain->u.u_a.flatten, NULL, &maxlen, false)) return; break; default: @@ -783,7 +799,9 @@ 
peek_input (bool allow_argv) argument from argv. */ push_string_init (); push_arg_quote (current_input, chain->u.u_a.argv, - chain->u.u_a.index, chain->u.u_a.quotes); + chain->u.u_a.index, + quote_cache (NULL, chain->quote_age, + chain->u.u_a.quotes)); chain->u.u_a.index++; chain->u.u_a.comma = true; push_string_finish (); @@ -911,7 +929,9 @@ next_char_1 (bool allow_quote) argument from argv. */ push_string_init (); push_arg_quote (current_input, chain->u.u_a.argv, - chain->u.u_a.index, chain->u.u_a.quotes); + chain->u.u_a.index, + quote_cache (NULL, chain->quote_age, + chain->u.u_a.quotes)); chain->u.u_a.index++; chain->u.u_a.comma = true; push_string_finish (); @@ -1008,7 +1028,9 @@ append_quote_token (struct obstack *obs, token_data *td) if (src_chain->type == CHAIN_ARGV) { arg_print (obs, src_chain->u.u_a.argv, src_chain->u.u_a.index, - src_chain->u.u_a.quotes, NULL); + quote_cache (NULL, src_chain->quote_age, + src_chain->u.u_a.quotes), + src_chain->u.u_a.flatten, NULL, NULL, false); arg_adjust_refcount (src_chain->u.u_a.argv, false); return; } @@ -1369,6 +1391,7 @@ set_quote_age (void) | (*curr_quote.str2 & 0xff)); else current_quote_age = 0; + cached_quote = NULL; } /* Return the current quote age. Each non-trivial changequote alters @@ -1394,6 +1417,53 @@ safe_quotes (void) { return current_quote_age != 0; } + +/* Interface for caching frequently used quote pairs, using AGE for + optimization. If QUOTES is NULL, don't use quoting. If OBS is + non-NULL, AGE should be the current quote age, and QUOTES should be + &curr_quote; the return value will be a cached quote pair, where + the pointer is valid at least as long as OBS is not reset, but + whose contents are only guaranteed until the next changequote or + quote_cache. Otherwise, OBS is NULL, AGE should be the same as + before, and QUOTES should be a previously returned cache value; + used to refresh the contents of the result. 
*/ +const string_pair * +quote_cache (struct obstack *obs, unsigned int age, const string_pair *quotes) +{ + static char lquote[2]; + static char rquote[2]; + static string_pair simple = {lquote, 1, rquote, 1}; + + /* Implementation - if AGE is non-zero, then the implementation of + set_quote_age guarantees that we can recreate the return value on + the fly; so we use static storage, and the contents must be used + immediately. If AGE is zero, then we must copy QUOTES onto OBS + (since changequote will invalidate the original), but we might as + well cache that copy (in case the current expansion contains more + than one instance of $@). */ + if (!quotes) + return NULL; + if (age) + { + *lquote = (age >> 8) & 0xff; + *rquote = age & 0xff; + return &simple; + } + if (!obs) + return quotes; + assert (next && quotes == &curr_quote); + if (!cached_quote) + { + assert (obs == current_input && obstack_object_size (obs) == 0); + cached_quote = (string_pair *) obstack_copy (obs, quotes, + sizeof *quotes); + cached_quote->str1 = (char *) obstack_copy0 (obs, quotes->str1, + quotes->len1); + cached_quote->str2 = (char *) obstack_copy0 (obs, quotes->str2, + quotes->len2); + } + return cached_quote; +} /*--------------------------------------------------------------------. @@ -113,6 +113,7 @@ m4_verror_at_line (bool warn, int status, int errnum, const char *file, *p++ = *macro++; } while (*macro); + *p = '\0'; } /* Prepend warning and the macro name, as needed. 
But if that fails for non-memory reasons (unlikely), then still use the original @@ -386,7 +386,8 @@ void push_macro (builtin_func *); struct obstack *push_string_init (void); bool push_token (token_data *, int, bool); const input_block *push_string_finish (void); -void push_wrapup (const char *); +struct obstack *push_wrapup_init (void); +void push_wrapup_finish (void); bool pop_wrapup (void); void input_print (struct obstack *, const input_block *); @@ -410,6 +411,8 @@ void set_word_regexp (const char *, const char *); #endif unsigned int quote_age (void); bool safe_quotes (void); +const string_pair *quote_cache (struct obstack *, unsigned int, + const string_pair *); /* File: output.c --- output functions. */ extern int current_diversion; @@ -494,7 +497,7 @@ size_t arg_len (macro_arguments *, unsigned int); builtin_func *arg_func (macro_arguments *, unsigned int); struct obstack *arg_scratch (void); bool arg_print (struct obstack *, macro_arguments *, unsigned int, - const string_pair *, int *); + const string_pair *, bool, const char *, int *, bool); macro_arguments *make_argv_ref (macro_arguments *, const char *, size_t, bool, bool); void push_arg (struct obstack *, macro_arguments *, unsigned int); @@ -553,6 +556,7 @@ const char *ntoa (int32_t, int); const builtin *find_builtin_by_addr (builtin_func *); const builtin *find_builtin_by_name (const char *); +void func_print (struct obstack *, const builtin *, bool, const string_pair *); /* File: path.c --- path search for include files. 
*/ diff --git a/src/macro.c b/src/macro.c index 8341dd2f..7f817b39 100644 --- a/src/macro.c +++ b/src/macro.c @@ -911,7 +911,9 @@ arg_text (macro_arguments *argv, unsigned int index) break; case CHAIN_ARGV: arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, - chain->u.u_a.quotes, NULL); + quote_cache (NULL, chain->quote_age, + chain->u.u_a.quotes), + chain->u.u_a.flatten, NULL, NULL, false); break; default: assert (!"arg_text"); @@ -1097,50 +1099,70 @@ arg_scratch (void) /* Dump a representation of ARGV to the obstack OBS, starting with argument INDEX. If QUOTES is non-NULL, each argument is displayed - with those quotes. If MAX_LEN is non-NULL, truncate the output - after *MAX_LEN bytes are output and return true; otherwise, return - false, and reduce *MAX_LEN by the number of bytes output. */ + with those quotes. If FLATTEN, builtins are ignored. Separate + arguments with SEP, which defaults to a comma. If MAX_LEN is + non-NULL, truncate the output after *MAX_LEN bytes are output and + return true; otherwise, return false, and reduce *MAX_LEN by the + number of bytes output. If QUOTE_EACH, the truncation length is + reset for each argument, quotes do not count against length, and + all arguments are printed; otherwise, quotes count against the + length and trailing arguments may be discarded. */ bool arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index, - const string_pair *quotes, int *max_len) + const string_pair *quotes, bool flatten, const char *sep, + int *max_len, bool quote_each) { int len = max_len ? *max_len : INT_MAX; unsigned int i; token_data *token; token_chain *chain; - bool comma = false; - + bool use_sep = false; + bool done; + size_t sep_len; + size_t *plen = quote_each ? 
NULL : &len; + + if (!sep) + sep = ","; + sep_len = strlen (sep); for (i = index; i < argv->argc; i++) { - if (comma && obstack_print (obs, ",", 1, &len)) + if (quote_each && max_len) + len = *max_len; + if (use_sep && obstack_print (obs, sep, sep_len, plen)) return true; - else - comma = true; + use_sep = true; token = arg_token (argv, i, NULL); - if (quotes && obstack_print (obs, quotes->str1, quotes->len1, &len)) - return true; switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: + if (quotes && obstack_print (obs, quotes->str1, quotes->len1, plen)) + return true; if (obstack_print (obs, TOKEN_DATA_TEXT (token), - TOKEN_DATA_LEN (token), &len)) + TOKEN_DATA_LEN (token), &len) && !quote_each) + return true; + if (quotes && obstack_print (obs, quotes->str2, quotes->len2, plen)) return true; break; case TOKEN_COMP: + if (quotes && obstack_print (obs, quotes->str1, quotes->len1, plen)) + return true; chain = token->u.u_c.chain; - while (chain) + done = false; + while (chain && !done) { switch (chain->type) { case CHAIN_STR: if (obstack_print (obs, chain->u.u_s.str, chain->u.u_s.len, &len)) - return true; + done = true; break; case CHAIN_ARGV: if (arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, - chain->u.u_a.quotes, &len)) - return true; + quote_cache (NULL, chain->quote_age, + chain->u.u_a.quotes), + flatten, NULL, &len, false)) + done = true; break; default: assert (!"arg_print"); @@ -1148,16 +1170,19 @@ arg_print (struct obstack *obs, macro_arguments *argv, unsigned int index, } chain = chain->next; } + if (done && !quote_each) + return true; + if (quotes && obstack_print (obs, quotes->str2, quotes->len2, plen)) + return true; break; case TOKEN_FUNC: - // TODO - support func? 
+ func_print (obs, find_builtin_by_addr (TOKEN_DATA_FUNC (token)), + flatten, quotes); + break; default: assert (!"arg_print"); abort (); } - if (quotes && obstack_print (obs, quotes->str2, quotes->len2, - &len)) - return true; } if (max_len) *max_len = len; @@ -1201,21 +1226,7 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level, chain->u.u_a.flatten = flatten; chain->u.u_a.comma = false; chain->u.u_a.skip_last = false; - if (quotes) - { - /* Clone the quotes into the obstack, since a subsequent - changequote may take effect before the $@ ref is - rescanned. */ - // TODO - optimize when quote_age is nonzero? Cache in argv in case - // user macro expands to multiple refs? - string_pair *tmp = (string_pair *) obstack_copy (obs, quotes, - sizeof *quotes); - tmp->str1 = (char *) obstack_copy0 (obs, quotes->str1, quotes->len1); - tmp->str2 = (char *) obstack_copy0 (obs, quotes->str2, quotes->len2); - chain->u.u_a.quotes = tmp; - } - else - chain->u.u_a.quotes = NULL; + chain->u.u_a.quotes = quote_cache (obs, chain->quote_age, quotes); return token; } |