diff options
author | Eric Blake <ebb9@byu.net> | 2007-11-01 09:28:46 -0600 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-02-14 14:10:50 -0700 |
commit | 1fecefc8b990254aa667a01d12c6c7a2d716df06 (patch) | |
tree | d49eb7931fb7b2c3f48ff914cbf5a4de4fd16daa | |
parent | 9d08c0c8685fdd749b20062e03c061275dc8afbc (diff) | |
download | m4-1fecefc8b990254aa667a01d12c6c7a2d716df06.tar.gz |
Stage15: return argv refs back to collect_arguments
-rwxr-xr-x | checks/check-them | 10 | ||||
-rw-r--r-- | doc/m4.texinfo | 22 | ||||
-rw-r--r-- | examples/null.m4 | bin | 3671 -> 5764 bytes | |||
-rw-r--r-- | examples/null.out | bin | 338 -> 400 bytes | |||
-rw-r--r-- | m4/gnulib-cache.m4 | 4 | ||||
-rw-r--r-- | src/input.c | 180 | ||||
-rw-r--r-- | src/m4.c | 30 | ||||
-rw-r--r-- | src/m4.h | 8 | ||||
-rw-r--r-- | src/macro.c | 112 | ||||
-rw-r--r-- | src/symtab.c | 2 |
10 files changed, 285 insertions, 83 deletions
diff --git a/checks/check-them b/checks/check-them index daa1b001..9fca39b5 100755 --- a/checks/check-them +++ b/checks/check-them @@ -1,6 +1,6 @@ #!/bin/sh # Check GNU m4 against examples from the manual source. -# Copyright (C) 1992, 2006, 2007 Free Software Foundation, Inc. +# Copyright (C) 1992, 2006, 2007, 2008 Free Software Foundation, Inc. # Sanity check what we are testing m4 --version @@ -68,7 +68,7 @@ do echo "Checking $file" options=`sed -ne '3s/^dnl @ extra options: //p;3q' "$file"` sed -e '/^dnl @/d' -e '/^\^D$/q' "$file" \ - | LC_MESSAGES=C m4 -d -I "$examples" $options - >$out 2>$err + | LC_MESSAGES=C M4PATH=$examples m4 -d $options - >$out 2>$err stat=$? xstat=`sed -ne '2s/^dnl @ expected status: //p;2q' "$file"` @@ -96,9 +96,11 @@ do xerrfile=`sed -n 's/^dnl @ expected error: //p' "$file"` if test -z "$xerrfile" ; then - sed -e '/^dnl @error{}/!d' -e 's///' -e "s|^m4:|$m4:|" "$file" > $xerr + sed '/^dnl @error{}/!d; s///; '"s|^m4:|$m4:|; s|\.\./examples|$examples|" \ + "$file" > $xerr else - cp "$examples/$xerrfile" $xerr + sed "s|^m4:|$m4:|; s|\.\./examples|$examples|" \ + "$examples/$xerrfile" > $xerr fi # For the benefit of mingw, normalize \r\n line endings diff --git a/doc/m4.texinfo b/doc/m4.texinfo index 420d8177..b2599c98 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -933,7 +933,13 @@ exception of the @sc{nul} character (the zero byte @samp{'\0'}). @comment xout: null.out @comment xerr: null.err @example -include(`null.m4')dnl +define(`m4exit')include(`null.m4')dnl +@end example + +@comment status: 2 +@example +include(`null.m4') +@result{}# This file tests m4 behavior on NUL bytes. @end example @end ignore @@ -2408,6 +2414,20 @@ indir(`divert', defn(`foo')) @result{} @end example +Warning messages issued on behalf of an indirect macro use an +unambiguous representation of the macro name, using escape sequences +similar to C strings, and with colons also quoted. 
+ +@example +define(`%%:\ +odd', defn(`divnum')) +@result{} +indir(`%%:\ +odd', `extra') +@error{}m4:stdin:3: Warning: %%\:\\\nodd: extra arguments ignored: 1 > 0 +@result{}0 +@end example + @node Builtin @section Indirect call of builtins diff --git a/examples/null.m4 b/examples/null.m4 Binary files differ index 904a6efb..2632522c 100644 --- a/examples/null.m4 +++ b/examples/null.m4 diff --git a/examples/null.out b/examples/null.out Binary files differ index 6e8a114c..c42e03c1 100644 --- a/examples/null.out +++ b/examples/null.out diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index 0055a697..be1c1217 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -15,11 +15,11 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix +# gnulib-tool --import --dir=. 
--local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([local]) -gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix]) +gl_MODULES([assert avltree-oset binary-io clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer free fseeko gendocs getopt gnupload gpl-3.0 intprops memmem mkstemp obstack quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xprintf xvasprintf-posix]) gl_AVOID([]) gl_SOURCE_BASE([lib]) gl_M4_BASE([m4]) diff --git a/src/input.c b/src/input.c index 8738aebb..5bbaf088 100644 --- a/src/input.c +++ b/src/input.c @@ -154,6 +154,7 @@ static bool input_change; #define CHAR_EOF 256 /* Character return on EOF. */ #define CHAR_MACRO 257 /* Character return for MACRO token. */ #define CHAR_QUOTE 258 /* Character return for quoted string. */ +#define CHAR_ARGV 259 /* Character return for $@ reference. */ /* Quote chars. 
*/ string_pair curr_quote; @@ -449,7 +450,7 @@ push_token (token_data *token, int level, bool inuse) next->u.u_c.end = chain; if (chain->type == CHAIN_ARGV) { - assert (!chain->u.u_a.comma); + assert (!chain->u.u_a.comma && !chain->u.u_a.skip_last); inuse |= arg_adjust_refcount (chain->u.u_a.argv, true); } else if (chain->type == CHAIN_STR && chain->u.u_s.level >= 0) @@ -712,17 +713,18 @@ input_print (struct obstack *obs, const input_block *input) } -/*-----------------------------------------------------------------. -| Low level input is done a character at a time. The function | -| peek_input () is used to look at the next character in the input | -| stream. At any given time, it reads from the input_block on the | -| top of the current input stack. The return value is an unsigned | -| char, or CHAR_EOF if there is no more input, or CHAR_MACRO if a | -| builtin token occurs next. | -`-----------------------------------------------------------------*/ +/*------------------------------------------------------------------. +| Low level input is done a character at a time. The function | +| peek_input () is used to look at the next character in the input | +| stream. At any given time, it reads from the input_block on the | +| top of the current input stack. The return value is an unsigned | +| char, CHAR_EOF if there is no more input, CHAR_MACRO if a builtin | +| token occurs next, or CHAR_ARGV if ALLOW_ARGV and the input is | +| visiting an argv reference with the correct quoting. 
| +`------------------------------------------------------------------*/ static int -peek_input (void) +peek_input (bool allow_argv) { int ch; input_block *block = isp; @@ -757,6 +759,7 @@ peek_input (void) chain = block->u.u_c.chain; while (chain) { + unsigned int argc; switch (chain->type) { case CHAIN_STR: @@ -764,12 +767,17 @@ peek_input (void) return to_uchar (*chain->u.u_s.str); break; case CHAIN_ARGV: - // TODO - figure out how to pass multiple arguments to - // macro.c at once - if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv)) + argc = arg_argc (chain->u.u_a.argv); + if (chain->u.u_a.index == argc) break; if (chain->u.u_a.comma) return ','; + /* Only return a reference if the quoting is correct + and the reference has more than one argument + left. */ + if (allow_argv && chain->quote_age == current_quote_age + && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc) + return CHAR_ARGV; /* Rather than directly parse argv here, we push another input block containing the next unparsed argument from argv. 
*/ @@ -779,7 +787,7 @@ peek_input (void) chain->u.u_a.index++; chain->u.u_a.comma = true; push_string_finish (); - return peek_input (); + return peek_input (allow_argv); default: assert (!"peek_input"); abort (); @@ -872,9 +880,7 @@ next_char_1 (bool allow_quote) chain = isp->u.u_c.chain; while (chain) { - // TODO also support returning $@ as CHAR_QUOTE - if (allow_quote && chain->quote_age == current_quote_age - && chain->type == CHAIN_STR) + if (allow_quote && chain->quote_age == current_quote_age) return CHAR_QUOTE; switch (chain->type) { @@ -890,8 +896,6 @@ next_char_1 (bool allow_quote) adjust_refcount (chain->u.u_s.level, false); break; case CHAIN_ARGV: - // TODO - figure out how to pass multiple arguments to - // macro.c at once if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv)) { arg_adjust_refcount (chain->u.u_a.argv, false); @@ -958,7 +962,6 @@ skip_line (const char *name) if (file != current_file || line != current_line) input_change = true; } - /*-------------------------------------------------------------------. | When a MACRO token is seen, next_token () uses init_macro_token () | @@ -985,20 +988,31 @@ append_quote_token (struct obstack *obs, token_data *td) token_chain *src_chain = isp->u.u_c.chain; token_chain *chain; - assert (isp->type == INPUT_CHAIN && obs && current_quote_age - && src_chain->type == CHAIN_STR && src_chain->u.u_s.level >= 0); + assert (isp->type == INPUT_CHAIN && obs && current_quote_age); isp->u.u_c.chain = src_chain->next; /* Speed consideration - for short enough tokens, the speed and memory overhead of parsing another INPUT_CHAIN link outweighs the time to inline the token text. 
*/ - if (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD) + if (src_chain->type == CHAIN_STR + && src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD) { + assert (src_chain->u.u_s.level >= 0); obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len); adjust_refcount (src_chain->u.u_s.level, false); return; } + // TODO preserve $@ through a quoted context, in case a later reference + // strips those quotes. + if (src_chain->type == CHAIN_ARGV) + { + arg_print (obs, src_chain->u.u_a.argv, src_chain->u.u_a.index, + src_chain->u.u_a.quotes, NULL); + arg_adjust_refcount (src_chain->u.u_a.argv, false); + return; + } + if (TOKEN_DATA_TYPE (td) == TOKEN_VOID) { TOKEN_DATA_TYPE (td) = TOKEN_COMP; @@ -1015,6 +1029,65 @@ append_quote_token (struct obstack *obs, token_data *td) chain->next = NULL; } + +/*-------------------------------------------------------------------. +| When an ARGV token is seen, convert TD to point to it via a | +| composite token. Use OBS for any additional allocations needed to | +| store the token chain. | +`-------------------------------------------------------------------*/ +static void +init_argv_token (struct obstack *obs, token_data *td) +{ + token_chain *src_chain; + token_chain *chain; + int ch = next_char (true); + + assert (ch == CHAR_QUOTE && TOKEN_DATA_TYPE (td) == TOKEN_VOID + && isp->type == INPUT_CHAIN && isp->u.u_c.chain->type == CHAIN_ARGV + && obs && obstack_object_size (obs) == 0); + + src_chain = isp->u.u_c.chain; + isp->u.u_c.chain = src_chain->next; + TOKEN_DATA_TYPE (td) = TOKEN_COMP; + /* Clone the link, since the input will be discarded soon. */ + chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain); + td->u.u_c.chain = td->u.u_c.end = chain; + chain->next = NULL; + + /* If the next character is not ',' or ')', then unlink the last + argument from argv and schedule it for reparsing. This way, + expand_argument never has to deal with concatenation of argv with + arbitrary text. 
Note that the implementation of safe_quotes + ensures peek_input won't return CHAR_ARGV if the user is perverse + enough to mix comment delimiters with argument separators: + + define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c)) + => 2 (not 3) + + Therefore, we do not have to worry about calling MATCH, and thus + do not have to worry about pop_input being called and + invalidating the argv reference. + + When the $@ ref is used unchanged, we completely bypass the + decrement of the argv refcount in next_char_1, since the ref is + still live via the current collect_arguments. However, when the + last element of the $@ ref is reparsed, we must increase the argv + refcount here, to compensate for the fact that it will be + decreased once the final element is parsed. */ + assert (*curr_comm.str1 != ',' && *curr_comm.str1 != ')' + && *curr_comm.str1 != *curr_quote.str1); + ch = peek_input (false); + if (ch != ',' && ch != ')') + { + isp->u.u_c.chain = src_chain; + src_chain->u.u_a.index = arg_argc (chain->u.u_a.argv) - 1; + src_chain->u.u_a.comma = true; + chain->u.u_a.skip_last = true; + arg_adjust_refcount (chain->u.u_a.argv, true); + } +} + + /*------------------------------------------------------------------. | This function is for matching a string against a prefix of the | | input stream. If the string S matches the input and CONSUME is | @@ -1031,7 +1104,7 @@ match_input (const char *s, bool consume) const char *t; bool result = false; - ch = peek_input (); + ch = peek_input (false); if (ch != to_uchar (*s)) return false; /* fail */ @@ -1043,7 +1116,7 @@ match_input (const char *s, bool consume) } next_char (false); - for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); ) + for (n = 1, t = s++; (ch = peek_input (false)) == to_uchar (*s++); ) { next_char (false); n++; @@ -1324,18 +1397,20 @@ safe_quotes (void) /*--------------------------------------------------------------------. 
-| Parse a single token from the input stream, set TD to its | -| contents, and return its type. A token is TOKEN_EOF if the | +| Parse a single token from the input stream, set TD to its | +| contents, and return its type. A token is TOKEN_EOF if the | | input_stack is empty; TOKEN_STRING for a quoted string or comment; | -| TOKEN_WORD for something that is a potential macro name; and | +| TOKEN_WORD for something that is a potential macro name; and | | TOKEN_SIMPLE for any single character that is not a part of any of | | the previous types. If LINE is not NULL, set *LINE to the line | | where the token starts. If OBS is not NULL, expand TOKEN_STRING | | directly into OBS rather than in token_stack temporary storage | -| area, and TD could be a TOKEN_COMP instead of the usual | -| TOKEN_TEXT. Report errors (unterminated comments or strings) on | -| behalf of CALLER, if non-NULL. | -| | +| area, and TD could be a TOKEN_COMP instead of the usual | +| TOKEN_TEXT. If ALLOW_ARGV, OBS must be non-NULL, and an entire | +| series of arguments can be returned as TOKEN_ARGV when a $@ | +| reference is encountered. Report errors (unterminated comments or | +| strings) on behalf of CALLER, if non-NULL. | +| | | Next_token () returns the token type, and passes back a pointer to | | the token data through TD. Non-string token text is collected on | | the obstack token_stack, which never contains more than one token | @@ -1344,7 +1419,8 @@ safe_quotes (void) `--------------------------------------------------------------------*/ token_type -next_token (token_data *td, int *line, struct obstack *obs, const char *caller) +next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, + const char *caller) { int ch; int quote_level; @@ -1366,7 +1442,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) /* Can't consume character until after CHAR_MACRO is handled. 
*/ TOKEN_DATA_TYPE (td) = TOKEN_VOID; - ch = peek_input (); + ch = peek_input (allow_argv && current_quote_age); if (ch == CHAR_EOF) { #ifdef DEBUG_INPUT @@ -1385,6 +1461,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) #endif /* DEBUG_INPUT */ return TOKEN_MACDEF; } + if (ch == CHAR_ARGV) + { + init_argv_token (obs, td); +#ifdef DEBUG_INPUT + xfprintf (stderr, "next_token -> ARGV (%d args)\n", + (arg_argc (td->u.u_c.chain->u.u_a.argv) + - td->u.u_c.chain->u.u_a.index + - (td->u.u_c.chain->u.u_a.skip_last ? 1 : 0))); +#endif + return TOKEN_ARGV; + } next_char (false); /* Consume character we already peeked at. */ file = current_file; @@ -1413,7 +1500,8 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) else if (default_word_regexp && (isalpha (ch) || ch == '_')) { obstack_1grow (&token_stack, ch); - while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_')) + while ((ch = peek_input (false)) < CHAR_EOF + && (isalnum (ch) || ch == '_')) { obstack_1grow (&token_stack, ch); next_char (false); @@ -1428,7 +1516,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) obstack_1grow (&token_stack, ch); while (1) { - ch = peek_input (); + ch = peek_input (false); if (ch >= CHAR_EOF) break; obstack_1grow (&token_stack, ch); @@ -1551,9 +1639,19 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) token_type_string (type)); while (chain) { - assert (chain->type == CHAIN_STR); - xfprintf (stderr, "%s", chain->u.u_s.str); - len += chain->u.u_s.len; + switch (chain->type) + { + case CHAIN_STR: + xfprintf (stderr, "%s", chain->u.u_s.str); + len += chain->u.u_s.len; + break; + case CHAIN_ARGV: + xfprintf (stderr, "{$@}"); + break; + default: + assert (!"next_token"); + abort (); + } links++; chain = chain->next; } @@ -1573,7 +1671,7 @@ token_type peek_token (void) { token_type result; - int ch = peek_input (); + int ch = peek_input (false); if 
(ch == CHAR_EOF) { @@ -1688,7 +1786,7 @@ lex_debug (void) token_type t; token_data td; - while ((t = next_token (&td, NULL, NULL, "<debug>")) != TOKEN_EOF) + while ((t = next_token (&td, NULL, NULL, false, "<debug>")) != TOKEN_EOF) print_token ("lex", t, &td); } #endif /* DEBUG_INPUT */ @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, 2007 - Free Software Foundation, Inc. + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2004, 2005, 2006, + 2007, 2008 Free Software Foundation, Inc. This file is part of GNU M4. @@ -98,18 +98,37 @@ m4_verror_at_line (bool warn, int status, int errnum, const char *file, va_list args) { char *full = NULL; + char *safe_macro = NULL; + + /* Sanitize MACRO, since we are turning around and using it in a + format string. The allocation is overly conservative, but + problematic macro names only occur via indir or changeword. */ + if (macro && strchr (macro, '%')) + { + char *p = safe_macro = xcharalloc (2 * strlen (macro) + 1); + do + { + if (*macro == '%') + *p++ = '%'; + *p++ = *macro++; + } + while (*macro); + } /* Prepend warning and the macro name, as needed. But if that fails for non-memory reasons (unlikely), then still use the original format. */ if (warn && macro) - full = xasprintf (_("Warning: %s: %s"), macro, format); + full = xasprintf (_("Warning: %s: %s"), + quotearg (safe_macro ? safe_macro : macro), format); else if (warn) full = xasprintf (_("Warning: %s"), format); else if (macro) - full = xasprintf (_("%s: %s"), macro, format); + full = xasprintf (_("%s: %s"), + quotearg (safe_macro ? safe_macro : macro), format); verror_at_line (status, errnum, line ? file : NULL, line, full ? 
full : format, args); free (full); + free (safe_macro); if ((!warn || fatal_warnings) && !retcode) retcode = EXIT_FAILURE; } @@ -435,6 +454,8 @@ main (int argc, char *const *argv, char *const *envp) include_init (); debug_init (); + set_quoting_style (NULL, escape_quoting_style); + set_char_quoting (NULL, ':', 1); #ifdef USE_STACKOVF setup_stackovf_trap (argv, envp, stackovf_handler); #endif @@ -687,6 +708,7 @@ main (int argc, char *const *argv, char *const *envp) } output_exit (); free_regex (); + quotearg_free (); #ifdef DEBUG_REGEX if (trace_file) fclose (trace_file); @@ -43,6 +43,7 @@ #include "exitfail.h" #include "intprops.h" #include "obstack.h" +#include "quotearg.h" #include "stdio--.h" #include "stdlib--.h" #include "unistd--.h" @@ -265,7 +266,8 @@ enum token_type TOKEN_COMMA, /* Active character `,', TOKEN_TEXT. */ TOKEN_CLOSE, /* Active character `)', TOKEN_TEXT. */ TOKEN_SIMPLE, /* Any other single character, TOKEN_TEXT. */ - TOKEN_MACDEF /* A macro's definition (see "defn"), TOKEN_FUNC. */ + TOKEN_MACDEF, /* A macro's definition (see "defn"), TOKEN_FUNC. */ + TOKEN_ARGV /* A series of parameters, TOKEN_COMP. */ }; /* The data for a token, a macro argument, and a macro definition. */ @@ -308,6 +310,7 @@ struct token_chain unsigned int index; /* Argument index within argv. */ bool_bitfield flatten : 1; /* True to treat builtins as text. */ bool_bitfield comma : 1; /* True when `,' is next input. */ + bool_bitfield skip_last : 1; /* True if last argument omitted. */ const string_pair *quotes; /* NULL for $*, quotes for $@. 
*/ } u_a; @@ -372,7 +375,8 @@ typedef enum token_data_type token_data_type; void input_init (void); token_type peek_token (void); -token_type next_token (token_data *, int *, struct obstack *, const char *); +token_type next_token (token_data *, int *, struct obstack *, bool, + const char *); void skip_line (const char *); /* push back input */ diff --git a/src/macro.c b/src/macro.c index 0cc42225..8341dd2f 100644 --- a/src/macro.c +++ b/src/macro.c @@ -216,7 +216,7 @@ expand_input (void) TOKEN_DATA_ORIG_TEXT (&empty_token) = ""; #endif - while ((t = next_token (&td, &line, NULL, NULL)) != TOKEN_EOF) + while ((t = next_token (&td, &line, NULL, false, NULL)) != TOKEN_EOF) expand_token (NULL, t, &td, line, true); for (i = 0; i < stacks_count; i++) @@ -364,7 +364,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) /* Skip leading white space. */ do { - t = next_token (&td, NULL, obs, caller); + t = next_token (&td, NULL, obs, true, caller); } while (t == TOKEN_SIMPLE && isspace (to_uchar (*TOKEN_DATA_TEXT (&td)))); @@ -455,6 +455,20 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) } break; + case TOKEN_ARGV: + assert (paren_level == 0 && TOKEN_DATA_TYPE (argp) == TOKEN_VOID + && obstack_object_size (obs) == 0 + && td.u.u_c.chain == td.u.u_c.end + && td.u.u_c.chain->type == CHAIN_ARGV); + TOKEN_DATA_TYPE (argp) = TOKEN_COMP; + argp->u.u_c.chain = argp->u.u_c.end = td.u.u_c.chain; + t = next_token (&td, NULL, NULL, false, caller); + if (argp->u.u_c.chain->u.u_a.skip_last) + assert (t == TOKEN_COMMA); + else + assert (t == TOKEN_COMMA || t == TOKEN_CLOSE); + return t == TOKEN_COMMA; + default: assert (!"expand_argument"); abort (); @@ -462,7 +476,7 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) if (TOKEN_DATA_TYPE (argp) != TOKEN_VOID || obstack_object_size (obs)) first = false; - t = next_token (&td, NULL, obs, caller); + t = next_token (&td, NULL, obs, first, caller); } } @@ 
-496,7 +510,8 @@ collect_arguments (symbol *sym, struct obstack *arguments, if (peek_token () == TOKEN_OPEN) { - next_token (&td, NULL, NULL, SYMBOL_NAME (sym)); /* gobble parenthesis */ + /* gobble parenthesis */ + next_token (&td, NULL, NULL, false, SYMBOL_NAME (sym)); do { tdp = (token_data *) obstack_alloc (arguments, sizeof *tdp); @@ -519,12 +534,22 @@ collect_arguments (symbol *sym, struct obstack *arguments, && TOKEN_DATA_QUOTE_AGE (tdp) != args.quote_age) args.quote_age = 0; else if (TOKEN_DATA_TYPE (tdp) == TOKEN_COMP) - args.has_ref = true; + { + args.has_ref = true; + if (tdp->u.u_c.chain->type == CHAIN_ARGV) + { + args.argc += (tdp->u.u_c.chain->u.u_a.argv->argc + - tdp->u.u_c.chain->u.u_a.index + - tdp->u.u_c.chain->u.u_a.skip_last - 1); + args.wrapper = true; + } + } } while (more_args); } argv = (macro_arguments *) obstack_finish (argv_stack); argv->argc = args.argc; + argv->wrapper = args.wrapper; argv->has_ref = args.has_ref; if (args.quote_age != quote_age ()) argv->quote_age = 0; @@ -734,9 +759,20 @@ arg_adjust_refcount (macro_arguments *argv, bool increase) chain = argv->array[i]->u.u_c.chain; while (chain) { - assert (chain->type == CHAIN_STR); - if (chain->u.u_s.level >= 0) - adjust_refcount (chain->u.u_s.level, increase); + switch (chain->type) + { + case CHAIN_STR: + if (chain->u.u_s.level >= 0) + adjust_refcount (chain->u.u_s.level, increase); + break; + case CHAIN_ARGV: + assert (chain->u.u_a.argv->inuse); + arg_adjust_refcount (chain->u.u_a.argv, increase); + break; + default: + assert (!"arg_adjust_refcount"); + abort (); + } chain = chain->next; } } @@ -766,12 +802,14 @@ arg_token (macro_arguments *argv, unsigned int index, int *level) for (i = 0; i < argv->arraylen; i++) { token = argv->array[i]; - if (TOKEN_DATA_TYPE (token) == TOKEN_COMP) + if (TOKEN_DATA_TYPE (token) == TOKEN_COMP + && token->u.u_c.chain->type == CHAIN_ARGV) { token_chain *chain = token->u.u_c.chain; // TODO for now we support only a single-length $@ chain... 
- assert (!chain->next && chain->type == CHAIN_ARGV); - if (index < chain->u.u_a.argv->argc - (chain->u.u_a.index - 1)) + assert (!chain->next); + if (index <= (chain->u.u_a.argv->argc - chain->u.u_a.index + - chain->u.u_a.skip_last)) { token = arg_token (chain->u.u_a.argv, chain->u.u_a.index - 1 + index, level); @@ -780,7 +818,8 @@ arg_token (macro_arguments *argv, unsigned int index, int *level) token = &empty_token; break; } - index -= chain->u.u_a.argv->argc - chain->u.u_a.index; + index -= (chain->u.u_a.argv->argc - chain->u.u_a.index + - chain->u.u_a.skip_last); } else if (--index == 0) break; @@ -793,18 +832,24 @@ arg_token (macro_arguments *argv, unsigned int index, int *level) static void arg_mark (macro_arguments *argv) { + unsigned int i; + token_chain *chain; + if (argv->inuse) return; argv->inuse = true; if (argv->wrapper) - { - // TODO for now we support only a single-length $@ chain... - assert (argv->arraylen == 1 - && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP - && !argv->array[0]->u.u_c.chain->next - && argv->array[0]->u.u_c.chain->type == CHAIN_ARGV); - argv->array[0]->u.u_c.chain->u.u_a.argv->inuse = true; - } + for (i = 0; i < argv->arraylen; i++) + if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP) + { + chain = argv->array[i]->u.u_c.chain; + while (chain) + { + if (chain->type == CHAIN_ARGV && !chain->u.u_a.argv->inuse) + arg_mark (chain->u.u_a.argv); + chain = chain->next; + } + } } /* Given ARGV, return how many arguments it refers to. */ @@ -854,14 +899,24 @@ arg_text (macro_arguments *argv, unsigned int index) case TOKEN_TEXT: return TOKEN_DATA_TEXT (token); case TOKEN_COMP: - // TODO - concatenate argv refs, or even functions? For now, we assume - // all chain elements are text. + // TODO - concatenate functions? 
chain = token->u.u_c.chain; obs = arg_scratch (); while (chain) { - assert (chain->type == CHAIN_STR); - obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); + switch (chain->type) + { + case CHAIN_STR: + obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); + break; + case CHAIN_ARGV: + arg_print (obs, chain->u.u_a.argv, chain->u.u_a.index, + chain->u.u_a.quotes, NULL); + break; + default: + assert (!"arg_text"); + abort (); + } chain = chain->next; } obstack_1grow (obs, '\0'); @@ -1122,13 +1177,13 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level, token_chain *chain; assert (obstack_object_size (obs) == 0); - if (argv->wrapper) + if (argv->wrapper && argv->arraylen == 1) { // TODO for now we support only a single-length $@ chain... - assert (argv->arraylen == 1 - && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP); + assert (TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP); chain = argv->array[0]->u.u_c.chain; - assert (!chain->next && chain->type == CHAIN_ARGV); + assert (!chain->next && chain->type == CHAIN_ARGV + && !chain->u.u_a.skip_last); argv = chain->u.u_a.argv; index += chain->u.u_a.index - 1; } @@ -1145,6 +1200,7 @@ make_argv_ref_token (token_data *token, struct obstack *obs, int level, chain->u.u_a.index = index; chain->u.u_a.flatten = flatten; chain->u.u_a.comma = false; + chain->u.u_a.skip_last = false; if (quotes) { /* Clone the quotes into the obstack, since a subsequent diff --git a/src/symtab.c b/src/symtab.c index 277a79f4..dac49d7c 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -350,7 +350,7 @@ symtab_debug (void) int delete; static int i; - while (next_token (&td, NULL, NULL, "<debug>") == TOKEN_WORD) + while (next_token (&td, NULL, NULL, false, "<debug>") == TOKEN_WORD) { text = TOKEN_DATA_TEXT (&td); if (*text == '_') |