summaryrefslogtreecommitdiff
path: root/src/input.c
diff options
context:
space:
mode:
author: Eric Blake <ebb9@byu.net> 2007-11-01 09:28:46 -0600
committer: Eric Blake <ebb9@byu.net> 2008-02-16 06:36:45 -0700
commit: 7319157ccd7cd65f72c0a456c3091252a13f558a (patch)
tree: dfe5eac9e92b8943f4f118ce4a7447726f67c01e /src/input.c
parent: 950807234e3ffd376ad51b2a21cd276dd8b4e59c (diff)
download: m4-7319157ccd7cd65f72c0a456c3091252a13f558a.tar.gz
Stage 15: return argv refs back to collect_arguments.
* src/m4.h (enum token_type): Add TOKEN_ARGV.
(struct token_chain): Add skip_last member to argv link.
(next_token): Update prototype.
* src/input.c (CHAR_ARGV): New placeholder input character.
(peek_input): Add parameter, to pass $@ at once.
(next_char_1, append_quote_token): Handle $@ inside quotes.
(init_argv_token): New function.
(push_token, match_input, next_token, peek_token, lex_debug): Update callers.
* src/macro.c (expand_input, collect_arguments): Likewise.
(expand_argument): Handle incoming $@ token.
(arg_adjust_refcount, arg_token, arg_text, make_argv_ref_token): Handle nested $@ refs.
* src/symtab.c (symtab_debug): Update caller.
* examples/null.m4: Document more tests that are needed. Add tests for NUL with divert, patsubst, and regexp.
* examples/null.out: Update for new tests.
* doc/m4.texinfo (Syntax): Add test for m4exit and NUL.
* checks/get-them (AWK): Give a default value.
* checks/check-them: Allow tests to invoke child processes with same include path. Perform message normalization on stderr.

(cherry picked from commit 1fecefc8b990254aa667a01d12c6c7a2d716df06)

Signed-off-by: Eric Blake <ebb9@byu.net>
Diffstat (limited to 'src/input.c')
-rw-r--r-- src/input.c 177
1 files changed, 138 insertions, 39 deletions
diff --git a/src/input.c b/src/input.c
index a0de36fe..e320c72b 100644
--- a/src/input.c
+++ b/src/input.c
@@ -154,6 +154,7 @@ static bool input_change;
#define CHAR_EOF 256 /* Character return on EOF. */
#define CHAR_MACRO 257 /* Character return for MACRO token. */
#define CHAR_QUOTE 258 /* Character return for quoted string. */
+#define CHAR_ARGV 259 /* Character return for $@ reference. */
/* Quote chars. */
string_pair curr_quote;
@@ -446,7 +447,7 @@ push_token (token_data *token, int level, bool inuse)
next->u.u_c.end = chain;
if (chain->type == CHAIN_ARGV)
{
- assert (!chain->u.u_a.comma);
+ assert (!chain->u.u_a.comma && !chain->u.u_a.skip_last);
inuse |= arg_adjust_refcount (chain->u.u_a.argv, true);
}
else if (chain->type == CHAIN_STR && chain->u.u_s.level >= 0)
@@ -712,17 +713,18 @@ input_print (struct obstack *obs, const input_block *input)
}
-/*-----------------------------------------------------------------.
-| Low level input is done a character at a time. The function |
-| peek_input () is used to look at the next character in the input |
-| stream. At any given time, it reads from the input_block on the |
-| top of the current input stack. The return value is an unsigned |
-| char, or CHAR_EOF if there is no more input, or CHAR_MACRO if a |
-| builtin token occurs next. |
-`-----------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| Low level input is done a character at a time. The function |
+| peek_input () is used to look at the next character in the input |
+| stream. At any given time, it reads from the input_block on the |
+| top of the current input stack. The return value is an unsigned |
+| char, CHAR_EOF if there is no more input, CHAR_MACRO if a builtin |
+| token occurs next, or CHAR_ARGV if ALLOW_ARGV and the input is |
+| visiting an argv reference with the correct quoting. |
+`------------------------------------------------------------------*/
static int
-peek_input (void)
+peek_input (bool allow_argv)
{
int ch;
input_block *block = isp;
@@ -757,6 +759,7 @@ peek_input (void)
chain = block->u.u_c.chain;
while (chain)
{
+ unsigned int argc;
switch (chain->type)
{
case CHAIN_STR:
@@ -764,11 +767,17 @@ peek_input (void)
return to_uchar (*chain->u.u_s.str);
break;
case CHAIN_ARGV:
- /* TODO - pass multiple arguments to macro.c at once. */
- if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
+ argc = arg_argc (chain->u.u_a.argv);
+ if (chain->u.u_a.index == argc)
break;
if (chain->u.u_a.comma)
return ',';
+ /* Only return a reference if the quoting is correct
+ and the reference has more than one argument
+ left. */
+ if (allow_argv && chain->quote_age == current_quote_age
+ && chain->u.u_a.quotes && chain->u.u_a.index + 1 < argc)
+ return CHAR_ARGV;
/* Rather than directly parse argv here, we push
another input block containing the next unparsed
argument from argv. */
@@ -778,7 +787,7 @@ peek_input (void)
chain->u.u_a.index++;
chain->u.u_a.comma = true;
push_string_finish ();
- return peek_input ();
+ return peek_input (allow_argv);
default:
assert (!"peek_input");
abort ();
@@ -871,9 +880,7 @@ next_char_1 (bool allow_quote)
chain = isp->u.u_c.chain;
while (chain)
{
- /* TODO also support returning $@ as CHAR_QUOTE. */
- if (allow_quote && chain->quote_age == current_quote_age
- && chain->type == CHAIN_STR)
+ if (allow_quote && chain->quote_age == current_quote_age)
return CHAR_QUOTE;
switch (chain->type)
{
@@ -889,7 +896,6 @@ next_char_1 (bool allow_quote)
adjust_refcount (chain->u.u_s.level, false);
break;
case CHAIN_ARGV:
- /* TODO - pass multiple arguments to macro.c at once. */
if (chain->u.u_a.index == arg_argc (chain->u.u_a.argv))
{
arg_adjust_refcount (chain->u.u_a.argv, false);
@@ -956,7 +962,6 @@ skip_line (const char *name)
if (file != current_file || line != current_line)
input_change = true;
}
-
/*-------------------------------------------------------------------.
| When a MACRO token is seen, next_token () uses init_macro_token () |
@@ -983,20 +988,30 @@ append_quote_token (struct obstack *obs, token_data *td)
token_chain *src_chain = isp->u.u_c.chain;
token_chain *chain;
- assert (isp->type == INPUT_CHAIN && obs && current_quote_age
- && src_chain->type == CHAIN_STR && src_chain->u.u_s.level >= 0);
+ assert (isp->type == INPUT_CHAIN && obs && current_quote_age);
isp->u.u_c.chain = src_chain->next;
/* Speed consideration - for short enough tokens, the speed and
memory overhead of parsing another INPUT_CHAIN link outweighs the
time to inline the token text. */
- if (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
+ if (src_chain->type == CHAIN_STR
+ && src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
{
+ assert (src_chain->u.u_s.level >= 0);
obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
adjust_refcount (src_chain->u.u_s.level, false);
return;
}
+ /* TODO preserve $@ through a quoted context. */
+ if (src_chain->type == CHAIN_ARGV)
+ {
+ arg_print (obs, src_chain->u.u_a.argv, src_chain->u.u_a.index,
+ src_chain->u.u_a.quotes, NULL);
+ arg_adjust_refcount (src_chain->u.u_a.argv, false);
+ return;
+ }
+
if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
{
TOKEN_DATA_TYPE (td) = TOKEN_COMP;
@@ -1013,6 +1028,65 @@ append_quote_token (struct obstack *obs, token_data *td)
chain->next = NULL;
}
+
+/*-------------------------------------------------------------------.
+| When an ARGV token is seen, convert TD to point to it via a |
+| composite token. Use OBS for any additional allocations needed to |
+| store the token chain. |
+`-------------------------------------------------------------------*/
+static void
+init_argv_token (struct obstack *obs, token_data *td)
+{
+ token_chain *src_chain;
+ token_chain *chain;
+ int ch = next_char (true); /* Must report CHAR_QUOTE; checked below. */
+
+ assert (ch == CHAR_QUOTE && TOKEN_DATA_TYPE (td) == TOKEN_VOID
+ && isp->type == INPUT_CHAIN && isp->u.u_c.chain->type == CHAIN_ARGV
+ && obs && obstack_object_size (obs) == 0);
+
+ src_chain = isp->u.u_c.chain;
+ isp->u.u_c.chain = src_chain->next; /* Detach the argv link from the input. */
+ TOKEN_DATA_TYPE (td) = TOKEN_COMP; /* TD was TOKEN_VOID; it now holds a chain. */
+ /* Clone the link, since the input will be discarded soon. */
+ chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain);
+ td->u.u_c.chain = td->u.u_c.end = chain;
+ chain->next = NULL; /* The clone is the sole link of TD's chain. */
+
+ /* If the next character is not ',' or ')', then unlink the last
+ argument from argv and schedule it for reparsing. This way,
+ expand_argument never has to deal with concatenation of argv with
+ arbitrary text. Note that the implementation of safe_quotes
+ ensures peek_input won't return CHAR_ARGV if the user is perverse
+ enough to mix comment delimiters with argument separators:
+
+ define(n,`$#')define(echo,$*)changecom(`,,',`)')n(echo(a,`,b`)'',c))
+ => 2 (not 3)
+
+ Therefore, we do not have to worry about calling MATCH, and thus
+ do not have to worry about pop_input being called and
+ invalidating the argv reference.
+
+ When the $@ ref is used unchanged, we completely bypass the
+ decrement of the argv refcount in next_char_1, since the ref is
+ still live via the current collect_arguments. However, when the
+ last element of the $@ ref is reparsed, we must increase the argv
+ refcount here, to compensate for the fact that it will be
+ decreased once the final element is parsed. */
+ assert (*curr_comm.str1 != ',' && *curr_comm.str1 != ')'
+ && *curr_comm.str1 != *curr_quote.str1);
+ ch = peek_input (false); /* Look ahead without requesting CHAR_ARGV. */
+ if (ch != ',' && ch != ')')
+ {
+ isp->u.u_c.chain = src_chain; /* Put the original link back on the input. */
+ src_chain->u.u_a.index = arg_argc (chain->u.u_a.argv) - 1; /* Only the last argument is left to parse. */
+ src_chain->u.u_a.comma = true; /* peek_input reports ',' before that argument. */
+ chain->u.u_a.skip_last = true; /* Mark the clone as excluding the last argument. */
+ arg_adjust_refcount (chain->u.u_a.argv, true); /* Compensates for the later decrement described above. */
+ }
+}
+
+
/*------------------------------------------------------------------.
| This function is for matching a string against a prefix of the |
| input stream. If the string S matches the input and CONSUME is |
@@ -1029,7 +1103,7 @@ match_input (const char *s, bool consume)
const char *t;
bool result = false;
- ch = peek_input ();
+ ch = peek_input (false);
if (ch != to_uchar (*s))
return false; /* fail */
@@ -1041,7 +1115,7 @@ match_input (const char *s, bool consume)
}
next_char (false);
- for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
+ for (n = 1, t = s++; (ch = peek_input (false)) == to_uchar (*s++); )
{
next_char (false);
n++;
@@ -1320,18 +1394,20 @@ safe_quotes (void)
/*--------------------------------------------------------------------.
-| Parse a single token from the input stream, set TD to its |
-| contents, and return its type. A token is TOKEN_EOF if the |
+| Parse a single token from the input stream, set TD to its |
+| contents, and return its type. A token is TOKEN_EOF if the |
| input_stack is empty; TOKEN_STRING for a quoted string or comment; |
-| TOKEN_WORD for something that is a potential macro name; and |
+| TOKEN_WORD for something that is a potential macro name; and |
| TOKEN_SIMPLE for any single character that is not a part of any of |
| the previous types. If LINE is not NULL, set *LINE to the line |
| where the token starts. If OBS is not NULL, expand TOKEN_STRING |
| directly into OBS rather than in token_stack temporary storage |
-| area, and TD could be a TOKEN_COMP instead of the usual |
-| TOKEN_TEXT. Report errors (unterminated comments or strings) on |
-| behalf of CALLER, if non-NULL. |
-| |
+| area, and TD could be a TOKEN_COMP instead of the usual |
+| TOKEN_TEXT. If ALLOW_ARGV, OBS must be non-NULL, and an entire |
+| series of arguments can be returned as TOKEN_ARGV when a $@ |
+| reference is encountered. Report errors (unterminated comments or |
+| strings) on behalf of CALLER, if non-NULL. |
+| |
| Next_token () returns the token type, and passes back a pointer to |
| the token data through TD. Non-string token text is collected on |
| the obstack token_stack, which never contains more than one token |
@@ -1340,7 +1416,8 @@ safe_quotes (void)
`--------------------------------------------------------------------*/
token_type
-next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
+next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
+ const char *caller)
{
int ch;
int quote_level;
@@ -1362,7 +1439,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
/* Can't consume character until after CHAR_MACRO is handled. */
TOKEN_DATA_TYPE (td) = TOKEN_VOID;
- ch = peek_input ();
+ ch = peek_input (allow_argv && current_quote_age);
if (ch == CHAR_EOF)
{
#ifdef DEBUG_INPUT
@@ -1381,6 +1458,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
#endif /* DEBUG_INPUT */
return TOKEN_MACDEF;
}
+ if (ch == CHAR_ARGV)
+ {
+ init_argv_token (obs, td);
+#ifdef DEBUG_INPUT
+ xfprintf (stderr, "next_token -> ARGV (%d args)\n",
+ (arg_argc (td->u.u_c.chain->u.u_a.argv)
+ - td->u.u_c.chain->u.u_a.index
+ - (td->u.u_c.chain->u.u_a.skip_last ? 1 : 0)));
+#endif
+ return TOKEN_ARGV;
+ }
next_char (false); /* Consume character we already peeked at. */
file = current_file;
@@ -1409,7 +1497,8 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
else if (default_word_regexp && (isalpha (ch) || ch == '_'))
{
obstack_1grow (&token_stack, ch);
- while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_'))
+ while ((ch = peek_input (false)) < CHAR_EOF
+ && (isalnum (ch) || ch == '_'))
{
obstack_1grow (&token_stack, ch);
next_char (false);
@@ -1424,7 +1513,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
obstack_1grow (&token_stack, ch);
while (1)
{
- ch = peek_input ();
+ ch = peek_input (false);
if (ch >= CHAR_EOF)
break;
obstack_1grow (&token_stack, ch);
@@ -1547,9 +1636,19 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
token_type_string (type));
while (chain)
{
- assert (chain->type == CHAIN_STR);
- xfprintf (stderr, "%s", chain->u.u_s.str);
- len += chain->u.u_s.len;
+ switch (chain->type)
+ {
+ case CHAIN_STR:
+ xfprintf (stderr, "%s", chain->u.u_s.str);
+ len += chain->u.u_s.len;
+ break;
+ case CHAIN_ARGV:
+ xfprintf (stderr, "{$@}");
+ break;
+ default:
+ assert (!"next_token");
+ abort ();
+ }
links++;
chain = chain->next;
}
@@ -1569,7 +1668,7 @@ token_type
peek_token (void)
{
token_type result;
- int ch = peek_input ();
+ int ch = peek_input (false);
if (ch == CHAR_EOF)
{
@@ -1684,7 +1783,7 @@ lex_debug (void)
token_type t;
token_data td;
- while ((t = next_token (&td, NULL, NULL, "<debug>")) != TOKEN_EOF)
+ while ((t = next_token (&td, NULL, NULL, false, "<debug>")) != TOKEN_EOF)
print_token ("lex", t, &td);
}
#endif /* DEBUG_INPUT */