summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Blake <ebb9@byu.net> 2007-10-27 05:44:09 -0600
committer: Eric Blake <ebb9@byu.net> 2008-01-19 15:24:14 -0700
commit: b1fef201f5d121e25e5dd61ec8ca3eac41a899ba (patch)
tree: a76568b9a8d798657905f9090405a87b3c933628
parent: 622fc8cb2cb6ce0fc7391a6414bb0aaffeec6fc0 (diff)
download: m4-b1fef201f5d121e25e5dd61ec8ca3eac41a899ba.tar.gz
Stage11: full circle for single argument references
-rw-r--r-- src/input.c 207
-rw-r--r-- src/m4.h 25
-rw-r--r-- src/macro.c 234
3 files changed, 348 insertions, 118 deletions
diff --git a/src/input.c b/src/input.c
index 09cf7088..bf92bc44 100644
--- a/src/input.c
+++ b/src/input.c
@@ -153,6 +153,7 @@ static bool input_change;
#define CHAR_EOF 256 /* Character return on EOF. */
#define CHAR_MACRO 257 /* Character return for MACRO token. */
+#define CHAR_QUOTE 258 /* Character return for quoted string. */
/* Quote chars. */
STRING rquote;
@@ -167,7 +168,7 @@ STRING ecomm;
# define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
/* Table of characters that can start a word. */
-static char *word_start;
+static char word_start[256];
/* Current regular expression for detecting words. */
static struct re_pattern_buffer word_regexp;
@@ -201,7 +202,7 @@ static const char *token_type_string (token_type);
| chain that starts at *START and ends at *END. START may be NULL |
| if *END is non-NULL. |
`-------------------------------------------------------------------*/
-static void
+void
make_text_link (struct obstack *obs, token_chain **start, token_chain **end)
{
token_chain *chain;
@@ -218,6 +219,7 @@ make_text_link (struct obstack *obs, token_chain **start, token_chain **end)
*start = chain;
*end = chain;
chain->next = NULL;
+ chain->quote_age = 0;
chain->str = str;
chain->len = len;
chain->level = -1;
@@ -361,6 +363,7 @@ push_token (token_data *token, int level)
next->u.u_c.chain = chain;
next->u.u_c.end = chain;
chain->next = NULL;
+ chain->quote_age = TOKEN_DATA_QUOTE_AGE (token);
chain->str = TOKEN_DATA_TEXT (token);
chain->len = TOKEN_DATA_LEN (token);
chain->level = level;
@@ -563,19 +566,6 @@ pop_wrapup (void)
return true;
}
-/*-------------------------------------------------------------------.
-| When a MACRO token is seen, next_token () uses init_macro_token () |
-| to retrieve the value of the function pointer and store it in TD. |
-`-------------------------------------------------------------------*/
-
-static void
-init_macro_token (token_data *td)
-{
- assert (isp->type == INPUT_MACRO);
- TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
- TOKEN_DATA_FUNC (td) = isp->u.func;
-}
-
/*--------------------------------------------------------------.
| Dump a representation of INPUT to the obstack OBS, for use in |
| tracing. |
@@ -699,16 +689,19 @@ peek_input (void)
| consisting of a newline alone is taken as belonging to the line it |
| ends, and the current line number is not incremented until the |
| next character is read. 99.9% of all calls will read from a |
-| string, so factor that out into a macro for speed. |
+| string, so factor that out into a macro for speed. If |
+| ALLOW_QUOTE, and the current input matches the current quote age, |
+| return CHAR_QUOTE and leave consumption of data for |
+| append_quote_token. |
`-------------------------------------------------------------------*/
-#define next_char() \
+#define next_char(AQ) \
(isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \
? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \
- : next_char_1 ())
+ : next_char_1 (AQ))
static int
-next_char_1 (void)
+next_char_1 (bool allow_quote)
{
int ch;
token_chain *chain;
@@ -765,10 +758,14 @@ next_char_1 (void)
chain = isp->u.u_c.chain;
while (chain)
{
+ if (allow_quote && chain->quote_age == current_quote_age)
+ return CHAR_QUOTE;
if (chain->str)
{
if (chain->len)
{
+ /* Partial consumption invalidates quote age. */
+ chain->quote_age = 0;
chain->len--;
return to_uchar (*chain->str++);
}
@@ -808,7 +805,7 @@ skip_line (const char *name)
const char *file = current_file;
int line = current_line;
- while ((ch = next_char ()) != CHAR_EOF && ch != '\n')
+ while ((ch = next_char (false)) != CHAR_EOF && ch != '\n')
;
if (ch == CHAR_EOF)
/* current_file changed to "" if we see CHAR_EOF, use the
@@ -825,6 +822,49 @@ skip_line (const char *name)
}
+/*-------------------------------------------------------------------.
+| When a MACRO token is seen, next_token () uses init_macro_token () |
+| to retrieve the value of the function pointer and store it in TD. |
+`-------------------------------------------------------------------*/
+
+static void
+init_macro_token (token_data *td)
+{
+ assert (isp->type == INPUT_MACRO); /* Caller must only invoke this while the current input is a macro. */
+ TOKEN_DATA_TYPE (td) = TOKEN_FUNC; /* Mark TD as holding a function pointer, not text. */
+ TOKEN_DATA_FUNC (td) = isp->u.func; /* Only copies the pointer; ISP itself is left untouched here. */
+}
+
+/*-------------------------------------------------------------------.
+| When a QUOTE token is seen, convert TD to a composite (if it is |
+| not one already), consisting of any unfinished text on OBS, as |
+| well as the quoted token from the top of the input stack. Use OBS |
+| for any additional allocations needed to store the token chain. |
+`-------------------------------------------------------------------*/
+static void
+append_quote_token (struct obstack *obs, token_data *td)
+{
+ token_chain *src_chain = isp->u.u_c.chain; /* Current head link of the input chain. NOTE(review): read before the assert below checks isp->type. */
+ token_chain *chain; /* Will hold a copy of SRC_CHAIN allocated on OBS. */
+ assert (isp->type == INPUT_CHAIN && obs && current_quote_age);
+
+ if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
+ {
+ TOKEN_DATA_TYPE (td) = TOKEN_COMP; /* First use: turn the still-void TD into an empty composite. */
+ td->u.u_c.chain = td->u.u_c.end = NULL;
+ }
+ assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP);
+ make_text_link (obs, &td->u.u_c.chain, &td->u.u_c.end); /* Link in any unfinished text on OBS ahead of the quoted link. */
+ chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain);
+ if (td->u.u_c.end)
+ td->u.u_c.end->next = chain; /* Non-empty chain: append after the current tail. */
+ else
+ td->u.u_c.chain = chain; /* Empty chain: the copy becomes the head. */
+ td->u.u_c.end = chain;
+ td->u.u_c.end->next = NULL; /* The copy inherited SRC_CHAIN's next pointer; cut it so TD's chain ends here. */
+ isp->u.u_c.chain = src_chain->next; /* Advance the input past the link just taken. */
+}
+
/*------------------------------------------------------------------.
| This function is for matching a string against a prefix of the |
| input stream. If the string S matches the input and CONSUME is |
@@ -848,14 +888,14 @@ match_input (const char *s, bool consume)
if (s[1] == '\0')
{
if (consume)
- (void) next_char ();
+ next_char (false);
return true; /* short match */
}
- (void) next_char ();
+ next_char (false);
for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
{
- (void) next_char ();
+ next_char (false);
n++;
if (*s == '\0') /* long match */
{
@@ -1016,7 +1056,6 @@ void
set_word_regexp (const char *caller, const char *regexp)
{
int i;
- char test[2];
const char *msg;
struct re_pattern_buffer new_word_regexp;
@@ -1048,15 +1087,10 @@ set_word_regexp (const char *caller, const char *regexp)
default_word_regexp = false;
set_quote_age ();
- if (word_start == NULL)
- word_start = (char *) xmalloc (256);
-
- word_start[0] = '\0';
- test[1] = '\0';
for (i = 1; i < 256; i++)
{
- test[0] = i;
- word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
+ char test = i;
+ word_start[i] = re_match (&word_regexp, &test, 1, 0, NULL) > 0;
}
}
@@ -1140,16 +1174,17 @@ safe_quotes (void)
/*--------------------------------------------------------------------.
-| Parse and return a single token from the input stream. A token |
-| can either be TOKEN_EOF, if the input_stack is empty; it can be |
-| TOKEN_STRING for a quoted string or comment; TOKEN_WORD for |
-| something that is a potential macro name; and TOKEN_SIMPLE for any |
-| single character that is not a part of any of the previous types. |
-| If LINE is not NULL, set *LINE to the line where the token starts. |
-| If OBS is not NULL, expand TOKEN_STRING directly into OBS rather |
-| than in token_stack temporary storage area. Report errors |
-| (unterminated comments or strings) on behalf of CALLER, if |
-| non-NULL. |
+| Parse a single token from the input stream, set TD to its |
+| contents, and return its type. A token is TOKEN_EOF if the |
+| input_stack is empty; TOKEN_STRING for a quoted string or comment; |
+| TOKEN_WORD for something that is a potential macro name; and |
+| TOKEN_SIMPLE for any single character that is not a part of any of |
+| the previous types. If LINE is not NULL, set *LINE to the line |
+| where the token starts. If OBS is not NULL, expand TOKEN_STRING |
+| directly into OBS rather than in token_stack temporary storage |
+| area, and TD could be a TOKEN_COMP instead of the usual |
+| TOKEN_TEXT. Report errors (unterminated comments or strings) on |
+| behalf of CALLER, if non-NULL. |
| |
| Next_token () returns the token type, and passes back a pointer to |
| the token data through TD. Non-string token text is collected on |
@@ -1165,7 +1200,6 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
int quote_level;
token_type type;
#ifdef ENABLE_CHANGEWORD
- int startpos;
char *orig_text = NULL;
#endif /* ENABLE_CHANGEWORD */
const char *file;
@@ -1181,19 +1215,20 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
line = &dummy;
/* Can't consume character until after CHAR_MACRO is handled. */
+ TOKEN_DATA_TYPE (td) = TOKEN_VOID;
ch = peek_input ();
if (ch == CHAR_EOF)
{
#ifdef DEBUG_INPUT
xfprintf (stderr, "next_token -> EOF\n");
#endif /* DEBUG_INPUT */
- next_char ();
+ next_char (false);
return TOKEN_EOF;
}
if (ch == CHAR_MACRO)
{
init_macro_token (td);
- next_char ();
+ next_char (false);
#ifdef DEBUG_INPUT
xfprintf (stderr, "next_token -> MACDEF (%s)\n",
find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
@@ -1201,7 +1236,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
return TOKEN_MACDEF;
}
- next_char (); /* Consume character we already peeked at. */
+ next_char (false); /* Consume character we already peeked at. */
file = current_file;
*line = current_line;
if (MATCH (ch, bcomm.string, true))
@@ -1209,11 +1244,14 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
if (obs)
obs_td = obs;
obstack_grow (obs_td, bcomm.string, bcomm.length);
- while ((ch = next_char ()) != CHAR_EOF
+ while ((ch = next_char (false)) < CHAR_EOF
&& !MATCH (ch, ecomm.string, true))
obstack_1grow (obs_td, ch);
if (ch != CHAR_EOF)
- obstack_grow (obs_td, ecomm.string, ecomm.length);
+ {
+ assert (ch < CHAR_EOF);
+ obstack_grow (obs_td, ecomm.string, ecomm.length);
+ }
else
/* Current_file changed to "" if we see CHAR_EOF, use the
previous value we stored earlier. */
@@ -1225,10 +1263,10 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
else if (default_word_regexp && (isalpha (ch) || ch == '_'))
{
obstack_1grow (&token_stack, ch);
- while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
+ while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_'))
{
obstack_1grow (&token_stack, ch);
- (void) next_char ();
+ next_char (false);
}
type = TOKEN_WORD;
}
@@ -1241,20 +1279,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
while (1)
{
ch = peek_input ();
- if (ch == CHAR_EOF)
+ if (ch >= CHAR_EOF)
break;
obstack_1grow (&token_stack, ch);
- startpos = re_search (&word_regexp,
- (char *) obstack_base (&token_stack),
- obstack_object_size (&token_stack), 0, 0,
- &regs);
- if (startpos != 0 ||
- regs.end [0] != obstack_object_size (&token_stack))
+ if (re_match (&word_regexp, (char *) obstack_base (&token_stack),
+ obstack_object_size (&token_stack), 0, &regs)
+ != obstack_object_size (&token_stack))
{
obstack_blank (&token_stack, -1);
break;
}
- next_char ();
+ next_char (false);
}
obstack_1grow (&token_stack, '\0');
@@ -1297,14 +1332,16 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
quote_level = 1;
while (1)
{
- ch = next_char ();
+ ch = next_char (obs != NULL && current_quote_age);
if (ch == CHAR_EOF)
/* Current_file changed to "" if we see CHAR_EOF, use
the previous value we stored earlier. */
m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
_("end of file in string"));
- if (MATCH (ch, rquote.string, true))
+ if (ch == CHAR_QUOTE)
+ append_quote_token (obs, td);
+ else if (MATCH (ch, rquote.string, true))
{
if (--quote_level == 0)
break;
@@ -1316,35 +1353,49 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
obstack_grow (obs_td, lquote.string, lquote.length);
}
else
- obstack_1grow (obs_td, ch);
+ {
+ assert (ch < CHAR_EOF);
+ obstack_1grow (obs_td, ch);
+ }
}
type = TOKEN_STRING;
}
- TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
- TOKEN_DATA_LEN (td) = obstack_object_size (obs_td);
- if (obs_td != obs)
+ if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
{
- obstack_1grow (obs_td, '\0');
- TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td);
- }
- else
- TOKEN_DATA_TEXT (td) = NULL;
- TOKEN_DATA_QUOTE_AGE (td) = current_quote_age;
+ TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
+ TOKEN_DATA_LEN (td) = obstack_object_size (obs_td);
+ if (obs_td != obs)
+ {
+ obstack_1grow (obs_td, '\0');
+ TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td);
+ }
+ else
+ TOKEN_DATA_TEXT (td) = NULL;
+ TOKEN_DATA_QUOTE_AGE (td) = current_quote_age;
#ifdef ENABLE_CHANGEWORD
- if (orig_text == NULL)
- TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td);
+ if (orig_text == NULL)
+ TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td);
+ else
+ {
+ TOKEN_DATA_ORIG_TEXT (td) = orig_text;
+ TOKEN_DATA_LEN (td) = strlen (orig_text);
+ }
+#endif /* ENABLE_CHANGEWORD */
+#ifdef DEBUG_INPUT
+ xfprintf (stderr, "next_token -> %s (%s), len %zu\n",
+ token_type_string (type), TOKEN_DATA_TEXT (td),
+ TOKEN_DATA_LEN (td));
+#endif /* DEBUG_INPUT */
+ }
else
{
- TOKEN_DATA_ORIG_TEXT (td) = orig_text;
- TOKEN_DATA_LEN (td) = strlen (orig_text);
- }
-#endif /* ENABLE_CHANGEWORD */
+ assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP && type == TOKEN_STRING);
#ifdef DEBUG_INPUT
- xfprintf (stderr, "next_token -> %s (%s), len %zu\n",
- token_type_string (type), TOKEN_DATA_TEXT (td),
- TOKEN_DATA_LEN (td));
+ xfprintf (stderr, "next_token -> %s <chain>\n",
+ token_type_string (type));
#endif /* DEBUG_INPUT */
+ }
return type;
}
diff --git a/src/m4.h b/src/m4.h
index ea3947ff..474338b4 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -271,19 +271,20 @@ enum token_data_type
TOKEN_VOID, /* Token still being constructed, u is invalid. */
TOKEN_TEXT, /* Straight text, u.u_t is valid. */
TOKEN_FUNC, /* Builtin function definition, u.func is valid. */
- TOKEN_COMP /* Composite argument, u.chain is valid. */
+ TOKEN_COMP /* Composite argument, u.u_c is valid. */
};
/* Composite tokens are built of a linked list of chains. */
struct token_chain
{
- token_chain *next; /* Pointer to next link of chain. */
- const char *str; /* NUL-terminated string if text, else NULL. */
- size_t len; /* Length of str, else 0. */
- int level; /* Expansion level of link content, or -1. */
- macro_arguments *argv;/* Reference to earlier $@. */
- unsigned int index; /* Argument index within argv. */
- bool flatten; /* True to treat builtins as text. */
+ token_chain *next; /* Pointer to next link of chain. */
+ unsigned int quote_age; /* Quote_age of this link of chain, or 0. */
+ const char *str; /* NUL-terminated string if text, or NULL. */
+ size_t len; /* Length of str, else 0. */
+ int level; /* Expansion level of link content, or -1. */
+ macro_arguments *argv; /* Reference to earlier $@. */
+ unsigned int index; /* Argument index within argv. */
+ bool flatten; /* True to treat builtins as text. */
};
/* The content of a token or macro argument. */
@@ -319,7 +320,12 @@ struct token_data
/* Composite text: a linked list of straight text and $@
placeholders. */
- token_chain *chain;
+ struct
+ {
+ token_chain *chain; /* First link of the chain. */
+ token_chain *end; /* Last link of the chain. */
+ }
+ u_c;
}
u;
};
@@ -342,6 +348,7 @@ token_type next_token (token_data *, int *, struct obstack *, const char *);
void skip_line (const char *);
/* push back input */
+void make_text_link (struct obstack *, token_chain **, token_chain **);
void push_file (FILE *, const char *, bool);
void push_macro (builtin_func *);
struct obstack *push_string_init (void);
diff --git a/src/macro.c b/src/macro.c
index 228f82d5..bb8f4fa9 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -45,6 +45,9 @@ struct macro_arguments
bool_bitfield inuse : 1;
/* False if all arguments are just text or func, true if this argv
refers to another one. */
+ bool_bitfield wrapper : 1;
+ /* False if all arguments belong to this argv, true if some of them
+ include references to another. */
bool_bitfield has_ref : 1;
const char *argv0; /* The macro name being expanded. */
size_t argv0_len; /* Length of argv0. */
@@ -382,11 +385,16 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
return t == TOKEN_COMMA;
warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp));
}
- obstack_1grow (obs, '\0');
- TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
- TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs);
- TOKEN_DATA_LEN (argp) = len;
- TOKEN_DATA_QUOTE_AGE (argp) = age;
+ if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP)
+ {
+ obstack_1grow (obs, '\0');
+ TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
+ TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs);
+ TOKEN_DATA_LEN (argp) = len;
+ TOKEN_DATA_QUOTE_AGE (argp) = age;
+ }
+ else
+ make_text_link (obs, NULL, &argp->u.u_c.end);
return t == TOKEN_COMMA;
}
/* fallthru */
@@ -411,6 +419,23 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
case TOKEN_STRING:
if (!expand_token (obs, t, &td, line, first))
age = 0;
+ if (TOKEN_DATA_TYPE (&td) == TOKEN_COMP)
+ {
+ if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP)
+ {
+ if (TOKEN_DATA_TYPE (argp) == TOKEN_FUNC)
+ warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp));
+ TOKEN_DATA_TYPE (argp) = TOKEN_COMP;
+ argp->u.u_c.chain = td.u.u_c.chain;
+ argp->u.u_c.end = td.u.u_c.end;
+ }
+ else
+ {
+ assert (argp->u.u_c.end);
+ argp->u.u_c.end->next = td.u.u_c.chain;
+ argp->u.u_c.end = td.u.u_c.end;
+ }
+ }
break;
case TOKEN_MACDEF:
@@ -459,6 +484,7 @@ collect_arguments (symbol *sym, struct obstack *arguments,
args.argc = 1;
args.inuse = false;
+ args.wrapper = false;
args.has_ref = false;
args.argv0 = SYMBOL_NAME (sym);
args.argv0_len = strlen (args.argv0);
@@ -490,11 +516,14 @@ collect_arguments (symbol *sym, struct obstack *arguments,
&& TOKEN_DATA_LEN (tdp) > 0
&& TOKEN_DATA_QUOTE_AGE (tdp) != args.quote_age)
args.quote_age = 0;
+ else if (TOKEN_DATA_TYPE (tdp) == TOKEN_COMP)
+ args.has_ref = true;
}
while (more_args);
}
argv = (macro_arguments *) obstack_finish (argv_stack);
argv->argc = args.argc;
+ argv->has_ref = args.has_ref;
if (args.quote_age != quote_age ())
argv->quote_age = 0;
argv->arraylen = args.arraylen;
@@ -633,8 +662,23 @@ expand_macro (symbol *sym)
if (SYMBOL_DELETED (sym))
free_symbol (sym);
- /* If argv contains references, those refcounts can be reduced now. */
- // TODO - support references in argv
+ /* If argv contains references, those refcounts must be reduced now. */
+ if (argv->has_ref)
+ {
+ token_chain *chain;
+ size_t i;
+ for (i = 0; i < argv->arraylen; i++)
+ if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP)
+ {
+ chain = argv->array[i]->u.u_c.chain;
+ while (chain)
+ {
+ if (chain->level >= 0)
+ adjust_refcount (chain->level, false);
+ chain = chain->next;
+ }
+ }
+ }
/* We no longer need argv, so reduce the refcount. Additionally, if
no other references to argv were created, we can free our portion
@@ -698,7 +742,7 @@ arg_token (macro_arguments *argv, unsigned int index)
token_data *token;
assert (index && index < argv->argc);
- if (!argv->has_ref)
+ if (!argv->wrapper)
return argv->array[index - 1];
/* Must cycle through all tokens, until we find index, since a ref
may occupy multiple indices. */
@@ -707,7 +751,7 @@ arg_token (macro_arguments *argv, unsigned int index)
token = argv->array[i];
if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
{
- token_chain *chain = token->u.chain;
+ token_chain *chain = token->u.u_c.chain;
// TODO for now we support only a single-length $@ chain...
assert (!chain->next && !chain->str);
if (index < chain->argv->argc - (chain->index - 1))
@@ -731,14 +775,14 @@ static void
arg_mark (macro_arguments *argv)
{
argv->inuse = true;
- if (argv->has_ref)
+ if (argv->wrapper)
{
// TODO for now we support only a single-length $@ chain...
assert (argv->arraylen == 1
&& TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP
- && !argv->array[0]->u.chain->next
- && !argv->array[0]->u.chain->str);
- argv->array[0]->u.chain->argv->inuse = true;
+ && !argv->array[0]->u.u_c.chain->next
+ && !argv->array[0]->u.u_c.chain->str);
+ argv->array[0]->u.u_c.chain->argv->inuse = true;
}
}
@@ -761,17 +805,22 @@ arg_type (macro_arguments *argv, unsigned int index)
return TOKEN_TEXT;
token = arg_token (argv, index);
type = TOKEN_DATA_TYPE (token);
- assert (type != TOKEN_COMP);
+ /* Composite tokens are currently sequences of text only. */
+ if (type == TOKEN_COMP)
+ type = TOKEN_TEXT;
return type;
}
/* Given ARGV, return the text at argument INDEX. Abort if the
argument is not text. Index 0 is always text, and indices beyond
- argc return the empty string. */
+ argc return the empty string. The result is always NUL-terminated,
+ even if it includes embedded NUL characters. */
const char *
arg_text (macro_arguments *argv, unsigned int index)
{
token_data *token;
+ token_chain *chain;
+ struct obstack *obs;
if (index == 0)
return argv->argv0;
@@ -783,8 +832,19 @@ arg_text (macro_arguments *argv, unsigned int index)
case TOKEN_TEXT:
return TOKEN_DATA_TEXT (token);
case TOKEN_COMP:
- // TODO - how to concatenate multiple arguments? For now, we expect
- // only one element in the chain, and arg_token dereferences it...
+ // TODO - concatenate argv refs, or even functions? For now, we assume
+ // all chain elements are text.
+ chain = token->u.u_c.chain;
+ obs = arg_scratch ();
+ while (chain)
+ {
+ // TODO - cache compiled chains?
+ assert (chain->str);
+ obstack_grow (obs, chain->str, chain->len);
+ chain = chain->next;
+ }
+ obstack_1grow (obs, '\0');
+ return (char *) obstack_finish (obs);
default:
break;
}
@@ -801,14 +861,84 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb)
{
token_data *ta = arg_token (argv, indexa);
token_data *tb = arg_token (argv, indexb);
+ token_chain tmpa;
+ token_chain tmpb;
+ token_chain *ca = &tmpa;
+ token_chain *cb = &tmpb;
+ /* Quick tests. */
if (ta == &empty_token || tb == &empty_token)
return ta == tb;
+ if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT
+ && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT)
+ return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb)
+ && memcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb),
+ TOKEN_DATA_LEN (ta)) == 0);
+
+ /* Convert both arguments to chains, if not one already. */
// TODO - allow builtin tokens in the comparison?
- assert (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT
- && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT);
- return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb)
- && strcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb)) == 0);
+ if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT)
+ {
+ tmpa.next = NULL;
+ tmpa.str = TOKEN_DATA_TEXT (ta);
+ tmpa.len = TOKEN_DATA_LEN (ta);
+ }
+ else
+ {
+ assert (TOKEN_DATA_TYPE (ta) == TOKEN_COMP);
+ ca = ta->u.u_c.chain;
+ }
+ if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT)
+ {
+ tmpb.next = NULL;
+ tmpb.str = TOKEN_DATA_TEXT (tb);
+ tmpb.len = TOKEN_DATA_LEN (tb);
+ }
+ else
+ {
+ assert (TOKEN_DATA_TYPE (tb) == TOKEN_COMP);
+ cb = tb->u.u_c.chain;
+ }
+
+ /* Compare each link of the chain. */
+ while (ca && cb)
+ {
+ // TODO support comparison against $@ refs.
+ assert (ca->str && cb->str);
+ if (ca->len == cb->len)
+ {
+ if (memcmp (ca->str, cb->str, ca->len) != 0)
+ return false;
+ ca = ca->next;
+ cb = cb->next;
+ }
+ else if (ca->len < cb->len)
+ {
+ if (memcmp (ca->str, cb->str, ca->len) != 0)
+ return false;
+ tmpb.next = cb->next;
+ tmpb.str = cb->str + ca->len;
+ tmpb.len = cb->len - ca->len;
+ ca = ca->next;
+ cb = &tmpb;
+ }
+ else
+ {
+ assert (ca->len > cb->len);
+ if (memcmp (ca->str, cb->str, cb->len) != 0)
+ return false;
+ tmpa.next = ca->next;
+ tmpa.str = ca->str + cb->len;
+ tmpa.len = ca->len - cb->len;
+ ca = &tmpa;
+ cb = cb->next;
+ }
+ }
+
+ /* If we get this far, the two tokens are equal only if both chains
+ are exhausted. */
+ assert (ca != cb || ca == NULL);
+ return ca == cb;
}
/* Given ARGV, return true if argument INDEX is the empty string.
@@ -830,6 +960,8 @@ size_t
arg_len (macro_arguments *argv, unsigned int index)
{
token_data *token;
+ token_chain *chain;
+ size_t len;
if (index == 0)
return argv->argv0_len;
@@ -842,8 +974,18 @@ arg_len (macro_arguments *argv, unsigned int index)
assert ((token == &empty_token) == (TOKEN_DATA_LEN (token) == 0));
return TOKEN_DATA_LEN (token);
case TOKEN_COMP:
- // TODO - how to concatenate multiple arguments? For now, we expect
- // only one element in the chain, and arg_token dereferences it...
+ // TODO - concatenate argv refs, or even functions? For now, we assume
+ // all chain elements are text.
+ chain = token->u.u_c.chain;
+ len = 0;
+ while (chain)
+ {
+ assert (chain->str);
+ len += chain->len;
+ chain = chain->next;
+ }
+ assert (len);
+ return len;
default:
break;
}
@@ -892,12 +1034,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
/* When making a reference through a reference, point to the
original if possible. */
- if (argv->has_ref)
+ if (argv->wrapper)
{
// TODO for now we support only a single-length $@ chain...
assert (argv->arraylen == 1
&& TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP);
- chain = argv->array[0]->u.chain;
+ chain = argv->array[0]->u.u_c.chain;
assert (!chain->next && !chain->str);
argv = chain->argv;
index += chain->index - 1;
@@ -907,6 +1049,7 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
new_argv = (macro_arguments *)
obstack_alloc (obs, offsetof (macro_arguments, array));
new_argv->arraylen = 0;
+ new_argv->wrapper = false;
new_argv->has_ref = false;
}
else
@@ -918,10 +1061,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
chain = (token_chain *) obstack_alloc (obs, sizeof *chain);
new_argv->arraylen = 1;
new_argv->array[0] = token;
+ new_argv->wrapper = true;
new_argv->has_ref = true;
TOKEN_DATA_TYPE (token) = TOKEN_COMP;
- token->u.chain = chain;
+ token->u.u_c.chain = token->u.u_c.end = chain;
chain->next = NULL;
+ chain->quote_age = argv->quote_age;
chain->str = NULL;
chain->len = 0;
chain->level = expansion_level - 1;
@@ -955,9 +1100,23 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index)
return;
token = arg_token (argv, index);
// TODO handle func tokens?
- assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
- if (push_token (token, expansion_level - 1))
- arg_mark (argv);
+ if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
+ {
+ if (push_token (token, expansion_level - 1))
+ arg_mark (argv);
+ }
+ else if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
+ {
+ // TODO - really handle composites; for now, just flatten the
+ // composite and push its text
+ token_chain *chain = token->u.u_c.chain;
+ while (chain)
+ {
+ assert (chain->str);
+ obstack_grow (obs, chain->str, chain->len);
+ chain = chain->next;
+ }
+ }
}
/* Push series of comma-separated arguments from ARGV, which should
@@ -968,6 +1127,7 @@ void
push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
{
token_data *token;
+ token_chain *chain;
unsigned int i = skip ? 2 : 1;
const char *sep = ",";
size_t sep_len = 1;
@@ -1007,8 +1167,20 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
else
use_sep = true;
// TODO handle func tokens?
- assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
- inuse |= push_token (token, expansion_level - 1);
+ if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
+ inuse |= push_token (token, expansion_level - 1);
+ else
+ {
+ // TODO - handle composite text in push_token
+ assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP);
+ chain = token->u.u_c.chain;
+ while (chain)
+ {
+ assert (chain->str);
+ obstack_grow (obs, chain->str, chain->len);
+ chain = chain->next;
+ }
+ }
}
if (quote)
obstack_grow (obs, rquote.string, rquote.length);