summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Blake <ebb9@byu.net>2007-10-25 10:47:43 -0600
committerEric Blake <ebb9@byu.net>2007-12-11 09:40:13 -0700
commit687dd577f66622e0b69a8cd03b7e5e76fa546c52 (patch)
treee118788b604717cf4f41d10d83d86cf7b5317c9d
parent6dcf7d2e3c5deac2d16ee9a29b6a307474603dc7 (diff)
downloadm4-687dd577f66622e0b69a8cd03b7e5e76fa546c52.tar.gz
Stage 7: add chained token support to input parser
-rw-r--r--src/input.c211
-rw-r--r--src/m4.h11
-rw-r--r--src/macro.c53
3 files changed, 234 insertions, 41 deletions
diff --git a/src/input.c b/src/input.c
index 4e5d2990..f2f14e95 100644
--- a/src/input.c
+++ b/src/input.c
@@ -69,7 +69,8 @@ enum input_type
{
INPUT_STRING, /* String resulting from macro expansion. */
INPUT_FILE, /* File from command line or include. */
- INPUT_MACRO /* Builtin resulting from defn. */
+ INPUT_MACRO, /* Builtin resulting from defn. */
+ INPUT_CHAIN /* FIFO chain of separate strings and $@ refs. */
};
typedef enum input_type input_type;
@@ -85,7 +86,8 @@ struct input_block
{
struct
{
- char *string; /* Remaining string value. */
+ char *str; /* Remaining string value. */
+ size_t len; /* Remaining length. */
}
u_s; /* INPUT_STRING */
struct
@@ -96,7 +98,13 @@ struct input_block
bool_bitfield advance : 1; /* Track previous start_of_input_line. */
}
u_f; /* INPUT_FILE */
- builtin_func *func; /* Pointer to macro's function. */
+ builtin_func *func; /* INPUT_MACRO */
+ struct
+ {
+ token_chain *chain; /* Current link in chain. */
+ token_chain *end; /* Last link in chain. */
+ }
+ u_c; /* INPUT_CHAIN */
}
u;
};
@@ -184,6 +192,36 @@ static const char *token_type_string (token_type);
/*-------------------------------------------------------------------.
+| Given an obstack OBS, capture any unfinished text as a link in the |
+| chain that starts at *START and ends at *END. START may be NULL |
+| if *END is non-NULL. |
+`-------------------------------------------------------------------*/
+static void
+make_text_link (struct obstack *obs, token_chain **start, token_chain **end)
+{
+ token_chain *chain;
+ size_t len = obstack_object_size (obs);
+
+ assert (end && (start || *end));
+ if (len)
+ {
+ char *str = (char *) obstack_finish (obs);
+ chain = (token_chain *) obstack_alloc (obs, sizeof *chain);
+ if (*end)
+ (*end)->next = chain;
+ else
+ *start = chain;
+ *end = chain;
+ chain->next = NULL;
+ chain->str = str;
+ chain->len = len;
+ chain->argv = NULL;
+ chain->index = 0;
+ chain->flatten = false;
+ }
+}
+
+/*-------------------------------------------------------------------.
| push_file () pushes an input file on the input stack, saving the |
| current file name and line number. If next is non-NULL, this push |
| invalidates a call to push_string_init (), whose storage is |
@@ -272,6 +310,54 @@ push_string_init (void)
}
/*-------------------------------------------------------------------.
+| If TOKEN contains text, then convert the current string into a |
+| chain if it is not one already, and add the contents of TOKEN as a |
+| new link in the chain. LEVEL describes the current expansion |
+| level, or -1 if the contents of TOKEN reside entirely on the |
+| current_input stack and TOKEN lives in temporary storage. Allows |
+| gathering input from multiple locations, rather than copying |
+| everything consecutively onto the input stack. Must be called |
+| between push_string_init and push_string_finish. |
+`-------------------------------------------------------------------*/
+void
+push_token (token_data *token, int level)
+{
+ token_chain *chain;
+
+ assert (next);
+ // TODO - also accept TOKEN_COMP chains
+ assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
+ if (TOKEN_DATA_LEN (token) == 0)
+ return;
+
+ if (next->type == INPUT_STRING)
+ {
+ next->type = INPUT_CHAIN;
+ next->u.u_c.chain = next->u.u_c.end = NULL;
+ }
+ make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
+ chain = (token_chain *) obstack_alloc (current_input, sizeof *chain);
+ if (next->u.u_c.end)
+ next->u.u_c.end->next = chain;
+ else
+ next->u.u_c.chain = chain;
+ next->u.u_c.end = chain;
+ chain->next = NULL;
+ if (level >= 0)
+ // TODO - use token as-is, rather than copying data. This implies
+ // lengthening lifetime of $@ arguments until the rescan is complete,
+ // rather than the current approach of freeing them during expand_macro
+ chain->str = (char *) obstack_copy (current_input, TOKEN_DATA_TEXT (token),
+ TOKEN_DATA_LEN (token));
+ else
+ chain->str = TOKEN_DATA_TEXT (token);
+ chain->len = TOKEN_DATA_LEN (token);
+ chain->argv = NULL;
+ chain->index = 0;
+ chain->flatten = false;
+}
+
+/*-------------------------------------------------------------------.
| Last half of push_string (). If next is now NULL, a call to |
| push_file () or push_macro () has invalidated the previous call to |
| push_string_init (), so we just give up. If the new object is |
@@ -294,10 +380,15 @@ push_string_finish (void)
return NULL;
}
- if (len)
+ if (len || next->type == INPUT_CHAIN)
{
- obstack_1grow (current_input, '\0');
- next->u.u_s.string = (char *) obstack_finish (current_input);
+ if (next->type == INPUT_STRING)
+ {
+ next->u.u_s.str = (char *) obstack_finish (current_input);
+ next->u.u_s.len = len;
+ }
+ else
+ make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
next->prev = isp;
isp = next;
input_change = true;
@@ -327,7 +418,8 @@ push_wrapup (const char *s)
i->type = INPUT_STRING;
i->file = current_file;
i->line = current_line;
- i->u.u_s.string = (char *) obstack_copy0 (wrapup_stack, s, strlen (s));
+ i->u.u_s.len = strlen (s);
+ i->u.u_s.str = (char *) obstack_copy (wrapup_stack, s, i->u.u_s.len);
wsp = i;
}
@@ -345,12 +437,13 @@ static bool
pop_input (bool cleanup)
{
input_block *tmp = isp->prev;
+ token_chain *chain;
switch (isp->type)
{
case INPUT_STRING:
- assert (!cleanup || !*isp->u.u_s.string);
- if (*isp->u.u_s.string)
+ assert (!cleanup || !isp->u.u_s.len);
+ if (isp->u.u_s.len)
return false;
break;
@@ -359,6 +452,26 @@ pop_input (bool cleanup)
return false;
break;
+ case INPUT_CHAIN:
+ chain = isp->u.u_c.chain;
+ assert (!chain || !cleanup);
+ while (chain)
+ {
+ if (chain->str)
+ {
+ if (chain->len)
+ return false;
+ }
+ else
+ {
+ // TODO - peek into argv
+ assert (!"implemented yet");
+ abort ();
+ }
+ chain = chain->next;
+ }
+ break;
+
case INPUT_FILE:
if (!cleanup)
return false;
@@ -451,12 +564,13 @@ void
input_print (struct obstack *obs, const input_block *input)
{
int maxlen = max_debug_argument_length;
+ token_chain *chain;
assert (input);
switch (input->type)
{
case INPUT_STRING:
- obstack_print (obs, input->u.u_s.string, SIZE_MAX, &maxlen);
+ obstack_print (obs, input->u.u_s.str, input->u.u_s.len, &maxlen);
break;
case INPUT_FILE:
obstack_grow (obs, "<file: ", strlen ("<file: "));
@@ -472,6 +586,17 @@ input_print (struct obstack *obs, const input_block *input)
obstack_1grow (obs, '>');
}
break;
+ case INPUT_CHAIN:
+ chain = input->u.u_c.chain;
+ while (chain)
+ {
+ // TODO support argv refs as well
+ assert (chain->str);
+ if (obstack_print (obs, chain->str, chain->len, &maxlen))
+ return;
+ chain = chain->next;
+ }
+ break;
default:
assert (!"input_print");
abort ();
@@ -493,6 +618,7 @@ peek_input (void)
{
int ch;
input_block *block = isp;
+ token_chain *chain;
while (1)
{
@@ -502,10 +628,9 @@ peek_input (void)
switch (block->type)
{
case INPUT_STRING:
- ch = to_uchar (block->u.u_s.string[0]);
- if (ch != '\0')
- return ch;
- break;
+ if (!block->u.u_s.len)
+ break;
+ return to_uchar (block->u.u_s.str[0]);
case INPUT_FILE:
ch = getc (block->u.u_f.fp);
@@ -520,6 +645,25 @@ peek_input (void)
case INPUT_MACRO:
return CHAR_MACRO;
+ case INPUT_CHAIN:
+ chain = block->u.u_c.chain;
+ while (chain)
+ {
+ if (chain->str)
+ {
+ if (chain->len)
+ return to_uchar (chain->str[0]);
+ }
+ else
+ {
+ // TODO - peek into argv
+ assert (!"implemented yet");
+ abort ();
+ }
+ chain = chain->next;
+ }
+ break;
+
default:
assert (!"peek_input");
abort ();
@@ -539,15 +683,15 @@ peek_input (void)
`-------------------------------------------------------------------------*/
#define next_char() \
- (isp && isp->type == INPUT_STRING && isp->u.u_s.string[0] \
- && !input_change \
- ? to_uchar (*isp->u.u_s.string++) \
+ (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \
+ ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \
: next_char_1 ())
static int
next_char_1 (void)
{
int ch;
+ token_chain *chain;
while (1)
{
@@ -568,13 +712,10 @@ next_char_1 (void)
switch (isp->type)
{
case INPUT_STRING:
- ch = to_uchar (*isp->u.u_s.string);
- if (ch != '\0')
- {
- isp->u.u_s.string++;
- return ch;
- }
- break;
+ if (!isp->u.u_s.len)
+ break;
+ isp->u.u_s.len--;
+ return to_uchar (*isp->u.u_s.str++);
case INPUT_FILE:
if (start_of_input_line)
@@ -600,6 +741,28 @@ next_char_1 (void)
pop_input (true);
return CHAR_MACRO;
+ case INPUT_CHAIN:
+ chain = isp->u.u_c.chain;
+ while (chain)
+ {
+ if (chain->str)
+ {
+ if (chain->len)
+ {
+ chain->len--;
+ return to_uchar (*chain->str++);
+ }
+ }
+ else
+ {
+ // TODO - read from argv
+ assert (!"implemented yet");
+ abort ();
+ }
+ isp->u.u_c.chain = chain = chain->next;
+ }
+ break;
+
default:
assert (!"next_char_1");
abort ();
diff --git a/src/m4.h b/src/m4.h
index f7b0d37e..111f1678 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -284,7 +284,7 @@ enum token_data_type
struct token_chain
{
token_chain *next; /* Pointer to next link of chain. */
- char *str; /* NUL-terminated string if text, else NULL. */
+ const char *str; /* NUL-terminated string if text, else NULL. */
size_t len; /* Length of str, else 0. */
macro_arguments *argv;/* Reference to earlier $@. */
unsigned int index; /* Argument index within argv. */
@@ -303,7 +303,7 @@ struct token_data
cache for now. But it will be essential if we ever DO
support NUL. */
size_t len;
- char *text;
+ char *text; /* The contents of the token. */
/* The value of quote_age when this token was scanned. If
this token is later encountered in the context of
scanning a quoted string, and quote_age has not changed,
@@ -312,7 +312,11 @@ struct token_data
might change the parse on rescan. Ignored for 0 len. */
unsigned int quote_age;
#ifdef ENABLE_CHANGEWORD
- char *original_text;
+ /* If changeword is in effect, and contains a () group, then
+ this contains the entire token, while text contains the
+ portion that matched the () group to form a macro name.
+ Otherwise, this field is unused. */
+ const char *original_text;
#endif
}
u_t;
@@ -346,6 +350,7 @@ void skip_line (const char *);
void push_file (FILE *, const char *, bool);
void push_macro (builtin_func *);
struct obstack *push_string_init (void);
+void push_token (token_data *, int);
const input_block *push_string_finish (void);
void push_wrapup (const char *);
bool pop_wrapup (void);
diff --git a/src/macro.c b/src/macro.c
index c4eaaddb..873e82ca 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -727,8 +727,7 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index)
token = arg_token (argv, index);
// TODO handle func tokens?
assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
- // TODO actually push a reference, rather than copying data
- obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token));
+ push_token (token, expansion_level - 1);
}
/* Push series of comma-separated arguments from ARGV, which should
@@ -739,23 +738,49 @@ void
push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
{
token_data *token;
- unsigned int i;
- bool comma = false;
+ token_data sep;
+ unsigned int i = skip ? 2 : 1;
+ bool use_sep = false;
+ static char comma[2] = ",";
+
+ if (i >= argv->argc)
+ return;
- // TODO push reference, rather than copying data
- for (i = skip ? 2 : 1; i < argv->argc; i++)
+ TOKEN_DATA_TYPE (&sep) = TOKEN_TEXT;
+ TOKEN_DATA_QUOTE_AGE (&sep) = 0;
+ if (quote)
+ {
+ char *str;
+ obstack_grow (obs, lquote.string, lquote.length);
+ TOKEN_DATA_LEN (&sep) = obstack_object_size (obs);
+ obstack_1grow (obs, '\0');
+ str = (char *) obstack_finish (obs);
+ TOKEN_DATA_TEXT (&sep) = str;
+ push_token (&sep, -1);
+ obstack_grow (obs, rquote.string, rquote.length);
+ obstack_1grow (obs, ',');
+ obstack_grow0 (obs, lquote.string, lquote.length);
+ str = (char *) obstack_finish (obs);
+ TOKEN_DATA_TEXT (&sep) = str;
+ TOKEN_DATA_LEN (&sep) = rquote.length + 1 + lquote.length;
+ }
+ else
+ {
+ TOKEN_DATA_TEXT (&sep) = comma;
+ TOKEN_DATA_LEN (&sep) = 1;
+ }
+ // TODO push entire $@ reference, rather than pushing each arg
+ for ( ; i < argv->argc; i++)
{
token = arg_token (argv, i);
- if (comma)
- obstack_1grow (obs, ',');
+ if (use_sep)
+ push_token (&sep, -1);
else
- comma = true;
+ use_sep = true;
// TODO handle func tokens?
assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
- if (quote)
- obstack_grow (obs, lquote.string, lquote.length);
- obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token));
- if (quote)
- obstack_grow (obs, rquote.string, rquote.length);
+ push_token (token, expansion_level - 1);
}
+ if (quote)
+ obstack_grow (obs, rquote.string, rquote.length);
}