diff options
author | Eric Blake <ebb9@byu.net> | 2007-10-25 10:47:43 -0600 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2007-12-11 09:40:13 -0700 |
commit | 687dd577f66622e0b69a8cd03b7e5e76fa546c52 (patch) | |
tree | e118788b604717cf4f41d10d83d86cf7b5317c9d | |
parent | 6dcf7d2e3c5deac2d16ee9a29b6a307474603dc7 (diff) | |
download | m4-687dd577f66622e0b69a8cd03b7e5e76fa546c52.tar.gz |
Stage 7: add chained token support to input parser
-rw-r--r-- | src/input.c | 211 |
-rw-r--r-- | src/m4.h | 11 |
-rw-r--r-- | src/macro.c | 53 |
3 files changed, 234 insertions, 41 deletions
diff --git a/src/input.c b/src/input.c index 4e5d2990..f2f14e95 100644 --- a/src/input.c +++ b/src/input.c @@ -69,7 +69,8 @@ enum input_type { INPUT_STRING, /* String resulting from macro expansion. */ INPUT_FILE, /* File from command line or include. */ - INPUT_MACRO /* Builtin resulting from defn. */ + INPUT_MACRO, /* Builtin resulting from defn. */ + INPUT_CHAIN /* FIFO chain of separate strings and $@ refs. */ }; typedef enum input_type input_type; @@ -85,7 +86,8 @@ struct input_block { struct { - char *string; /* Remaining string value. */ + char *str; /* Remaining string value. */ + size_t len; /* Remaining length. */ } u_s; /* INPUT_STRING */ struct @@ -96,7 +98,13 @@ struct input_block bool_bitfield advance : 1; /* Track previous start_of_input_line. */ } u_f; /* INPUT_FILE */ - builtin_func *func; /* Pointer to macro's function. */ + builtin_func *func; /* INPUT_MACRO */ + struct + { + token_chain *chain; /* Current link in chain. */ + token_chain *end; /* Last link in chain. */ + } + u_c; /* INPUT_CHAIN */ } u; }; @@ -184,6 +192,36 @@ static const char *token_type_string (token_type); /*-------------------------------------------------------------------. +| Given an obstack OBS, capture any unfinished text as a link in the | +| chain that starts at *START and ends at *END. START may be NULL | +| if *END is non-NULL. 
| +`-------------------------------------------------------------------*/ +static void +make_text_link (struct obstack *obs, token_chain **start, token_chain **end) +{ + token_chain *chain; + size_t len = obstack_object_size (obs); + + assert (end && (start || *end)); + if (len) + { + char *str = (char *) obstack_finish (obs); + chain = (token_chain *) obstack_alloc (obs, sizeof *chain); + if (*end) + (*end)->next = chain; + else + *start = chain; + *end = chain; + chain->next = NULL; + chain->str = str; + chain->len = len; + chain->argv = NULL; + chain->index = 0; + chain->flatten = false; + } +} + +/*-------------------------------------------------------------------. | push_file () pushes an input file on the input stack, saving the | | current file name and line number. If next is non-NULL, this push | | invalidates a call to push_string_init (), whose storage is | @@ -272,6 +310,54 @@ push_string_init (void) } /*-------------------------------------------------------------------. +| If TOKEN contains text, then convert the current string into a | +| chain if it is not one already, and add the contents of TOKEN as a | +| new link in the chain. LEVEL describes the current expansion | +| level, or -1 if the contents of TOKEN reside entirely on the | +| current_input stack and TOKEN lives in temporary storage. Allows | +| gathering input from multiple locations, rather than copying | +| everything consecutively onto the input stack. Must be called | +| between push_string_init and push_string_finish. 
| +`-------------------------------------------------------------------*/ +void +push_token (token_data *token, int level) +{ + token_chain *chain; + + assert (next); + // TODO - also accept TOKEN_COMP chains + assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); + if (TOKEN_DATA_LEN (token) == 0) + return; + + if (next->type == INPUT_STRING) + { + next->type = INPUT_CHAIN; + next->u.u_c.chain = next->u.u_c.end = NULL; + } + make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end); + chain = (token_chain *) obstack_alloc (current_input, sizeof *chain); + if (next->u.u_c.end) + next->u.u_c.end->next = chain; + else + next->u.u_c.chain = chain; + next->u.u_c.end = chain; + chain->next = NULL; + if (level >= 0) + // TODO - use token as-is, rather than copying data. This implies + // lengthening lifetime of $@ arguments until the rescan is complete, + // rather than the current approach of freeing them during expand_macro + chain->str = (char *) obstack_copy (current_input, TOKEN_DATA_TEXT (token), + TOKEN_DATA_LEN (token)); + else + chain->str = TOKEN_DATA_TEXT (token); + chain->len = TOKEN_DATA_LEN (token); + chain->argv = NULL; + chain->index = 0; + chain->flatten = false; +} + +/*-------------------------------------------------------------------. | Last half of push_string (). If next is now NULL, a call to | | push_file () or push_macro () has invalidated the previous call to | | push_string_init (), so we just give up. 
If the new object is | @@ -294,10 +380,15 @@ push_string_finish (void) return NULL; } - if (len) + if (len || next->type == INPUT_CHAIN) { - obstack_1grow (current_input, '\0'); - next->u.u_s.string = (char *) obstack_finish (current_input); + if (next->type == INPUT_STRING) + { + next->u.u_s.str = (char *) obstack_finish (current_input); + next->u.u_s.len = len; + } + else + make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end); next->prev = isp; isp = next; input_change = true; @@ -327,7 +418,8 @@ push_wrapup (const char *s) i->type = INPUT_STRING; i->file = current_file; i->line = current_line; - i->u.u_s.string = (char *) obstack_copy0 (wrapup_stack, s, strlen (s)); + i->u.u_s.len = strlen (s); + i->u.u_s.str = (char *) obstack_copy (wrapup_stack, s, i->u.u_s.len); wsp = i; } @@ -345,12 +437,13 @@ static bool pop_input (bool cleanup) { input_block *tmp = isp->prev; + token_chain *chain; switch (isp->type) { case INPUT_STRING: - assert (!cleanup || !*isp->u.u_s.string); - if (*isp->u.u_s.string) + assert (!cleanup || !isp->u.u_s.len); + if (isp->u.u_s.len) return false; break; @@ -359,6 +452,26 @@ pop_input (bool cleanup) return false; break; + case INPUT_CHAIN: + chain = isp->u.u_c.chain; + assert (!chain || !cleanup); + while (chain) + { + if (chain->str) + { + if (chain->len) + return false; + } + else + { + // TODO - peek into argv + assert (!"implemented yet"); + abort (); + } + chain = chain->next; + } + break; + case INPUT_FILE: if (!cleanup) return false; @@ -451,12 +564,13 @@ void input_print (struct obstack *obs, const input_block *input) { int maxlen = max_debug_argument_length; + token_chain *chain; assert (input); switch (input->type) { case INPUT_STRING: - obstack_print (obs, input->u.u_s.string, SIZE_MAX, &maxlen); + obstack_print (obs, input->u.u_s.str, input->u.u_s.len, &maxlen); break; case INPUT_FILE: obstack_grow (obs, "<file: ", strlen ("<file: ")); @@ -472,6 +586,17 @@ input_print (struct obstack *obs, const input_block 
*input) obstack_1grow (obs, '>'); } break; + case INPUT_CHAIN: + chain = input->u.u_c.chain; + while (chain) + { + // TODO support argv refs as well + assert (chain->str); + if (obstack_print (obs, chain->str, chain->len, &maxlen)) + return; + chain = chain->next; + } + break; default: assert (!"input_print"); abort (); @@ -493,6 +618,7 @@ peek_input (void) { int ch; input_block *block = isp; + token_chain *chain; while (1) { @@ -502,10 +628,9 @@ peek_input (void) switch (block->type) { case INPUT_STRING: - ch = to_uchar (block->u.u_s.string[0]); - if (ch != '\0') - return ch; - break; + if (!block->u.u_s.len) + break; + return to_uchar (block->u.u_s.str[0]); case INPUT_FILE: ch = getc (block->u.u_f.fp); @@ -520,6 +645,25 @@ peek_input (void) case INPUT_MACRO: return CHAR_MACRO; + case INPUT_CHAIN: + chain = block->u.u_c.chain; + while (chain) + { + if (chain->str) + { + if (chain->len) + return to_uchar (chain->str[0]); + } + else + { + // TODO - peek into argv + assert (!"implemented yet"); + abort (); + } + chain = chain->next; + } + break; + default: assert (!"peek_input"); abort (); @@ -539,15 +683,15 @@ peek_input (void) `-------------------------------------------------------------------------*/ #define next_char() \ - (isp && isp->type == INPUT_STRING && isp->u.u_s.string[0] \ - && !input_change \ - ? to_uchar (*isp->u.u_s.string++) \ + (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \ + ? 
(isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \ : next_char_1 ()) static int next_char_1 (void) { int ch; + token_chain *chain; while (1) { @@ -568,13 +712,10 @@ next_char_1 (void) switch (isp->type) { case INPUT_STRING: - ch = to_uchar (*isp->u.u_s.string); - if (ch != '\0') - { - isp->u.u_s.string++; - return ch; - } - break; + if (!isp->u.u_s.len) + break; + isp->u.u_s.len--; + return to_uchar (*isp->u.u_s.str++); case INPUT_FILE: if (start_of_input_line) @@ -600,6 +741,28 @@ next_char_1 (void) pop_input (true); return CHAR_MACRO; + case INPUT_CHAIN: + chain = isp->u.u_c.chain; + while (chain) + { + if (chain->str) + { + if (chain->len) + { + chain->len--; + return to_uchar (*chain->str++); + } + } + else + { + // TODO - read from argv + assert (!"implemented yet"); + abort (); + } + isp->u.u_c.chain = chain = chain->next; + } + break; + default: assert (!"next_char_1"); abort (); @@ -284,7 +284,7 @@ enum token_data_type struct token_chain { token_chain *next; /* Pointer to next link of chain. */ - char *str; /* NUL-terminated string if text, else NULL. */ + const char *str; /* NUL-terminated string if text, else NULL. */ size_t len; /* Length of str, else 0. */ macro_arguments *argv;/* Reference to earlier $@. */ unsigned int index; /* Argument index within argv. */ @@ -303,7 +303,7 @@ struct token_data cache for now. But it will be essential if we ever DO support NUL. */ size_t len; - char *text; + char *text; /* The contents of the token. */ /* The value of quote_age when this token was scanned. If this token is later encountered in the context of scanning a quoted string, and quote_age has not changed, @@ -312,7 +312,11 @@ struct token_data might change the parse on rescan. Ignored for 0 len. */ unsigned int quote_age; #ifdef ENABLE_CHANGEWORD - char *original_text; + /* If changeword is in effect, and contains a () group, then + this contains the entire token, while text contains the + portion that matched the () group to form a macro name. 
+ Otherwise, this field is unused. */ + const char *original_text; #endif } u_t; @@ -346,6 +350,7 @@ void skip_line (const char *); void push_file (FILE *, const char *, bool); void push_macro (builtin_func *); struct obstack *push_string_init (void); +void push_token (token_data *, int); const input_block *push_string_finish (void); void push_wrapup (const char *); bool pop_wrapup (void); diff --git a/src/macro.c b/src/macro.c index c4eaaddb..873e82ca 100644 --- a/src/macro.c +++ b/src/macro.c @@ -727,8 +727,7 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) token = arg_token (argv, index); // TODO handle func tokens? assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); - // TODO actually push a reference, rather than copying data - obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token)); + push_token (token, expansion_level - 1); } /* Push series of comma-separated arguments from ARGV, which should @@ -739,23 +738,49 @@ void push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) { token_data *token; - unsigned int i; - bool comma = false; + token_data sep; + unsigned int i = skip ? 2 : 1; + bool use_sep = false; + static char comma[2] = ","; + + if (i >= argv->argc) + return; - // TODO push reference, rather than copying data - for (i = skip ? 
2 : 1; i < argv->argc; i++) + TOKEN_DATA_TYPE (&sep) = TOKEN_TEXT; + TOKEN_DATA_QUOTE_AGE (&sep) = 0; + if (quote) + { + char *str; + obstack_grow (obs, lquote.string, lquote.length); + TOKEN_DATA_LEN (&sep) = obstack_object_size (obs); + obstack_1grow (obs, '\0'); + str = (char *) obstack_finish (obs); + TOKEN_DATA_TEXT (&sep) = str; + push_token (&sep, -1); + obstack_grow (obs, rquote.string, rquote.length); + obstack_1grow (obs, ','); + obstack_grow0 (obs, lquote.string, lquote.length); + str = (char *) obstack_finish (obs); + TOKEN_DATA_TEXT (&sep) = str; + TOKEN_DATA_LEN (&sep) = rquote.length + 1 + lquote.length; + } + else + { + TOKEN_DATA_TEXT (&sep) = comma; + TOKEN_DATA_LEN (&sep) = 1; + } + // TODO push entire $@ reference, rather than pushing each arg + for ( ; i < argv->argc; i++) { token = arg_token (argv, i); - if (comma) - obstack_1grow (obs, ','); + if (use_sep) + push_token (&sep, -1); else - comma = true; + use_sep = true; // TODO handle func tokens? assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); - if (quote) - obstack_grow (obs, lquote.string, lquote.length); - obstack_grow (obs, TOKEN_DATA_TEXT (token), TOKEN_DATA_LEN (token)); - if (quote) - obstack_grow (obs, rquote.string, rquote.length); + push_token (token, expansion_level - 1); } + if (quote) + obstack_grow (obs, rquote.string, rquote.length); } |