diff options
author | Eric Blake <ebb9@byu.net> | 2007-10-27 05:44:09 -0600 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-01-19 15:24:14 -0700 |
commit | b1fef201f5d121e25e5dd61ec8ca3eac41a899ba (patch) | |
tree | a76568b9a8d798657905f9090405a87b3c933628 | |
parent | 622fc8cb2cb6ce0fc7391a6414bb0aaffeec6fc0 (diff) | |
download | m4-b1fef201f5d121e25e5dd61ec8ca3eac41a899ba.tar.gz |
Stage11: full circle for single argument references
-rw-r--r-- | src/input.c | 207 | ||||
-rw-r--r-- | src/m4.h | 25 | ||||
-rw-r--r-- | src/macro.c | 234 |
3 files changed, 348 insertions, 118 deletions
diff --git a/src/input.c b/src/input.c index 09cf7088..bf92bc44 100644 --- a/src/input.c +++ b/src/input.c @@ -153,6 +153,7 @@ static bool input_change; #define CHAR_EOF 256 /* Character return on EOF. */ #define CHAR_MACRO 257 /* Character return for MACRO token. */ +#define CHAR_QUOTE 258 /* Character return for quoted string. */ /* Quote chars. */ STRING rquote; @@ -167,7 +168,7 @@ STRING ecomm; # define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*" /* Table of characters that can start a word. */ -static char *word_start; +static char word_start[256]; /* Current regular expression for detecting words. */ static struct re_pattern_buffer word_regexp; @@ -201,7 +202,7 @@ static const char *token_type_string (token_type); | chain that starts at *START and ends at *END. START may be NULL | | if *END is non-NULL. | `-------------------------------------------------------------------*/ -static void +void make_text_link (struct obstack *obs, token_chain **start, token_chain **end) { token_chain *chain; @@ -218,6 +219,7 @@ make_text_link (struct obstack *obs, token_chain **start, token_chain **end) *start = chain; *end = chain; chain->next = NULL; + chain->quote_age = 0; chain->str = str; chain->len = len; chain->level = -1; @@ -361,6 +363,7 @@ push_token (token_data *token, int level) next->u.u_c.chain = chain; next->u.u_c.end = chain; chain->next = NULL; + chain->quote_age = TOKEN_DATA_QUOTE_AGE (token); chain->str = TOKEN_DATA_TEXT (token); chain->len = TOKEN_DATA_LEN (token); chain->level = level; @@ -563,19 +566,6 @@ pop_wrapup (void) return true; } -/*-------------------------------------------------------------------. -| When a MACRO token is seen, next_token () uses init_macro_token () | -| to retrieve the value of the function pointer and store it in TD. 
| -`-------------------------------------------------------------------*/ - -static void -init_macro_token (token_data *td) -{ - assert (isp->type == INPUT_MACRO); - TOKEN_DATA_TYPE (td) = TOKEN_FUNC; - TOKEN_DATA_FUNC (td) = isp->u.func; -} - /*--------------------------------------------------------------. | Dump a representation of INPUT to the obstack OBS, for use in | | tracing. | @@ -699,16 +689,19 @@ peek_input (void) | consisting of a newline alone is taken as belonging to the line it | | ends, and the current line number is not incremented until the | | next character is read. 99.9% of all calls will read from a | -| string, so factor that out into a macro for speed. | +| string, so factor that out into a macro for speed. If | +| ALLOW_QUOTE, and the current input matches the current quote age, | +| return CHAR_QUOTE and leave consumption of data for | +| append_quote_token. | `-------------------------------------------------------------------*/ -#define next_char() \ +#define next_char(AQ) \ (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \ ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \ - : next_char_1 ()) + : next_char_1 (AQ)) static int -next_char_1 (void) +next_char_1 (bool allow_quote) { int ch; token_chain *chain; @@ -765,10 +758,14 @@ next_char_1 (void) chain = isp->u.u_c.chain; while (chain) { + if (allow_quote && chain->quote_age == current_quote_age) + return CHAR_QUOTE; if (chain->str) { if (chain->len) { + /* Partial consumption invalidates quote age. 
*/ + chain->quote_age = 0; chain->len--; return to_uchar (*chain->str++); } @@ -808,7 +805,7 @@ skip_line (const char *name) const char *file = current_file; int line = current_line; - while ((ch = next_char ()) != CHAR_EOF && ch != '\n') + while ((ch = next_char (false)) != CHAR_EOF && ch != '\n') ; if (ch == CHAR_EOF) /* current_file changed to "" if we see CHAR_EOF, use the @@ -825,6 +822,49 @@ skip_line (const char *name) } +/*-------------------------------------------------------------------. +| When a MACRO token is seen, next_token () uses init_macro_token () | +| to retrieve the value of the function pointer and store it in TD. | +`-------------------------------------------------------------------*/ + +static void +init_macro_token (token_data *td) +{ + assert (isp->type == INPUT_MACRO); + TOKEN_DATA_TYPE (td) = TOKEN_FUNC; + TOKEN_DATA_FUNC (td) = isp->u.func; +} + +/*-------------------------------------------------------------------. +| When a QUOTE token is seen, convert TD to a composite (if it is | +| not one already), consisting of any unfinished text on OBS, as | +| well as the quoted token from the top of the input stack. Use OBS | +| for any additional allocations needed to store the token chain. 
| +`-------------------------------------------------------------------*/ +static void +append_quote_token (struct obstack *obs, token_data *td) +{ + token_chain *src_chain = isp->u.u_c.chain; + token_chain *chain; + assert (isp->type == INPUT_CHAIN && obs && current_quote_age); + + if (TOKEN_DATA_TYPE (td) == TOKEN_VOID) + { + TOKEN_DATA_TYPE (td) = TOKEN_COMP; + td->u.u_c.chain = td->u.u_c.end = NULL; + } + assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP); + make_text_link (obs, &td->u.u_c.chain, &td->u.u_c.end); + chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain); + if (td->u.u_c.end) + td->u.u_c.end->next = chain; + else + td->u.u_c.chain = chain; + td->u.u_c.end = chain; + td->u.u_c.end->next = NULL; + isp->u.u_c.chain = src_chain->next; +} + /*------------------------------------------------------------------. | This function is for matching a string against a prefix of the | | input stream. If the string S matches the input and CONSUME is | @@ -848,14 +888,14 @@ match_input (const char *s, bool consume) if (s[1] == '\0') { if (consume) - (void) next_char (); + next_char (false); return true; /* short match */ } - (void) next_char (); + next_char (false); for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); ) { - (void) next_char (); + next_char (false); n++; if (*s == '\0') /* long match */ { @@ -1016,7 +1056,6 @@ void set_word_regexp (const char *caller, const char *regexp) { int i; - char test[2]; const char *msg; struct re_pattern_buffer new_word_regexp; @@ -1048,15 +1087,10 @@ set_word_regexp (const char *caller, const char *regexp) default_word_regexp = false; set_quote_age (); - if (word_start == NULL) - word_start = (char *) xmalloc (256); - - word_start[0] = '\0'; - test[1] = '\0'; for (i = 1; i < 256; i++) { - test[0] = i; - word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0; + char test = i; + word_start[i] = re_match (&word_regexp, &test, 1, 0, NULL) > 0; } } @@ -1140,16 +1174,17 @@ safe_quotes (void) 
/*--------------------------------------------------------------------. -| Parse and return a single token from the input stream. A token | -| can either be TOKEN_EOF, if the input_stack is empty; it can be | -| TOKEN_STRING for a quoted string or comment; TOKEN_WORD for | -| something that is a potential macro name; and TOKEN_SIMPLE for any | -| single character that is not a part of any of the previous types. | -| If LINE is not NULL, set *LINE to the line where the token starts. | -| If OBS is not NULL, expand TOKEN_STRING directly into OBS rather | -| than in token_stack temporary storage area. Report errors | -| (unterminated comments or strings) on behalf of CALLER, if | -| non-NULL. | +| Parse a single token from the input stream, set TD to its | +| contents, and return its type. A token is TOKEN_EOF if the | +| input_stack is empty; TOKEN_STRING for a quoted string or comment; | +| TOKEN_WORD for something that is a potential macro name; and | +| TOKEN_SIMPLE for any single character that is not a part of any of | +| the previous types. If LINE is not NULL, set *LINE to the line | +| where the token starts. If OBS is not NULL, expand TOKEN_STRING | +| directly into OBS rather than in token_stack temporary storage | +| area, and TD could be a TOKEN_COMP instead of the usual | +| TOKEN_TEXT. Report errors (unterminated comments or strings) on | +| behalf of CALLER, if non-NULL. | | | | Next_token () returns the token type, and passes back a pointer to | | the token data through TD. Non-string token text is collected on | @@ -1165,7 +1200,6 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) int quote_level; token_type type; #ifdef ENABLE_CHANGEWORD - int startpos; char *orig_text = NULL; #endif /* ENABLE_CHANGEWORD */ const char *file; @@ -1181,19 +1215,20 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) line = &dummy; /* Can't consume character until after CHAR_MACRO is handled. 
*/ + TOKEN_DATA_TYPE (td) = TOKEN_VOID; ch = peek_input (); if (ch == CHAR_EOF) { #ifdef DEBUG_INPUT xfprintf (stderr, "next_token -> EOF\n"); #endif /* DEBUG_INPUT */ - next_char (); + next_char (false); return TOKEN_EOF; } if (ch == CHAR_MACRO) { init_macro_token (td); - next_char (); + next_char (false); #ifdef DEBUG_INPUT xfprintf (stderr, "next_token -> MACDEF (%s)\n", find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name); @@ -1201,7 +1236,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) return TOKEN_MACDEF; } - next_char (); /* Consume character we already peeked at. */ + next_char (false); /* Consume character we already peeked at. */ file = current_file; *line = current_line; if (MATCH (ch, bcomm.string, true)) @@ -1209,11 +1244,14 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) if (obs) obs_td = obs; obstack_grow (obs_td, bcomm.string, bcomm.length); - while ((ch = next_char ()) != CHAR_EOF + while ((ch = next_char (false)) < CHAR_EOF && !MATCH (ch, ecomm.string, true)) obstack_1grow (obs_td, ch); if (ch != CHAR_EOF) - obstack_grow (obs_td, ecomm.string, ecomm.length); + { + assert (ch < CHAR_EOF); + obstack_grow (obs_td, ecomm.string, ecomm.length); + } else /* Current_file changed to "" if we see CHAR_EOF, use the previous value we stored earlier. 
*/ @@ -1225,10 +1263,10 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) else if (default_word_regexp && (isalpha (ch) || ch == '_')) { obstack_1grow (&token_stack, ch); - while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_')) + while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_')) { obstack_1grow (&token_stack, ch); - (void) next_char (); + next_char (false); } type = TOKEN_WORD; } @@ -1241,20 +1279,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) while (1) { ch = peek_input (); - if (ch == CHAR_EOF) + if (ch >= CHAR_EOF) break; obstack_1grow (&token_stack, ch); - startpos = re_search (&word_regexp, - (char *) obstack_base (&token_stack), - obstack_object_size (&token_stack), 0, 0, - &regs); - if (startpos != 0 || - regs.end [0] != obstack_object_size (&token_stack)) + if (re_match (&word_regexp, (char *) obstack_base (&token_stack), + obstack_object_size (&token_stack), 0, &regs) + != obstack_object_size (&token_stack)) { obstack_blank (&token_stack, -1); break; } - next_char (); + next_char (false); } obstack_1grow (&token_stack, '\0'); @@ -1297,14 +1332,16 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) quote_level = 1; while (1) { - ch = next_char (); + ch = next_char (obs != NULL && current_quote_age); if (ch == CHAR_EOF) /* Current_file changed to "" if we see CHAR_EOF, use the previous value we stored earlier. 
*/ m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller, _("end of file in string")); - if (MATCH (ch, rquote.string, true)) + if (ch == CHAR_QUOTE) + append_quote_token (obs, td); + else if (MATCH (ch, rquote.string, true)) { if (--quote_level == 0) break; @@ -1316,35 +1353,49 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) obstack_grow (obs_td, lquote.string, lquote.length); } else - obstack_1grow (obs_td, ch); + { + assert (ch < CHAR_EOF); + obstack_1grow (obs_td, ch); + } } type = TOKEN_STRING; } - TOKEN_DATA_TYPE (td) = TOKEN_TEXT; - TOKEN_DATA_LEN (td) = obstack_object_size (obs_td); - if (obs_td != obs) + if (TOKEN_DATA_TYPE (td) == TOKEN_VOID) { - obstack_1grow (obs_td, '\0'); - TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td); - } - else - TOKEN_DATA_TEXT (td) = NULL; - TOKEN_DATA_QUOTE_AGE (td) = current_quote_age; + TOKEN_DATA_TYPE (td) = TOKEN_TEXT; + TOKEN_DATA_LEN (td) = obstack_object_size (obs_td); + if (obs_td != obs) + { + obstack_1grow (obs_td, '\0'); + TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td); + } + else + TOKEN_DATA_TEXT (td) = NULL; + TOKEN_DATA_QUOTE_AGE (td) = current_quote_age; #ifdef ENABLE_CHANGEWORD - if (orig_text == NULL) - TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td); + if (orig_text == NULL) + TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td); + else + { + TOKEN_DATA_ORIG_TEXT (td) = orig_text; + TOKEN_DATA_LEN (td) = strlen (orig_text); + } +#endif /* ENABLE_CHANGEWORD */ +#ifdef DEBUG_INPUT + xfprintf (stderr, "next_token -> %s (%s), len %zu\n", + token_type_string (type), TOKEN_DATA_TEXT (td), + TOKEN_DATA_LEN (td)); +#endif /* DEBUG_INPUT */ + } else { - TOKEN_DATA_ORIG_TEXT (td) = orig_text; - TOKEN_DATA_LEN (td) = strlen (orig_text); - } -#endif /* ENABLE_CHANGEWORD */ + assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP && type == TOKEN_STRING); #ifdef DEBUG_INPUT - xfprintf (stderr, "next_token -> %s (%s), len %zu\n", - token_type_string (type), TOKEN_DATA_TEXT (td), - 
TOKEN_DATA_LEN (td)); + xfprintf (stderr, "next_token -> %s <chain>\n", + token_type_string (type)); #endif /* DEBUG_INPUT */ + } return type; } @@ -271,19 +271,20 @@ enum token_data_type TOKEN_VOID, /* Token still being constructed, u is invalid. */ TOKEN_TEXT, /* Straight text, u.u_t is valid. */ TOKEN_FUNC, /* Builtin function definition, u.func is valid. */ - TOKEN_COMP /* Composite argument, u.chain is valid. */ + TOKEN_COMP /* Composite argument, u.u_c is valid. */ }; /* Composite tokens are built of a linked list of chains. */ struct token_chain { - token_chain *next; /* Pointer to next link of chain. */ - const char *str; /* NUL-terminated string if text, else NULL. */ - size_t len; /* Length of str, else 0. */ - int level; /* Expansion level of link content, or -1. */ - macro_arguments *argv;/* Reference to earlier $@. */ - unsigned int index; /* Argument index within argv. */ - bool flatten; /* True to treat builtins as text. */ + token_chain *next; /* Pointer to next link of chain. */ + unsigned int quote_age; /* Quote_age of this link of chain, or 0. */ + const char *str; /* NUL-terminated string if text, or NULL. */ + size_t len; /* Length of str, else 0. */ + int level; /* Expansion level of link content, or -1. */ + macro_arguments *argv; /* Reference to earlier $@. */ + unsigned int index; /* Argument index within argv. */ + bool flatten; /* True to treat builtins as text. */ }; /* The content of a token or macro argument. */ @@ -319,7 +320,12 @@ struct token_data /* Composite text: a linked list of straight text and $@ placeholders. */ - token_chain *chain; + struct + { + token_chain *chain; /* First link of the chain. */ + token_chain *end; /* Last link of the chain. 
*/ + } + u_c; } u; }; @@ -342,6 +348,7 @@ token_type next_token (token_data *, int *, struct obstack *, const char *); void skip_line (const char *); /* push back input */ +void make_text_link (struct obstack *, token_chain **, token_chain **); void push_file (FILE *, const char *, bool); void push_macro (builtin_func *); struct obstack *push_string_init (void); diff --git a/src/macro.c b/src/macro.c index 228f82d5..bb8f4fa9 100644 --- a/src/macro.c +++ b/src/macro.c @@ -45,6 +45,9 @@ struct macro_arguments bool_bitfield inuse : 1; /* False if all arguments are just text or func, true if this argv refers to another one. */ + bool_bitfield wrapper : 1; + /* False if all arguments belong to this argv, true if some of them + include references to another. */ bool_bitfield has_ref : 1; const char *argv0; /* The macro name being expanded. */ size_t argv0_len; /* Length of argv0. */ @@ -382,11 +385,16 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) return t == TOKEN_COMMA; warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp)); } - obstack_1grow (obs, '\0'); - TOKEN_DATA_TYPE (argp) = TOKEN_TEXT; - TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs); - TOKEN_DATA_LEN (argp) = len; - TOKEN_DATA_QUOTE_AGE (argp) = age; + if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP) + { + obstack_1grow (obs, '\0'); + TOKEN_DATA_TYPE (argp) = TOKEN_TEXT; + TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs); + TOKEN_DATA_LEN (argp) = len; + TOKEN_DATA_QUOTE_AGE (argp) = age; + } + else + make_text_link (obs, NULL, &argp->u.u_c.end); return t == TOKEN_COMMA; } /* fallthru */ @@ -411,6 +419,23 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller) case TOKEN_STRING: if (!expand_token (obs, t, &td, line, first)) age = 0; + if (TOKEN_DATA_TYPE (&td) == TOKEN_COMP) + { + if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP) + { + if (TOKEN_DATA_TYPE (argp) == TOKEN_FUNC) + warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp)); + TOKEN_DATA_TYPE (argp) = 
TOKEN_COMP; + argp->u.u_c.chain = td.u.u_c.chain; + argp->u.u_c.end = td.u.u_c.end; + } + else + { + assert (argp->u.u_c.end); + argp->u.u_c.end->next = td.u.u_c.chain; + argp->u.u_c.end = td.u.u_c.end; + } + } break; case TOKEN_MACDEF: @@ -459,6 +484,7 @@ collect_arguments (symbol *sym, struct obstack *arguments, args.argc = 1; args.inuse = false; + args.wrapper = false; args.has_ref = false; args.argv0 = SYMBOL_NAME (sym); args.argv0_len = strlen (args.argv0); @@ -490,11 +516,14 @@ collect_arguments (symbol *sym, struct obstack *arguments, && TOKEN_DATA_LEN (tdp) > 0 && TOKEN_DATA_QUOTE_AGE (tdp) != args.quote_age) args.quote_age = 0; + else if (TOKEN_DATA_TYPE (tdp) == TOKEN_COMP) + args.has_ref = true; } while (more_args); } argv = (macro_arguments *) obstack_finish (argv_stack); argv->argc = args.argc; + argv->has_ref = args.has_ref; if (args.quote_age != quote_age ()) argv->quote_age = 0; argv->arraylen = args.arraylen; @@ -633,8 +662,23 @@ expand_macro (symbol *sym) if (SYMBOL_DELETED (sym)) free_symbol (sym); - /* If argv contains references, those refcounts can be reduced now. */ - // TODO - support references in argv + /* If argv contains references, those refcounts must be reduced now. */ + if (argv->has_ref) + { + token_chain *chain; + size_t i; + for (i = 0; i < argv->arraylen; i++) + if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP) + { + chain = argv->array[i]->u.u_c.chain; + while (chain) + { + if (chain->level >= 0) + adjust_refcount (chain->level, false); + chain = chain->next; + } + } + } /* We no longer need argv, so reduce the refcount. Additionally, if no other references to argv were created, we can free our portion @@ -698,7 +742,7 @@ arg_token (macro_arguments *argv, unsigned int index) token_data *token; assert (index && index < argv->argc); - if (!argv->has_ref) + if (!argv->wrapper) return argv->array[index - 1]; /* Must cycle through all tokens, until we find index, since a ref may occupy multiple indices. 
*/ @@ -707,7 +751,7 @@ arg_token (macro_arguments *argv, unsigned int index) token = argv->array[i]; if (TOKEN_DATA_TYPE (token) == TOKEN_COMP) { - token_chain *chain = token->u.chain; + token_chain *chain = token->u.u_c.chain; // TODO for now we support only a single-length $@ chain... assert (!chain->next && !chain->str); if (index < chain->argv->argc - (chain->index - 1)) @@ -731,14 +775,14 @@ static void arg_mark (macro_arguments *argv) { argv->inuse = true; - if (argv->has_ref) + if (argv->wrapper) { // TODO for now we support only a single-length $@ chain... assert (argv->arraylen == 1 && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP - && !argv->array[0]->u.chain->next - && !argv->array[0]->u.chain->str); - argv->array[0]->u.chain->argv->inuse = true; + && !argv->array[0]->u.u_c.chain->next + && !argv->array[0]->u.u_c.chain->str); + argv->array[0]->u.u_c.chain->argv->inuse = true; } } @@ -761,17 +805,22 @@ arg_type (macro_arguments *argv, unsigned int index) return TOKEN_TEXT; token = arg_token (argv, index); type = TOKEN_DATA_TYPE (token); - assert (type != TOKEN_COMP); + /* Composite tokens are currently sequences of text only. */ + if (type == TOKEN_COMP) + type = TOKEN_TEXT; return type; } /* Given ARGV, return the text at argument INDEX. Abort if the argument is not text. Index 0 is always text, and indices beyond - argc return the empty string. */ + argc return the empty string. The result is always NUL-terminated, + even if it includes embedded NUL characters. */ const char * arg_text (macro_arguments *argv, unsigned int index) { token_data *token; + token_chain *chain; + struct obstack *obs; if (index == 0) return argv->argv0; @@ -783,8 +832,19 @@ arg_text (macro_arguments *argv, unsigned int index) case TOKEN_TEXT: return TOKEN_DATA_TEXT (token); case TOKEN_COMP: - // TODO - how to concatenate multiple arguments? For now, we expect - // only one element in the chain, and arg_token dereferences it... 
+ // TODO - concatenate argv refs, or even functions? For now, we assume + // all chain elements are text. + chain = token->u.u_c.chain; + obs = arg_scratch (); + while (chain) + { + // TODO - cache compiled chains? + assert (chain->str); + obstack_grow (obs, chain->str, chain->len); + chain = chain->next; + } + obstack_1grow (obs, '\0'); + return (char *) obstack_finish (obs); default: break; } @@ -801,14 +861,84 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) { token_data *ta = arg_token (argv, indexa); token_data *tb = arg_token (argv, indexb); + token_chain tmpa; + token_chain tmpb; + token_chain *ca = &tmpa; + token_chain *cb = &tmpb; + /* Quick tests. */ if (ta == &empty_token || tb == &empty_token) return ta == tb; + if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT + && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT) + return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb) + && memcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb), + TOKEN_DATA_LEN (ta)) == 0); + + /* Convert both arguments to chains, if not one already. */ // TODO - allow builtin tokens in the comparison? - assert (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT - && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT); - return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb) - && strcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb)) == 0); + if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT) + { + tmpa.next = NULL; + tmpa.str = TOKEN_DATA_TEXT (ta); + tmpa.len = TOKEN_DATA_LEN (ta); + } + else + { + assert (TOKEN_DATA_TYPE (ta) == TOKEN_COMP); + ca = ta->u.u_c.chain; + } + if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT) + { + tmpb.next = NULL; + tmpb.str = TOKEN_DATA_TEXT (tb); + tmpb.len = TOKEN_DATA_LEN (tb); + } + else + { + assert (TOKEN_DATA_TYPE (tb) == TOKEN_COMP); + cb = tb->u.u_c.chain; + } + + /* Compare each link of the chain. */ + while (ca && cb) + { + // TODO support comparison against $@ refs. 
+ assert (ca->str && cb->str); + if (ca->len == cb->len) + { + if (memcmp (ca->str, cb->str, ca->len) != 0) + return false; + ca = ca->next; + cb = cb->next; + } + else if (ca->len < cb->len) + { + if (memcmp (ca->str, cb->str, ca->len) != 0) + return false; + tmpb.next = cb->next; + tmpb.str = cb->str + ca->len; + tmpb.len = cb->len - ca->len; + ca = ca->next; + cb = &tmpb; + } + else + { + assert (ca->len > cb->len); + if (memcmp (ca->str, cb->str, cb->len) != 0) + return false; + tmpa.next = ca->next; + tmpa.str = ca->str + cb->len; + tmpa.len = ca->len - cb->len; + ca = &tmpa; + cb = cb->next; + } + } + + /* If we get this far, the two tokens are equal only if both chains + are exhausted. */ + assert (ca != cb || ca == NULL); + return ca == cb; } /* Given ARGV, return true if argument INDEX is the empty string. @@ -830,6 +960,8 @@ size_t arg_len (macro_arguments *argv, unsigned int index) { token_data *token; + token_chain *chain; + size_t len; if (index == 0) return argv->argv0_len; @@ -842,8 +974,18 @@ arg_len (macro_arguments *argv, unsigned int index) assert ((token == &empty_token) == (TOKEN_DATA_LEN (token) == 0)); return TOKEN_DATA_LEN (token); case TOKEN_COMP: - // TODO - how to concatenate multiple arguments? For now, we expect - // only one element in the chain, and arg_token dereferences it... + // TODO - concatenate argv refs, or even functions? For now, we assume + // all chain elements are text. + chain = token->u.u_c.chain; + len = 0; + while (chain) + { + assert (chain->str); + len += chain->len; + chain = chain->next; + } + assert (len); + return len; default: break; } @@ -892,12 +1034,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len, /* When making a reference through a reference, point to the original if possible. */ - if (argv->has_ref) + if (argv->wrapper) { // TODO for now we support only a single-length $@ chain... 
assert (argv->arraylen == 1 && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP); - chain = argv->array[0]->u.chain; + chain = argv->array[0]->u.u_c.chain; assert (!chain->next && !chain->str); argv = chain->argv; index += chain->index - 1; @@ -907,6 +1049,7 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len, new_argv = (macro_arguments *) obstack_alloc (obs, offsetof (macro_arguments, array)); new_argv->arraylen = 0; + new_argv->wrapper = false; new_argv->has_ref = false; } else @@ -918,10 +1061,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len, chain = (token_chain *) obstack_alloc (obs, sizeof *chain); new_argv->arraylen = 1; new_argv->array[0] = token; + new_argv->wrapper = true; new_argv->has_ref = true; TOKEN_DATA_TYPE (token) = TOKEN_COMP; - token->u.chain = chain; + token->u.u_c.chain = token->u.u_c.end = chain; chain->next = NULL; + chain->quote_age = argv->quote_age; chain->str = NULL; chain->len = 0; chain->level = expansion_level - 1; @@ -955,9 +1100,23 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) return; token = arg_token (argv, index); // TODO handle func tokens? - assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); - if (push_token (token, expansion_level - 1)) - arg_mark (argv); + if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT) + { + if (push_token (token, expansion_level - 1)) + arg_mark (argv); + } + else if (TOKEN_DATA_TYPE (token) == TOKEN_COMP) + { + // TODO - really handle composites; for now, just flatten the + // composite and push its text + token_chain *chain = token->u.u_c.chain; + while (chain) + { + assert (chain->str); + obstack_grow (obs, chain->str, chain->len); + chain = chain->next; + } + } } /* Push series of comma-separated arguments from ARGV, which should @@ -968,6 +1127,7 @@ void push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) { token_data *token; + token_chain *chain; unsigned int i = skip ? 
2 : 1; const char *sep = ","; size_t sep_len = 1; @@ -1007,8 +1167,20 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) else use_sep = true; // TODO handle func tokens? - assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); - inuse |= push_token (token, expansion_level - 1); + if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT) + inuse |= push_token (token, expansion_level - 1); + else + { + // TODO - handle composite text in push_token + assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP); + chain = token->u.u_c.chain; + while (chain) + { + assert (chain->str); + obstack_grow (obs, chain->str, chain->len); + chain = chain->next; + } + } } if (quote) obstack_grow (obs, rquote.string, rquote.length); |