diff options
| field | value | date |
|---|---|---|
| author | Eric Blake <ebb9@byu.net> | 2007-10-30 11:17:51 -0600 |
| committer | Eric Blake <ebb9@byu.net> | 2008-01-26 21:06:41 -0700 |
| commit | 290301246eefb3f58fe29b8ccd9118b23c76c61c (patch) | |
| tree | 5d7b6991f170254bf4365ae8a357adb793221e92 | |
| parent | d8324ac481f69682f6953ba3fb0c60cf67c7e8d7 (diff) | |
| download | m4-290301246eefb3f58fe29b8ccd9118b23c76c61c.tar.gz | |
Stage13: push composite text tokens
-rw-r--r-- | src/input.c | 174 |
-rw-r--r-- | src/m4.h | 2 |
-rw-r--r-- | src/macro.c | 38 |
3 files changed, 141 insertions, 73 deletions
diff --git a/src/input.c b/src/input.c index d8f347ef..17e16e47 100644 --- a/src/input.c +++ b/src/input.c @@ -313,37 +313,74 @@ push_string_init (void) return current_input; } -/*-------------------------------------------------------------------. -| If TOKEN contains text, then convert the current string into a | -| chain if it is not one already, and add the contents of TOKEN as a | -| new link in the chain. LEVEL describes the current expansion | -| level, or -1 if the contents of TOKEN reside entirely on the | -| current_input stack and TOKEN lives in temporary storage. Allows | -| gathering input from multiple locations, rather than copying | -| everything consecutively onto the input stack. Must be called | -| between push_string_init and push_string_finish. Return true only | -| if LEVEL is non-negative, and a reference was created to TOKEN, in | -| which case, the lifetime of TOKEN and its contents must last as | -| long as the input engine can parse references to it. | -`-------------------------------------------------------------------*/ +/*--------------------------------------------------------------------. +| This function allows gathering input from multiple locations, | +| rather than copying everything consecutively onto the input stack. | +| Must be called between push_string_init and push_string_finish. | +| | +| If TOKEN contains text, then convert the current input block into | +| a chain if it is not one already, and add the contents of TOKEN as | +| a new link in the chain. LEVEL describes the current expansion | +| level, or -1 if TOKEN is composite, its contents reside entirely | +| on the current_input stack, and TOKEN lives in temporary storage. | +| If TOKEN is a simple string, then it belongs to the current macro | +| expansion. If TOKEN is composite, then each text link has a level | +| of -1 if it belongs to the current macro expansion, otherwise it | +| is a back-reference where level tracks which stack it came from. 
| +| The resulting input block chain contains links with a level of -1 | +| if the text belongs to the input stack, otherwise the level where | +| the back-reference comes from. | +| | +| Return true only if a reference was created to the contents of | +| TOKEN, in which case, LEVEL was non-negative and the lifetime of | +| TOKEN and its contents must last as long as the input engine can | +| parse references to it. INUSE determines whether composite tokens | +| should favor creating back-references or copying text. | +`--------------------------------------------------------------------*/ bool -push_token (token_data *token, int level) +push_token (token_data *token, int level, bool inuse) { + token_chain *src_chain = NULL; token_chain *chain; - bool result = false; assert (next); // TODO - also accept TOKEN_COMP chains containing single $@ ref - assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT); /* Speed consideration - for short enough tokens, the speed and memory overhead of parsing another INPUT_CHAIN link outweighs the - time to inline the token text. */ - if (TOKEN_DATA_LEN (token) <= INPUT_INLINE_THRESHOLD) + time to inline the token text. But don't re-copy text if it + already lives on the obstack. */ + if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT) { - obstack_grow (current_input, TOKEN_DATA_TEXT (token), - TOKEN_DATA_LEN (token)); - return false; + assert (level >= 0); + if (TOKEN_DATA_LEN (token) <= INPUT_INLINE_THRESHOLD) + { + obstack_grow (current_input, TOKEN_DATA_TEXT (token), + TOKEN_DATA_LEN (token)); + return false; + } + } + else + { + /* For composite tokens, if argv is already in use, creating + additional references for long text segments is more + efficient in time. But if argv is not yet in use, and we + have a composite token, then the token must already contain a + back-reference, and memory usage is more efficient if we can + avoid using the current expand_macro, even if it means larger + copies. 
*/ + assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP); + src_chain = token->u.u_c.chain; + while (level >= 0 && src_chain && src_chain->type == CHAIN_STR + && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD + || (!inuse && src_chain->u.u_s.level == -1))) + { + obstack_grow (current_input, src_chain->u.u_s.str, + src_chain->u.u_s.len); + src_chain = src_chain->next; + } + if (!src_chain) + return false; } if (next->type == INPUT_STRING) @@ -352,24 +389,71 @@ push_token (token_data *token, int level) next->u.u_c.chain = next->u.u_c.end = NULL; } make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end); - chain = (token_chain *) obstack_alloc (current_input, sizeof *chain); - if (next->u.u_c.end) - next->u.u_c.end->next = chain; - else - next->u.u_c.chain = chain; - next->u.u_c.end = chain; - chain->next = NULL; - chain->type = CHAIN_STR; - chain->quote_age = TOKEN_DATA_QUOTE_AGE (token); - chain->u.u_s.str = TOKEN_DATA_TEXT (token); - chain->u.u_s.len = TOKEN_DATA_LEN (token); - chain->u.u_s.level = level; - if (level >= 0) + if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT) { + chain = (token_chain *) obstack_alloc (current_input, sizeof *chain); + if (next->u.u_c.end) + next->u.u_c.end->next = chain; + else + next->u.u_c.chain = chain; + next->u.u_c.end = chain; + chain->next = NULL; + chain->type = CHAIN_STR; + chain->quote_age = TOKEN_DATA_QUOTE_AGE (token); + chain->u.u_s.str = TOKEN_DATA_TEXT (token); + chain->u.u_s.len = TOKEN_DATA_LEN (token); + chain->u.u_s.level = level; adjust_refcount (level, true); - result = true; + inuse = true; } - return result; + while (src_chain) + { + if (level == -1) + { + /* Nothing to copy, since link already lives on obstack. */ + assert (src_chain->type != CHAIN_STR + || src_chain->u.u_s.level == -1); + chain = src_chain; + } + else + { + /* Allow inlining the final link with subsequent text. 
*/ + if (!src_chain->next && src_chain->type == CHAIN_STR + && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD + || (!inuse && src_chain->u.u_s.level == -1))) + { + obstack_grow (current_input, src_chain->u.u_s.str, + src_chain->u.u_s.len); + break; + } + /* We must clone each link in the chain, since next_char + destructively modifies the chain it is parsing. */ + chain = (token_chain *) obstack_copy (current_input, src_chain, + sizeof *chain); + if (chain->type == CHAIN_STR && chain->u.u_s.level == -1) + { + if (chain->u.u_s.len <= INPUT_INLINE_THRESHOLD || !inuse) + chain->u.u_s.str = (char *) obstack_copy (current_input, + chain->u.u_s.str, + chain->u.u_s.len); + else + { + chain->u.u_s.level = level; + inuse = true; + } + } + } + if (next->u.u_c.end) + next->u.u_c.end->next = chain; + else + next->u.u_c.chain = chain; + next->u.u_c.end = chain; + assert (chain->type == CHAIN_STR); + if (chain->u.u_s.level >= 0) + adjust_refcount (chain->u.u_s.level, true); + src_chain = src_chain->next; + } + return inuse; } /*-------------------------------------------------------------------. @@ -849,7 +933,20 @@ append_quote_token (struct obstack *obs, token_data *td) { token_chain *src_chain = isp->u.u_c.chain; token_chain *chain; - assert (isp->type == INPUT_CHAIN && obs && current_quote_age); + + assert (isp->type == INPUT_CHAIN && obs && current_quote_age + && src_chain->type == CHAIN_STR && src_chain->u.u_s.level >= 0); + isp->u.u_c.chain = src_chain->next; + + /* Speed consideration - for short enough tokens, the speed and + memory overhead of parsing another INPUT_CHAIN link outweighs the + time to inline the token text. 
*/ + if (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD) + { + obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len); + adjust_refcount (src_chain->u.u_s.level, false); + return; + } if (TOKEN_DATA_TYPE (td) == TOKEN_VOID) { @@ -864,8 +961,7 @@ append_quote_token (struct obstack *obs, token_data *td) else td->u.u_c.chain = chain; td->u.u_c.end = chain; - td->u.u_c.end->next = NULL; - isp->u.u_c.chain = src_chain->next; + chain->next = NULL; } /*------------------------------------------------------------------. @@ -378,7 +378,7 @@ void make_text_link (struct obstack *, token_chain **, token_chain **); void push_file (FILE *, const char *, bool); void push_macro (builtin_func *); struct obstack *push_string_init (void); -bool push_token (token_data *, int); +bool push_token (token_data *, int, bool); const input_block *push_string_finish (void); void push_wrapup (const char *); bool pop_wrapup (void); diff --git a/src/macro.c b/src/macro.c index 05d9bf32..baf5353c 100644 --- a/src/macro.c +++ b/src/macro.c @@ -808,7 +808,8 @@ arg_type (macro_arguments *argv, unsigned int index) return TOKEN_TEXT; token = arg_token (argv, index); type = TOKEN_DATA_TYPE (token); - /* Composite tokens are currently sequences of text only. */ + /* When accessed via the arg_* interface, composite tokens are + currently sequences of text only. */ if (type == TOKEN_COMP) type = TOKEN_TEXT; return type; @@ -1104,23 +1105,8 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) return; token = arg_token (argv, index); // TODO handle func tokens? 
- if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT) - { - if (push_token (token, expansion_level - 1)) - arg_mark (argv); - } - else if (TOKEN_DATA_TYPE (token) == TOKEN_COMP) - { - // TODO - really handle composites; for now, just flatten the - // composite and push its text - token_chain *chain = token->u.u_c.chain; - while (chain) - { - assert (chain->type == CHAIN_STR); - obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); - chain = chain->next; - } - } + if (push_token (token, expansion_level - 1, argv->inuse)) + arg_mark (argv); } /* Push series of comma-separated arguments from ARGV, which should @@ -1131,7 +1117,6 @@ void push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) { token_data *token; - token_chain *chain; unsigned int i = skip ? 2 : 1; const char *sep = ","; size_t sep_len = 1; @@ -1171,20 +1156,7 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) else use_sep = true; // TODO handle func tokens? - if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT) - inuse |= push_token (token, expansion_level - 1); - else - { - // TODO - handle composite text in push_token - assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP); - chain = token->u.u_c.chain; - while (chain) - { - assert (chain->type == CHAIN_STR); - obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); - chain = chain->next; - } - } + inuse |= push_token (token, expansion_level - 1, inuse); } if (quote) obstack_grow (obs, curr_quote.str2, curr_quote.len2); |