summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Blake <ebb9@byu.net> 2007-10-30 11:17:51 -0600
committer Eric Blake <ebb9@byu.net> 2008-01-26 21:06:41 -0700
commit 290301246eefb3f58fe29b8ccd9118b23c76c61c (patch)
tree 5d7b6991f170254bf4365ae8a357adb793221e92
parent d8324ac481f69682f6953ba3fb0c60cf67c7e8d7 (diff)
download m4-290301246eefb3f58fe29b8ccd9118b23c76c61c.tar.gz
Stage13: push composite text tokens
-rw-r--r-- src/input.c 174
-rw-r--r-- src/m4.h 2
-rw-r--r-- src/macro.c 38
3 files changed, 141 insertions, 73 deletions
diff --git a/src/input.c b/src/input.c
index d8f347ef..17e16e47 100644
--- a/src/input.c
+++ b/src/input.c
@@ -313,37 +313,74 @@ push_string_init (void)
return current_input;
}
-/*-------------------------------------------------------------------.
-| If TOKEN contains text, then convert the current string into a |
-| chain if it is not one already, and add the contents of TOKEN as a |
-| new link in the chain. LEVEL describes the current expansion |
-| level, or -1 if the contents of TOKEN reside entirely on the |
-| current_input stack and TOKEN lives in temporary storage. Allows |
-| gathering input from multiple locations, rather than copying |
-| everything consecutively onto the input stack. Must be called |
-| between push_string_init and push_string_finish. Return true only |
-| if LEVEL is non-negative, and a reference was created to TOKEN, in |
-| which case, the lifetime of TOKEN and its contents must last as |
-| long as the input engine can parse references to it. |
-`-------------------------------------------------------------------*/
+/*--------------------------------------------------------------------.
+| This function allows gathering input from multiple locations, |
+| rather than copying everything consecutively onto the input stack. |
+| Must be called between push_string_init and push_string_finish. |
+| |
+| If TOKEN contains text, then convert the current input block into |
+| a chain if it is not one already, and add the contents of TOKEN as |
+| a new link in the chain. LEVEL describes the current expansion |
+| level, or -1 if TOKEN is composite, its contents reside entirely |
+| on the current_input stack, and TOKEN lives in temporary storage. |
+| If TOKEN is a simple string, then it belongs to the current macro |
+| expansion. If TOKEN is composite, then each text link has a level |
+| of -1 if it belongs to the current macro expansion, otherwise it |
+| is a back-reference where level tracks which stack it came from. |
+| The resulting input block chain contains links with a level of -1 |
+| if the text belongs to the input stack, otherwise the level where |
+| the back-reference comes from. |
+| |
+| Return true only if a reference was created to the contents of |
+| TOKEN, in which case, LEVEL was non-negative and the lifetime of |
+| TOKEN and its contents must last as long as the input engine can |
+| parse references to it. INUSE determines whether composite tokens |
+| should favor creating back-references or copying text. |
+`--------------------------------------------------------------------*/
bool
-push_token (token_data *token, int level)
+push_token (token_data *token, int level, bool inuse)
{
+ token_chain *src_chain = NULL;
token_chain *chain;
- bool result = false;
assert (next);
// TODO - also accept TOKEN_COMP chains containing single $@ ref
- assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
/* Speed consideration - for short enough tokens, the speed and
memory overhead of parsing another INPUT_CHAIN link outweighs the
- time to inline the token text. */
- if (TOKEN_DATA_LEN (token) <= INPUT_INLINE_THRESHOLD)
+ time to inline the token text. But don't re-copy text if it
+ already lives on the obstack. */
+ if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
{
- obstack_grow (current_input, TOKEN_DATA_TEXT (token),
- TOKEN_DATA_LEN (token));
- return false;
+ assert (level >= 0);
+ if (TOKEN_DATA_LEN (token) <= INPUT_INLINE_THRESHOLD)
+ {
+ obstack_grow (current_input, TOKEN_DATA_TEXT (token),
+ TOKEN_DATA_LEN (token));
+ return false;
+ }
+ }
+ else
+ {
+ /* For composite tokens, if argv is already in use, creating
+ additional references for long text segments is more
+ efficient in time. But if argv is not yet in use, and we
+ have a composite token, then the token must already contain a
+ back-reference, and memory usage is more efficient if we can
+ avoid using the current expand_macro, even if it means larger
+ copies. */
+ assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP);
+ src_chain = token->u.u_c.chain;
+ while (level >= 0 && src_chain && src_chain->type == CHAIN_STR
+ && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
+ || (!inuse && src_chain->u.u_s.level == -1)))
+ {
+ obstack_grow (current_input, src_chain->u.u_s.str,
+ src_chain->u.u_s.len);
+ src_chain = src_chain->next;
+ }
+ if (!src_chain)
+ return false;
}
if (next->type == INPUT_STRING)
@@ -352,24 +389,71 @@ push_token (token_data *token, int level)
next->u.u_c.chain = next->u.u_c.end = NULL;
}
make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
- chain = (token_chain *) obstack_alloc (current_input, sizeof *chain);
- if (next->u.u_c.end)
- next->u.u_c.end->next = chain;
- else
- next->u.u_c.chain = chain;
- next->u.u_c.end = chain;
- chain->next = NULL;
- chain->type = CHAIN_STR;
- chain->quote_age = TOKEN_DATA_QUOTE_AGE (token);
- chain->u.u_s.str = TOKEN_DATA_TEXT (token);
- chain->u.u_s.len = TOKEN_DATA_LEN (token);
- chain->u.u_s.level = level;
- if (level >= 0)
+ if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
{
+ chain = (token_chain *) obstack_alloc (current_input, sizeof *chain);
+ if (next->u.u_c.end)
+ next->u.u_c.end->next = chain;
+ else
+ next->u.u_c.chain = chain;
+ next->u.u_c.end = chain;
+ chain->next = NULL;
+ chain->type = CHAIN_STR;
+ chain->quote_age = TOKEN_DATA_QUOTE_AGE (token);
+ chain->u.u_s.str = TOKEN_DATA_TEXT (token);
+ chain->u.u_s.len = TOKEN_DATA_LEN (token);
+ chain->u.u_s.level = level;
adjust_refcount (level, true);
- result = true;
+ inuse = true;
}
- return result;
+ while (src_chain)
+ {
+ if (level == -1)
+ {
+ /* Nothing to copy, since link already lives on obstack. */
+ assert (src_chain->type != CHAIN_STR
+ || src_chain->u.u_s.level == -1);
+ chain = src_chain;
+ }
+ else
+ {
+ /* Allow inlining the final link with subsequent text. */
+ if (!src_chain->next && src_chain->type == CHAIN_STR
+ && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
+ || (!inuse && src_chain->u.u_s.level == -1)))
+ {
+ obstack_grow (current_input, src_chain->u.u_s.str,
+ src_chain->u.u_s.len);
+ break;
+ }
+ /* We must clone each link in the chain, since next_char
+ destructively modifies the chain it is parsing. */
+ chain = (token_chain *) obstack_copy (current_input, src_chain,
+ sizeof *chain);
+ if (chain->type == CHAIN_STR && chain->u.u_s.level == -1)
+ {
+ if (chain->u.u_s.len <= INPUT_INLINE_THRESHOLD || !inuse)
+ chain->u.u_s.str = (char *) obstack_copy (current_input,
+ chain->u.u_s.str,
+ chain->u.u_s.len);
+ else
+ {
+ chain->u.u_s.level = level;
+ inuse = true;
+ }
+ }
+ }
+ if (next->u.u_c.end)
+ next->u.u_c.end->next = chain;
+ else
+ next->u.u_c.chain = chain;
+ next->u.u_c.end = chain;
+ assert (chain->type == CHAIN_STR);
+ if (chain->u.u_s.level >= 0)
+ adjust_refcount (chain->u.u_s.level, true);
+ src_chain = src_chain->next;
+ }
+ return inuse;
}
/*-------------------------------------------------------------------.
@@ -849,7 +933,20 @@ append_quote_token (struct obstack *obs, token_data *td)
{
token_chain *src_chain = isp->u.u_c.chain;
token_chain *chain;
- assert (isp->type == INPUT_CHAIN && obs && current_quote_age);
+
+ assert (isp->type == INPUT_CHAIN && obs && current_quote_age
+ && src_chain->type == CHAIN_STR && src_chain->u.u_s.level >= 0);
+ isp->u.u_c.chain = src_chain->next;
+
+ /* Speed consideration - for short enough tokens, the speed and
+ memory overhead of parsing another INPUT_CHAIN link outweighs the
+ time to inline the token text. */
+ if (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
+ {
+ obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
+ adjust_refcount (src_chain->u.u_s.level, false);
+ return;
+ }
if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
{
@@ -864,8 +961,7 @@ append_quote_token (struct obstack *obs, token_data *td)
else
td->u.u_c.chain = chain;
td->u.u_c.end = chain;
- td->u.u_c.end->next = NULL;
- isp->u.u_c.chain = src_chain->next;
+ chain->next = NULL;
}
/*------------------------------------------------------------------.
diff --git a/src/m4.h b/src/m4.h
index 8d51a9a8..edaea0b7 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -378,7 +378,7 @@ void make_text_link (struct obstack *, token_chain **, token_chain **);
void push_file (FILE *, const char *, bool);
void push_macro (builtin_func *);
struct obstack *push_string_init (void);
-bool push_token (token_data *, int);
+bool push_token (token_data *, int, bool);
const input_block *push_string_finish (void);
void push_wrapup (const char *);
bool pop_wrapup (void);
diff --git a/src/macro.c b/src/macro.c
index 05d9bf32..baf5353c 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -808,7 +808,8 @@ arg_type (macro_arguments *argv, unsigned int index)
return TOKEN_TEXT;
token = arg_token (argv, index);
type = TOKEN_DATA_TYPE (token);
- /* Composite tokens are currently sequences of text only. */
+ /* When accessed via the arg_* interface, composite tokens are
+ currently sequences of text only. */
if (type == TOKEN_COMP)
type = TOKEN_TEXT;
return type;
@@ -1104,23 +1105,8 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index)
return;
token = arg_token (argv, index);
// TODO handle func tokens?
- if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
- {
- if (push_token (token, expansion_level - 1))
- arg_mark (argv);
- }
- else if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
- {
- // TODO - really handle composites; for now, just flatten the
- // composite and push its text
- token_chain *chain = token->u.u_c.chain;
- while (chain)
- {
- assert (chain->type == CHAIN_STR);
- obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
- chain = chain->next;
- }
- }
+ if (push_token (token, expansion_level - 1, argv->inuse))
+ arg_mark (argv);
}
/* Push series of comma-separated arguments from ARGV, which should
@@ -1131,7 +1117,6 @@ void
push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
{
token_data *token;
- token_chain *chain;
unsigned int i = skip ? 2 : 1;
const char *sep = ",";
size_t sep_len = 1;
@@ -1171,20 +1156,7 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
else
use_sep = true;
// TODO handle func tokens?
- if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
- inuse |= push_token (token, expansion_level - 1);
- else
- {
- // TODO - handle composite text in push_token
- assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP);
- chain = token->u.u_c.chain;
- while (chain)
- {
- assert (chain->type == CHAIN_STR);
- obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len);
- chain = chain->next;
- }
- }
+ inuse |= push_token (token, expansion_level - 1, inuse);
}
if (quote)
obstack_grow (obs, curr_quote.str2, curr_quote.len2);