diff options
author | Eric Blake <ebb9@byu.net> | 2007-12-20 10:56:29 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2007-12-20 14:46:08 -0700 |
commit | e38bf1ca50e3c5038fab27266ecb3ce3d1a0296f (patch) | |
tree | b6b490db3c71a43267b5b1ff3deaaae920aa0acd /m4 | |
parent | fa34c3f77687fac1d5264955e11a7c704a89e875 (diff) | |
download | m4-e38bf1ca50e3c5038fab27266ecb3ce3d1a0296f.tar.gz |
Stage 9: share rather than copy single-arg refs.
* ltdl/m4/gnulib-cache.m4: Import memmem and quote modules.
* m4/m4module.h (m4_arg_scratch): New prototype.
* m4/m4private.h (m4__push_symbol): Add parameter.
(m4_arg_scratch): Add fast accessor.
(struct m4): Add expansion_level member, taken...
* m4/macro.c (expansion_level): ...from here. Adjust all users.
(expand_argument): Minor cleanup.
(expand_macro): Track scratch space per macro call.
(m4_arg_scratch): New function.
(m4_make_argv_ref): Call new function.
(m4_push_arg): Push reference to $0.
(m4_push_args): Rework separator usage, since separators will
usually be inlined.
(process_macro): Allow embedded NUL.
* m4/input.c (INPUT_INLINE_THRESHOLD): New define.
(m4__push_symbol): Add parameter. Inline short strings, and save
references through rescanning.
* m4/symtab.c (m4_set_symbol_value_text): Weaken assertion.
* modules/m4.c (errprint, index): Handle NUL transparently.
(dumpdef, translit): Use scratch space, rather than expansion
stack.
* modules/gnu.c (renamesyms, m4symbols): Likewise.
* tests/others.at (nul character): New test.
(iso8859): Quote absolute file name, remove XFAIL.
* tests/iso8859.m4: Avoid raw NUL in output.
* tests/null.m4: New file.
* tests/null.out: Likewise.
* tests/null.err: Likewise.
* Makefile.am (OTHER_FILES): Distribute new files.
* .gitattributes: Treat new files as text.
Signed-off-by: Eric Blake <ebb9@byu.net>
Diffstat (limited to 'm4')
-rw-r--r-- | m4/input.c | 43 |
-rw-r--r-- | m4/m4module.h | 1 |
-rw-r--r-- | m4/m4private.h | 5 |
-rw-r--r-- | m4/macro.c | 126 |
-rw-r--r-- | m4/symtab.c | 9 |
5 files changed, 113 insertions, 71 deletions
@@ -27,6 +27,11 @@ /* Define this to see runtime debug info. Implied by DEBUG. */ /*#define DEBUG_INPUT */ +/* Maximum number of bytes where it is more efficient to inline the + reference as a string than it is to track reference bookkeeping for + those bytes. */ +#define INPUT_INLINE_THRESHOLD 16 + /* Unread input can be either files that should be read (from the command line or by include/sinclude), strings which should be @@ -528,17 +533,27 @@ m4_push_string_init (m4 *context) everything consecutively onto the input stack. Must be called between push_string_init and push_string_finish. Return true only if LEVEL is less than SIZE_MAX and a reference was created to - VALUE. */ + VALUE, in which case, the lifetime of the contents of VALUE must + last as long as the input engine can parse references from it. */ bool -m4__push_symbol (m4_symbol_value *value, size_t level) +m4__push_symbol (m4 *context, m4_symbol_value *value, size_t level) { m4_symbol_chain *chain; + bool result = false; assert (next); /* TODO - also accept TOKEN_COMP chains. */ assert (m4_is_symbol_value_text (value)); - if (m4_get_symbol_value_len (value) == 0) - return false; + + /* Speed consideration - for short enough symbols, the speed and + memory overhead of parsing another INPUT_CHAIN link outweighs the + time to inline the symbol text. */ + if (m4_get_symbol_value_len (value) <= INPUT_INLINE_THRESHOLD) + { + obstack_grow (current_input, m4_get_symbol_value_text (value), + m4_get_symbol_value_len (value)); + return false; + } if (next->funcs == &string_funcs) { @@ -553,22 +568,18 @@ m4__push_symbol (m4_symbol_value *value, size_t level) next->u.u_c.chain = chain; next->u.u_c.end = chain; chain->next = NULL; - if (level != SIZE_MAX) - /* TODO - use token as-is, rather than copying data. This implies - lengthening lifetime of $@ arguments until the rescan is - complete, rather than the current approach of freeing them - during expand_macro. 
*/ - chain->str = (char *) obstack_copy (current_input, - m4_get_symbol_value_text (value), - m4_get_symbol_value_len (value)); - else - chain->str = m4_get_symbol_value_text (value); + chain->str = m4_get_symbol_value_text (value); chain->len = m4_get_symbol_value_len (value); - chain->level = SIZE_MAX; + chain->level = level; chain->argv = NULL; chain->index = 0; chain->flatten = false; - return false; /* Only return true when data is reused, not copied. */ + if (level < SIZE_MAX) + { + m4__adjust_refcount (context, level, true); + result = true; + } + return result; } /* Last half of m4_push_string (). If next is now NULL, a call to diff --git a/m4/m4module.h b/m4/m4module.h index 520bd4e6..03025af1 100644 --- a/m4/m4module.h +++ b/m4/m4module.h @@ -310,6 +310,7 @@ extern bool m4_arg_equal (m4_macro_args *, unsigned int, extern bool m4_arg_empty (m4_macro_args *, unsigned int); extern size_t m4_arg_len (m4_macro_args *, unsigned int); extern m4_builtin_func *m4_arg_func (m4_macro_args *, unsigned int); +extern m4_obstack *m4_arg_scratch (m4 *); extern m4_macro_args *m4_make_argv_ref (m4 *, m4_macro_args *, const char *, size_t, bool, bool); extern void m4_push_arg (m4 *, m4_obstack *, m4_macro_args *, diff --git a/m4/m4private.h b/m4/m4private.h index f24942f0..272069e0 100644 --- a/m4/m4private.h +++ b/m4/m4private.h @@ -78,6 +78,7 @@ struct m4 { m4__search_path_info *search_path; /* The list of path directories. */ m4__macro_arg_stacks *arg_stacks; /* Array of current argv refs. */ size_t stacks_count; /* Size of arg_stacks. */ + size_t expansion_level;/* Macro call nesting level. */ }; #define M4_OPT_PREFIX_BUILTINS_BIT (1 << 0) /* -P */ @@ -450,7 +451,7 @@ typedef enum { M4_TOKEN_MACDEF /* Macro's definition (see "defn"), M4_SYMBOL_FUNC. 
*/ } m4__token_type; -extern bool m4__push_symbol (m4_symbol_value *, size_t); +extern bool m4__push_symbol (m4 *, m4_symbol_value *, size_t); extern m4__token_type m4__next_token (m4 *, m4_symbol_value *, int *, const char *); extern bool m4__next_token_is_open (m4 *); @@ -459,6 +460,8 @@ extern bool m4__next_token_is_open (m4 *); that also have an identically named function exported in m4module.h. */ #ifdef NDEBUG # define m4_arg_argc(A) (A)->argc +# define m4_arg_scratch(C) \ + ((C)->arg_stacks[(C)->expansion_level - 1].argv) #endif /* NDEBUG */ @@ -145,9 +145,6 @@ static void trace_header (m4 *, size_t); static void trace_flush (m4 *); -/* Current recursion level in expand_macro (). */ -static size_t expansion_level = 0; - /* The number of the current call of expand_macro (). */ static size_t macro_call_id = 0; @@ -325,10 +322,9 @@ expand_argument (m4 *context, m4_obstack *obs, m4_symbol_value *argp, except we don't issue warnings. But in the future, we want to allow concatenation of builtins and text. */ - if (argp->type == M4_SYMBOL_FUNC - && obstack_object_size (obs) == 0) - return type == M4_TOKEN_COMMA; len = obstack_object_size (obs); + if (argp->type == M4_SYMBOL_FUNC && !len) + return type == M4_TOKEN_COMMA; obstack_1grow (obs, '\0'); VALUE_MODULE (argp) = NULL; m4_set_symbol_value_text (argp, obstack_finish (obs), len, age); @@ -379,7 +375,7 @@ expand_argument (m4 *context, m4_obstack *obs, m4_symbol_value *argp, /* The macro expansion is handled by expand_macro (). It parses the arguments, using collect_arguments (), and builds a table of pointers to the arguments. The arguments themselves are stored on a local obstack. - Expand_macro () uses call_macro () to do the call of the macro. + Expand_macro () uses m4_macro_call () to do the call of the macro. Expand_macro () is potentially recursive, since it calls expand_argument (), which might call expand_token (), which might call expand_macro (). 
@@ -391,6 +387,7 @@ static void expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol) { void *args_base; /* Base of stack->args on entry. */ + void *args_scratch; /* Base of scratch space for m4_macro_call. */ void *argv_base; /* Base of stack->argv on entry. */ m4_macro_args *argv; /* Arguments to the called macro. */ m4_obstack *expansion; /* Collects the macro's expansion. */ @@ -399,7 +396,7 @@ expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol) bool trace_expansion = false; /* True if trace and debugmode(`e'). */ size_t my_call_id; /* Sequence id for this macro. */ m4_symbol_value *value; /* Original value of this macro. */ - size_t level = expansion_level; /* Expansion level of this macro. */ + size_t level; /* Expansion level of this macro. */ m4__macro_arg_stacks *stack; /* Storage for this macro. */ /* Report errors at the location where the open parenthesis (if any) @@ -414,6 +411,7 @@ expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol) int loc_close_line; /* Obstack preparation. */ + level = context->expansion_level; if (context->stacks_count <= level) { size_t count = context->stacks_count; @@ -453,7 +451,7 @@ expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol) /* Prepare for macro expansion. */ VALUE_PENDING (value)++; - if (m4_get_nesting_limit_opt (context) < ++expansion_level) + if (m4_get_nesting_limit_opt (context) < ++context->expansion_level) m4_error (context, EXIT_FAILURE, 0, NULL, _("\ recursion limit of %zu exceeded, use -L<N> to change it"), m4_get_nesting_limit_opt (context)); @@ -464,6 +462,11 @@ recursion limit of %zu exceeded, use -L<N> to change it"), argv = collect_arguments (context, name, len, symbol, stack->args, stack->argv); + /* Since collect_arguments can invalidate stack by reallocating + context->arg_stacks during a recursive expand_macro call, we must + reset it here. 
*/ + stack = &context->arg_stacks[level]; + args_scratch = obstack_finish (stack->args); /* The actual macro call. */ loc_close_file = m4_get_current_file (context); @@ -485,7 +488,7 @@ recursion limit of %zu exceeded, use -L<N> to change it"), m4_set_current_file (context, loc_close_file); m4_set_current_line (context, loc_close_line); - --expansion_level; + --context->expansion_level; --VALUE_PENDING (value); if (BIT_TEST (VALUE_FLAGS (value), VALUE_DELETED_BIT)) m4_symbol_value_delete (value); @@ -502,6 +505,7 @@ recursion limit of %zu exceeded, use -L<N> to change it"), { if (argv->inuse) { + obstack_free (stack->args, args_scratch); if (debug_macro_level & PRINT_ARGCOUNT_CHANGES) xfprintf (stderr, "m4debug: -%d- `%s' in use, level=%d, " "refcount=%zu, argcount=%zu\n", my_call_id, argv->argv0, @@ -622,14 +626,15 @@ static void process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs, int argc, m4_macro_args *argv) { - const char *text; + const char *text = m4_get_symbol_value_text (value); + size_t len = m4_get_symbol_value_len (value); int i; - for (text = m4_get_symbol_value_text (value); *text != '\0';) + while (len--) { char ch; - if (!m4_has_syntax (M4SYNTAX, *text, M4_SYNTAX_DOLLAR)) + if (!m4_has_syntax (M4SYNTAX, *text, M4_SYNTAX_DOLLAR) || !len) { obstack_1grow (obs, *text); text++; @@ -647,11 +652,13 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs, if (m4_get_posixly_correct_opt (context) || !isdigit(text[1])) { i = *text++ - '0'; + len--; } else { char *endp; i = (int) strtol (text, &endp, 10); + len -= endp - text; text = endp; } if (i < argc) @@ -662,12 +669,14 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs, case '#': /* number of arguments */ m4_shipout_int (obs, argc - 1); text++; + len--; break; case '*': /* all arguments */ case '@': /* ... 
same, but quoted */ m4_push_args (context, obs, argv, false, *text == '@'); text++; + len--; break; default: @@ -678,19 +687,20 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs, } else { - size_t len = 0; + size_t len1 = 0; const char *endp; char *key; for (endp = ++text; - *endp && m4_has_syntax (M4SYNTAX, *endp, - (M4_SYNTAX_OTHER | M4_SYNTAX_ALPHA - | M4_SYNTAX_NUM)); + len1 < len && m4_has_syntax (M4SYNTAX, *endp, + (M4_SYNTAX_OTHER + | M4_SYNTAX_ALPHA + | M4_SYNTAX_NUM)); ++endp) { - ++len; + ++len1; } - key = xstrndup (text, len); + key = xstrndup (text, len1); if (*endp) { @@ -713,7 +723,8 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs, key); } - text = *endp ? 1 + endp : endp; + len -= endp - text; + text = endp; free (key); break; @@ -802,7 +813,7 @@ trace_header (m4 *context, size_t id) if (m4_is_debug_bit (context, M4_DEBUG_TRACE_LINE)) trace_format (context, "%d:", m4_get_current_line (context)); } - trace_format (context, " -%zu- ", expansion_level); + trace_format (context, " -%zu- ", context->expansion_level); if (m4_is_debug_bit (context, M4_DEBUG_TRACE_CALLID)) trace_format (context, "id %zu: ", id); } @@ -1078,9 +1089,8 @@ m4_make_argv_ref (m4 *context, m4_macro_args *argv, const char *argv0, m4_symbol_value *value; m4_symbol_chain *chain; unsigned int index = skip ? 2 : 1; - m4_obstack *obs = context->arg_stacks[expansion_level - 1].argv; + m4_obstack *obs = m4_arg_scratch (context); - assert (obstack_object_size (obs) == 0); /* When making a reference through a reference, point to the original if possible. 
*/ if (argv->has_ref) @@ -1114,7 +1124,7 @@ m4_make_argv_ref (m4 *context, m4_macro_args *argv, const char *argv0, chain->next = NULL; chain->str = NULL; chain->len = 0; - chain->level = expansion_level - 1; + chain->level = context->expansion_level - 1; chain->argv = argv; chain->index = index; chain->flatten = flatten; @@ -1134,18 +1144,22 @@ m4_push_arg (m4 *context, m4_obstack *obs, m4_macro_args *argv, unsigned int index) { m4_symbol_value *value; + m4_symbol_value temp; if (index == 0) { - obstack_grow (obs, argv->argv0, argv->argv0_len); - return; + value = &temp; + m4_set_symbol_value_text (value, argv->argv0, argv->argv0_len, 0); + } + else + { + value = m4_arg_symbol (argv, index); + if (value == &empty_symbol) + return; } - value = m4_arg_symbol (argv, index); - if (value == &empty_symbol) - return; /* TODO handle builtin tokens? */ assert (value->type == M4_SYMBOL_TEXT); - if (m4__push_symbol (value, expansion_level - 1)) + if (m4__push_symbol (context, value, context->expansion_level - 1)) arg_mark (argv); } @@ -1158,47 +1172,50 @@ m4_push_args (m4 *context, m4_obstack *obs, m4_macro_args *argv, bool skip, bool quote) { m4_symbol_value *value; - m4_symbol_value sep; unsigned int i = skip ? 2 : 1; + const char *sep = ","; + size_t sep_len = 1; bool use_sep = false; bool inuse = false; const char *lquote = m4_get_syntax_lquote (M4SYNTAX); const char *rquote = m4_get_syntax_rquote (M4SYNTAX); + m4_obstack *scratch = m4_arg_scratch (context); if (argv->argc <= i) return; + if (argv->argc == i + 1) + { + if (quote) + obstack_grow (obs, lquote, strlen (lquote)); + m4_push_arg (context, obs, argv, i); + if (quote) + obstack_grow (obs, rquote, strlen (rquote)); + return; + } + + /* Compute the separator in the scratch space. 
*/ if (quote) { - const char *str; - size_t len; obstack_grow (obs, lquote, strlen (lquote)); - len = obstack_object_size (obs); - obstack_1grow (obs, '\0'); - str = (char *) obstack_finish (obs); - m4_set_symbol_value_text (&sep, str, len, 0); - m4__push_symbol (&sep, SIZE_MAX); - obstack_grow (obs, rquote, strlen (rquote)); - obstack_1grow (obs, ','); - obstack_grow0 (obs, lquote, strlen (lquote)); - str = (char *) obstack_finish (obs); - m4_set_symbol_value_text (&sep, str, - strlen (rquote) + 1 + strlen (lquote), 0); + obstack_grow (scratch, rquote, strlen (rquote)); + obstack_1grow (scratch, ','); + obstack_grow0 (scratch, lquote, strlen (lquote)); + sep = (char *) obstack_finish (scratch); + sep_len += strlen (lquote) + strlen (rquote); } - else - m4_set_symbol_value_text (&sep, ",", 1, 0); /* TODO push entire $@ ref, rather than each arg. */ for ( ; i < argv->argc; i++) { value = m4_arg_symbol (argv, i); if (use_sep) - m4__push_symbol (&sep, SIZE_MAX); + obstack_grow (obs, sep, sep_len); else use_sep = true; /* TODO handle builtin tokens? */ assert (value->type == M4_SYMBOL_TEXT); - inuse |= m4__push_symbol (value, expansion_level - 1); + inuse |= m4__push_symbol (context, value, context->expansion_level - 1); } if (quote) obstack_grow (obs, rquote, strlen (rquote)); @@ -1218,3 +1235,16 @@ m4_arg_argc (m4_macro_args *argv) { return argv->argc; } + +/* Return an obstack useful for scratch calculations, and which will + not interfere with macro expansion. The obstack will be reset when + expand_macro completes. 
*/ +#undef m4_arg_scratch +m4_obstack * +m4_arg_scratch (m4 *context) +{ + m4__macro_arg_stacks *stack + = &context->arg_stacks[context->expansion_level - 1]; + assert (obstack_object_size (stack->args) == 0); + return stack->args; +} diff --git a/m4/symtab.c b/m4/symtab.c index 95ed36ed..30a61eda 100644 --- a/m4/symtab.c +++ b/m4/symtab.c @@ -701,12 +701,9 @@ m4_set_symbol_value_text (m4_symbol_value *value, const char *text, size_t len, unsigned int quote_age) { assert (value && text); - /* TODO - this assertion enforces NUL-terminated text with no - intermediate NULs. Do we want to optimize memory usage and use - purely length-based manipulation, for one less byte per string? - Perhaps only without NDEBUG? Also, do we want to support - embedded NUL? */ - assert (strlen (text) == len); + /* In practice, it is easier to debug when we guarantee a + terminating NUL, even when there are embedded NULs. */ + assert (!text[len]); value->type = M4_SYMBOL_TEXT; value->u.u_t.text = text; |