summary refs log tree commit diff
path: root/m4
diff options
context:
space:
mode:
author Eric Blake <ebb9@byu.net> 2007-12-20 10:56:29 -0700
committer Eric Blake <ebb9@byu.net> 2007-12-20 14:46:08 -0700
commit e38bf1ca50e3c5038fab27266ecb3ce3d1a0296f (patch)
tree b6b490db3c71a43267b5b1ff3deaaae920aa0acd /m4
parent fa34c3f77687fac1d5264955e11a7c704a89e875 (diff)
download m4-e38bf1ca50e3c5038fab27266ecb3ce3d1a0296f.tar.gz
Stage 9: share rather than copy single-arg refs.
* ltdl/m4/gnulib-cache.m4: Import memmem and quote modules.
* m4/m4module.h (m4_arg_scratch): New prototype.
* m4/m4private.h (m4__push_symbol): Add parameter.
(m4_arg_scratch): Add fast accessor.
(struct m4): Add expansion_level member, taken...
* m4/macro.c (expansion_level): ...from here. Adjust all users.
(expand_argument): Minor cleanup.
(expand_macro): Track scratch space per macro call.
(m4_arg_scratch): New function.
(m4_make_argv_ref): Call new function.
(m4_push_arg): Push reference to $0.
(m4_push_args): Rework separator usage, since separators will usually be inlined.
(process_macro): Allow embedded NUL.
* m4/input.c (INPUT_INLINE_THRESHOLD): New define.
(m4__push_symbol): Add parameter. Inline short strings, and save references through rescanning.
* m4/symtab.c (m4_set_symbol_value_text): Weaken assertion.
* modules/m4.c (errprint, index): Handle NUL transparently.
(dumpdef, translit): Use scratch space, rather than expansion stack.
* modules/gnu.c (renamesyms, m4symbols): Likewise.
* tests/others.at (nul character): New test.
(iso8859): Quote absolute file name, remove XFAIL.
* tests/iso8859.m4: Avoid raw NUL in output.
* tests/null.m4: New file.
* tests/null.out: Likewise.
* tests/null.err: Likewise.
* Makefile.am (OTHER_FILES): Distribute new files.
* .gitattributes: Treat new files as text.

Signed-off-by: Eric Blake <ebb9@byu.net>
Diffstat (limited to 'm4')
-rw-r--r-- m4/input.c 43
-rw-r--r-- m4/m4module.h 1
-rw-r--r-- m4/m4private.h 5
-rw-r--r-- m4/macro.c 126
-rw-r--r-- m4/symtab.c 9
5 files changed, 113 insertions, 71 deletions
diff --git a/m4/input.c b/m4/input.c
index bdacfbff..26298fb8 100644
--- a/m4/input.c
+++ b/m4/input.c
@@ -27,6 +27,11 @@
/* Define this to see runtime debug info. Implied by DEBUG. */
/*#define DEBUG_INPUT */
+/* Maximum number of bytes where it is more efficient to inline the
+ reference as a string than it is to track reference bookkeeping for
+ those bytes. */
+#define INPUT_INLINE_THRESHOLD 16
+
/*
Unread input can be either files that should be read (from the
command line or by include/sinclude), strings which should be
@@ -528,17 +533,27 @@ m4_push_string_init (m4 *context)
everything consecutively onto the input stack. Must be called
between push_string_init and push_string_finish. Return true only
if LEVEL is less than SIZE_MAX and a reference was created to
- VALUE. */
+ VALUE, in which case, the lifetime of the contents of VALUE must
+ last as long as the input engine can parse references from it. */
bool
-m4__push_symbol (m4_symbol_value *value, size_t level)
+m4__push_symbol (m4 *context, m4_symbol_value *value, size_t level)
{
m4_symbol_chain *chain;
+ bool result = false;
assert (next);
/* TODO - also accept TOKEN_COMP chains. */
assert (m4_is_symbol_value_text (value));
- if (m4_get_symbol_value_len (value) == 0)
- return false;
+
+ /* Speed consideration - for short enough symbols, the speed and
+ memory overhead of parsing another INPUT_CHAIN link outweighs the
+ time to inline the symbol text. */
+ if (m4_get_symbol_value_len (value) <= INPUT_INLINE_THRESHOLD)
+ {
+ obstack_grow (current_input, m4_get_symbol_value_text (value),
+ m4_get_symbol_value_len (value));
+ return false;
+ }
if (next->funcs == &string_funcs)
{
@@ -553,22 +568,18 @@ m4__push_symbol (m4_symbol_value *value, size_t level)
next->u.u_c.chain = chain;
next->u.u_c.end = chain;
chain->next = NULL;
- if (level != SIZE_MAX)
- /* TODO - use token as-is, rather than copying data. This implies
- lengthening lifetime of $@ arguments until the rescan is
- complete, rather than the current approach of freeing them
- during expand_macro. */
- chain->str = (char *) obstack_copy (current_input,
- m4_get_symbol_value_text (value),
- m4_get_symbol_value_len (value));
- else
- chain->str = m4_get_symbol_value_text (value);
+ chain->str = m4_get_symbol_value_text (value);
chain->len = m4_get_symbol_value_len (value);
- chain->level = SIZE_MAX;
+ chain->level = level;
chain->argv = NULL;
chain->index = 0;
chain->flatten = false;
- return false; /* Only return true when data is reused, not copied. */
+ if (level < SIZE_MAX)
+ {
+ m4__adjust_refcount (context, level, true);
+ result = true;
+ }
+ return result;
}
/* Last half of m4_push_string (). If next is now NULL, a call to
diff --git a/m4/m4module.h b/m4/m4module.h
index 520bd4e6..03025af1 100644
--- a/m4/m4module.h
+++ b/m4/m4module.h
@@ -310,6 +310,7 @@ extern bool m4_arg_equal (m4_macro_args *, unsigned int,
extern bool m4_arg_empty (m4_macro_args *, unsigned int);
extern size_t m4_arg_len (m4_macro_args *, unsigned int);
extern m4_builtin_func *m4_arg_func (m4_macro_args *, unsigned int);
+extern m4_obstack *m4_arg_scratch (m4 *);
extern m4_macro_args *m4_make_argv_ref (m4 *, m4_macro_args *, const char *,
size_t, bool, bool);
extern void m4_push_arg (m4 *, m4_obstack *, m4_macro_args *,
diff --git a/m4/m4private.h b/m4/m4private.h
index f24942f0..272069e0 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -78,6 +78,7 @@ struct m4 {
m4__search_path_info *search_path; /* The list of path directories. */
m4__macro_arg_stacks *arg_stacks; /* Array of current argv refs. */
size_t stacks_count; /* Size of arg_stacks. */
+ size_t expansion_level;/* Macro call nesting level. */
};
#define M4_OPT_PREFIX_BUILTINS_BIT (1 << 0) /* -P */
@@ -450,7 +451,7 @@ typedef enum {
M4_TOKEN_MACDEF /* Macro's definition (see "defn"), M4_SYMBOL_FUNC. */
} m4__token_type;
-extern bool m4__push_symbol (m4_symbol_value *, size_t);
+extern bool m4__push_symbol (m4 *, m4_symbol_value *, size_t);
extern m4__token_type m4__next_token (m4 *, m4_symbol_value *, int *,
const char *);
extern bool m4__next_token_is_open (m4 *);
@@ -459,6 +460,8 @@ extern bool m4__next_token_is_open (m4 *);
that also have an identically named function exported in m4module.h. */
#ifdef NDEBUG
# define m4_arg_argc(A) (A)->argc
+# define m4_arg_scratch(C) \
+ ((C)->arg_stacks[(C)->expansion_level - 1].argv)
#endif /* NDEBUG */
diff --git a/m4/macro.c b/m4/macro.c
index bb0d3b08..15ec4d95 100644
--- a/m4/macro.c
+++ b/m4/macro.c
@@ -145,9 +145,6 @@ static void trace_header (m4 *, size_t);
static void trace_flush (m4 *);
-/* Current recursion level in expand_macro (). */
-static size_t expansion_level = 0;
-
/* The number of the current call of expand_macro (). */
static size_t macro_call_id = 0;
@@ -325,10 +322,9 @@ expand_argument (m4 *context, m4_obstack *obs, m4_symbol_value *argp,
except we don't issue warnings. But in the future,
we want to allow concatenation of builtins and
text. */
- if (argp->type == M4_SYMBOL_FUNC
- && obstack_object_size (obs) == 0)
- return type == M4_TOKEN_COMMA;
len = obstack_object_size (obs);
+ if (argp->type == M4_SYMBOL_FUNC && !len)
+ return type == M4_TOKEN_COMMA;
obstack_1grow (obs, '\0');
VALUE_MODULE (argp) = NULL;
m4_set_symbol_value_text (argp, obstack_finish (obs), len, age);
@@ -379,7 +375,7 @@ expand_argument (m4 *context, m4_obstack *obs, m4_symbol_value *argp,
/* The macro expansion is handled by expand_macro (). It parses the
arguments, using collect_arguments (), and builds a table of pointers to
the arguments. The arguments themselves are stored on a local obstack.
- Expand_macro () uses call_macro () to do the call of the macro.
+ Expand_macro () uses m4_macro_call () to do the call of the macro.
Expand_macro () is potentially recursive, since it calls expand_argument
(), which might call expand_token (), which might call expand_macro ().
@@ -391,6 +387,7 @@ static void
expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol)
{
void *args_base; /* Base of stack->args on entry. */
+ void *args_scratch; /* Base of scratch space for m4_macro_call. */
void *argv_base; /* Base of stack->argv on entry. */
m4_macro_args *argv; /* Arguments to the called macro. */
m4_obstack *expansion; /* Collects the macro's expansion. */
@@ -399,7 +396,7 @@ expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol)
bool trace_expansion = false; /* True if trace and debugmode(`e'). */
size_t my_call_id; /* Sequence id for this macro. */
m4_symbol_value *value; /* Original value of this macro. */
- size_t level = expansion_level; /* Expansion level of this macro. */
+ size_t level; /* Expansion level of this macro. */
m4__macro_arg_stacks *stack; /* Storage for this macro. */
/* Report errors at the location where the open parenthesis (if any)
@@ -414,6 +411,7 @@ expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol)
int loc_close_line;
/* Obstack preparation. */
+ level = context->expansion_level;
if (context->stacks_count <= level)
{
size_t count = context->stacks_count;
@@ -453,7 +451,7 @@ expand_macro (m4 *context, const char *name, size_t len, m4_symbol *symbol)
/* Prepare for macro expansion. */
VALUE_PENDING (value)++;
- if (m4_get_nesting_limit_opt (context) < ++expansion_level)
+ if (m4_get_nesting_limit_opt (context) < ++context->expansion_level)
m4_error (context, EXIT_FAILURE, 0, NULL, _("\
recursion limit of %zu exceeded, use -L<N> to change it"),
m4_get_nesting_limit_opt (context));
@@ -464,6 +462,11 @@ recursion limit of %zu exceeded, use -L<N> to change it"),
argv = collect_arguments (context, name, len, symbol, stack->args,
stack->argv);
+ /* Since collect_arguments can invalidate stack by reallocating
+ context->arg_stacks during a recursive expand_macro call, we must
+ reset it here. */
+ stack = &context->arg_stacks[level];
+ args_scratch = obstack_finish (stack->args);
/* The actual macro call. */
loc_close_file = m4_get_current_file (context);
@@ -485,7 +488,7 @@ recursion limit of %zu exceeded, use -L<N> to change it"),
m4_set_current_file (context, loc_close_file);
m4_set_current_line (context, loc_close_line);
- --expansion_level;
+ --context->expansion_level;
--VALUE_PENDING (value);
if (BIT_TEST (VALUE_FLAGS (value), VALUE_DELETED_BIT))
m4_symbol_value_delete (value);
@@ -502,6 +505,7 @@ recursion limit of %zu exceeded, use -L<N> to change it"),
{
if (argv->inuse)
{
+ obstack_free (stack->args, args_scratch);
if (debug_macro_level & PRINT_ARGCOUNT_CHANGES)
xfprintf (stderr, "m4debug: -%d- `%s' in use, level=%d, "
"refcount=%zu, argcount=%zu\n", my_call_id, argv->argv0,
@@ -622,14 +626,15 @@ static void
process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
int argc, m4_macro_args *argv)
{
- const char *text;
+ const char *text = m4_get_symbol_value_text (value);
+ size_t len = m4_get_symbol_value_len (value);
int i;
- for (text = m4_get_symbol_value_text (value); *text != '\0';)
+ while (len--)
{
char ch;
- if (!m4_has_syntax (M4SYNTAX, *text, M4_SYNTAX_DOLLAR))
+ if (!m4_has_syntax (M4SYNTAX, *text, M4_SYNTAX_DOLLAR) || !len)
{
obstack_1grow (obs, *text);
text++;
@@ -647,11 +652,13 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
if (m4_get_posixly_correct_opt (context) || !isdigit(text[1]))
{
i = *text++ - '0';
+ len--;
}
else
{
char *endp;
i = (int) strtol (text, &endp, 10);
+ len -= endp - text;
text = endp;
}
if (i < argc)
@@ -662,12 +669,14 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
case '#': /* number of arguments */
m4_shipout_int (obs, argc - 1);
text++;
+ len--;
break;
case '*': /* all arguments */
case '@': /* ... same, but quoted */
m4_push_args (context, obs, argv, false, *text == '@');
text++;
+ len--;
break;
default:
@@ -678,19 +687,20 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
}
else
{
- size_t len = 0;
+ size_t len1 = 0;
const char *endp;
char *key;
for (endp = ++text;
- *endp && m4_has_syntax (M4SYNTAX, *endp,
- (M4_SYNTAX_OTHER | M4_SYNTAX_ALPHA
- | M4_SYNTAX_NUM));
+ len1 < len && m4_has_syntax (M4SYNTAX, *endp,
+ (M4_SYNTAX_OTHER
+ | M4_SYNTAX_ALPHA
+ | M4_SYNTAX_NUM));
++endp)
{
- ++len;
+ ++len1;
}
- key = xstrndup (text, len);
+ key = xstrndup (text, len1);
if (*endp)
{
@@ -713,7 +723,8 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
key);
}
- text = *endp ? 1 + endp : endp;
+ len -= endp - text;
+ text = endp;
free (key);
break;
@@ -802,7 +813,7 @@ trace_header (m4 *context, size_t id)
if (m4_is_debug_bit (context, M4_DEBUG_TRACE_LINE))
trace_format (context, "%d:", m4_get_current_line (context));
}
- trace_format (context, " -%zu- ", expansion_level);
+ trace_format (context, " -%zu- ", context->expansion_level);
if (m4_is_debug_bit (context, M4_DEBUG_TRACE_CALLID))
trace_format (context, "id %zu: ", id);
}
@@ -1078,9 +1089,8 @@ m4_make_argv_ref (m4 *context, m4_macro_args *argv, const char *argv0,
m4_symbol_value *value;
m4_symbol_chain *chain;
unsigned int index = skip ? 2 : 1;
- m4_obstack *obs = context->arg_stacks[expansion_level - 1].argv;
+ m4_obstack *obs = m4_arg_scratch (context);
- assert (obstack_object_size (obs) == 0);
/* When making a reference through a reference, point to the
original if possible. */
if (argv->has_ref)
@@ -1114,7 +1124,7 @@ m4_make_argv_ref (m4 *context, m4_macro_args *argv, const char *argv0,
chain->next = NULL;
chain->str = NULL;
chain->len = 0;
- chain->level = expansion_level - 1;
+ chain->level = context->expansion_level - 1;
chain->argv = argv;
chain->index = index;
chain->flatten = flatten;
@@ -1134,18 +1144,22 @@ m4_push_arg (m4 *context, m4_obstack *obs, m4_macro_args *argv,
unsigned int index)
{
m4_symbol_value *value;
+ m4_symbol_value temp;
if (index == 0)
{
- obstack_grow (obs, argv->argv0, argv->argv0_len);
- return;
+ value = &temp;
+ m4_set_symbol_value_text (value, argv->argv0, argv->argv0_len, 0);
+ }
+ else
+ {
+ value = m4_arg_symbol (argv, index);
+ if (value == &empty_symbol)
+ return;
}
- value = m4_arg_symbol (argv, index);
- if (value == &empty_symbol)
- return;
/* TODO handle builtin tokens? */
assert (value->type == M4_SYMBOL_TEXT);
- if (m4__push_symbol (value, expansion_level - 1))
+ if (m4__push_symbol (context, value, context->expansion_level - 1))
arg_mark (argv);
}
@@ -1158,47 +1172,50 @@ m4_push_args (m4 *context, m4_obstack *obs, m4_macro_args *argv, bool skip,
bool quote)
{
m4_symbol_value *value;
- m4_symbol_value sep;
unsigned int i = skip ? 2 : 1;
+ const char *sep = ",";
+ size_t sep_len = 1;
bool use_sep = false;
bool inuse = false;
const char *lquote = m4_get_syntax_lquote (M4SYNTAX);
const char *rquote = m4_get_syntax_rquote (M4SYNTAX);
+ m4_obstack *scratch = m4_arg_scratch (context);
if (argv->argc <= i)
return;
+ if (argv->argc == i + 1)
+ {
+ if (quote)
+ obstack_grow (obs, lquote, strlen (lquote));
+ m4_push_arg (context, obs, argv, i);
+ if (quote)
+ obstack_grow (obs, rquote, strlen (rquote));
+ return;
+ }
+
+ /* Compute the separator in the scratch space. */
if (quote)
{
- const char *str;
- size_t len;
obstack_grow (obs, lquote, strlen (lquote));
- len = obstack_object_size (obs);
- obstack_1grow (obs, '\0');
- str = (char *) obstack_finish (obs);
- m4_set_symbol_value_text (&sep, str, len, 0);
- m4__push_symbol (&sep, SIZE_MAX);
- obstack_grow (obs, rquote, strlen (rquote));
- obstack_1grow (obs, ',');
- obstack_grow0 (obs, lquote, strlen (lquote));
- str = (char *) obstack_finish (obs);
- m4_set_symbol_value_text (&sep, str,
- strlen (rquote) + 1 + strlen (lquote), 0);
+ obstack_grow (scratch, rquote, strlen (rquote));
+ obstack_1grow (scratch, ',');
+ obstack_grow0 (scratch, lquote, strlen (lquote));
+ sep = (char *) obstack_finish (scratch);
+ sep_len += strlen (lquote) + strlen (rquote);
}
- else
- m4_set_symbol_value_text (&sep, ",", 1, 0);
/* TODO push entire $@ ref, rather than each arg. */
for ( ; i < argv->argc; i++)
{
value = m4_arg_symbol (argv, i);
if (use_sep)
- m4__push_symbol (&sep, SIZE_MAX);
+ obstack_grow (obs, sep, sep_len);
else
use_sep = true;
/* TODO handle builtin tokens? */
assert (value->type == M4_SYMBOL_TEXT);
- inuse |= m4__push_symbol (value, expansion_level - 1);
+ inuse |= m4__push_symbol (context, value, context->expansion_level - 1);
}
if (quote)
obstack_grow (obs, rquote, strlen (rquote));
@@ -1218,3 +1235,16 @@ m4_arg_argc (m4_macro_args *argv)
{
return argv->argc;
}
+
+/* Return an obstack useful for scratch calculations, and which will
+ not interfere with macro expansion. The obstack will be reset when
+ expand_macro completes.
+
+ CONTEXT supplies the per-level stacks: the entry used is
+ arg_stacks[expansion_level - 1], so this must only be called while
+ at least one macro expansion is active (expansion_level is an
+ unsigned size_t, and a level of zero would index out of bounds).
+ The assertion checks that no partially grown object is pending on
+ the returned obstack.
+
+ The #undef is needed because m4private.h defines a macro of the
+ same name when NDEBUG is set. NOTE(review): that macro expands to
+ the .argv obstack of the same stack entry, whereas this function
+ returns stack->args - confirm which of the two is intended, since
+ NDEBUG and non-NDEBUG builds otherwise use different obstacks. */
+#undef m4_arg_scratch
+m4_obstack *
+m4_arg_scratch (m4 *context)
+{
+ m4__macro_arg_stacks *stack
+ = &context->arg_stacks[context->expansion_level - 1];
+ assert (obstack_object_size (stack->args) == 0);
+ return stack->args;
+}
diff --git a/m4/symtab.c b/m4/symtab.c
index 95ed36ed..30a61eda 100644
--- a/m4/symtab.c
+++ b/m4/symtab.c
@@ -701,12 +701,9 @@ m4_set_symbol_value_text (m4_symbol_value *value, const char *text, size_t len,
unsigned int quote_age)
{
assert (value && text);
- /* TODO - this assertion enforces NUL-terminated text with no
- intermediate NULs. Do we want to optimize memory usage and use
- purely length-based manipulation, for one less byte per string?
- Perhaps only without NDEBUG? Also, do we want to support
- embedded NUL? */
- assert (strlen (text) == len);
+ /* In practice, it is easier to debug when we guarantee a
+ terminating NUL, even when there are embedded NULs. */
+ assert (!text[len]);
value->type = M4_SYMBOL_TEXT;
value->u.u_t.text = text;