summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Blake <ebb9@byu.net> 2008-07-14 12:21:07 -0600
committer Eric Blake <ebb9@byu.net> 2009-04-17 07:05:15 -0600
commit a404a0e157c3cccff9e2cc49df29f5e192e1af66 (patch)
tree b0ff8b8997ef024aba8f945563ddc52b501accac
parent 48a3bd03d4a03ccb6af87b5496b8d7c4698135ff (diff)
download m4-a404a0e157c3cccff9e2cc49df29f5e192e1af66.tar.gz
Stage31: speed up defn by tracking quote_age
-rw-r--r-- examples/Makefile.am 3
-rw-r--r-- examples/append.m4 18
-rw-r--r-- src/builtin.c 35
-rw-r--r-- src/freeze.c 2
-rw-r--r-- src/input.c 43
-rw-r--r-- src/m4.c 2
-rw-r--r-- src/m4.h 15
-rw-r--r-- src/macro.c 13
8 files changed, 105 insertions, 26 deletions
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 254d2ab9..62323971 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -1,6 +1,6 @@
## Makefile.am - template for generating Makefile via Automake
##
-## Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
+## Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
##
## This file is part of GNU M4.
##
@@ -20,6 +20,7 @@
## This file written by Eric Blake <ebb9@byu.net>
EXTRA_DIST =\
+append.m4 \
capitalize.m4 \
capitalize2.m4 \
comments.m4 \
diff --git a/examples/append.m4 b/examples/append.m4
new file mode 100644
index 00000000..be6ed3d4
--- /dev/null
+++ b/examples/append.m4
@@ -0,0 +1,18 @@
+dnl Stress test for appending algorithm. Usage:
+dnl m4 -Ipath/to/examples [-Doptions] append.m4
+dnl Options include:
+dnl -Dlimit=<num> - set upper limit of sequence to <num>, default 1000
+dnl -Dverbose - print progress to the screen, rather than discarding
+dnl -Dnext=<code> - append <code> each iteration, default to the current count
+dnl -Ddebug[=<code>] - execute <code> after loop
+dnl -Dsleep=<num> - sleep for <num> seconds before exit, to allow time
+dnl to examine peak process memory usage
+include(`forloop2.m4')dnl
+ifdef(`limit', `', `define(`limit', `1000')')dnl
+ifdef(`verbose', `', `divert(`-1')')dnl
+ifdef(`next', `', `define(`next', `i')')dnl
+ifdef(`debug', `', `define(`debug')')dnl
+define(`var')define(`append', `define(`var', defn(`var')`$1')')dnl
+forloop(`i', `1', limit, `i
+append(next)')debug
+ifdef(`sleep',`syscmd(`echo done>/dev/tty;sleep 'sleep)')dnl
diff --git a/src/builtin.c b/src/builtin.c
index 003aa7df..4f965a6f 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -441,17 +441,19 @@ free_regex (void)
}
}
-/*------------------------------------------------------------------.
-| Define a predefined or user-defined macro, with name NAME of |
-| length NAME_LEN, and expansion TEXT of length LEN. LEN may be |
-| SIZE_MAX, to use the string length of TEXT instead. MODE is |
-| SYMBOL_INSERT for "define" or SYMBOL_PUSHDEF for "pushdef". This |
-| function is also used from main (). |
-`------------------------------------------------------------------*/
+/*-----------------------------------------------------------------.
+| Define a predefined or user-defined macro, with name NAME of |
+| length NAME_LEN, and expansion TEXT of length LEN. LEN may be |
+| SIZE_MAX, to use the string length of TEXT instead. QUOTE_AGE |
+| describes the quoting used by TEXT, or zero to force rescanning |
+| when defn is later used to retrieve TEXT. MODE is SYMBOL_INSERT |
+| for "define" or SYMBOL_PUSHDEF for "pushdef". This function is |
+| also used from main (). |
+`-----------------------------------------------------------------*/
void
define_user_macro (const char *name, size_t name_len, const char *text,
- size_t len, symbol_lookup mode)
+ size_t len, symbol_lookup mode, unsigned int quote_age)
{
symbol *s;
char *defn;
@@ -467,6 +469,7 @@ define_user_macro (const char *name, size_t name_len, const char *text,
SYMBOL_TYPE (s) = TOKEN_TEXT;
SYMBOL_TEXT (s) = defn;
SYMBOL_TEXT_LEN (s) = len;
+ SYMBOL_TEXT_QUOTE_AGE (s) = quote_age;
SYMBOL_MACRO_ARGS (s) = true;
/* Implement --warn-macro-sequence. */
@@ -532,13 +535,13 @@ builtin_init (void)
{
if (pp->unix_name != NULL)
define_user_macro (pp->unix_name, strlen (pp->unix_name),
- pp->func, SIZE_MAX, SYMBOL_INSERT);
+ pp->func, SIZE_MAX, SYMBOL_INSERT, 0);
}
else
{
if (pp->gnu_name != NULL)
define_user_macro (pp->gnu_name, strlen (pp->gnu_name),
- pp->func, SIZE_MAX, SYMBOL_INSERT);
+ pp->func, SIZE_MAX, SYMBOL_INSERT, 0);
}
}
@@ -693,7 +696,7 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode)
if (argc == 2)
{
- define_user_macro (ARG (1), ARG_LEN (1), "", 0, mode);
+ define_user_macro (ARG (1), ARG_LEN (1), "", 0, mode, 0);
return;
}
@@ -704,7 +707,7 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode)
/* fallthru */
case TOKEN_TEXT:
define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true),
- arg_len (argv, 2, true), mode);
+ arg_len (argv, 2, true), mode, arg_quote_age (argv));
break;
case TOKEN_FUNC:
@@ -1091,7 +1094,7 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv)
{
case TOKEN_TEXT:
obstack_grow (obs, curr_quote.str1, curr_quote.len1);
- obstack_grow (obs, SYMBOL_TEXT (s), SYMBOL_TEXT_LEN (s));
+ push_defn (s);
obstack_grow (obs, curr_quote.str2, curr_quote.len2);
break;
@@ -2459,6 +2462,12 @@ expand_user_macro (struct obstack *obs, symbol *sym,
int i;
const char *dollar = memchr (text, '$', len);
+ if (!dollar)
+ {
+ push_defn (sym);
+ return;
+ }
+
while (dollar)
{
obstack_grow (obs, text, dollar - text);
diff --git a/src/freeze.c b/src/freeze.c
index 75e112b6..7dde2c00 100644
--- a/src/freeze.c
+++ b/src/freeze.c
@@ -359,7 +359,7 @@ ill-formed frozen file, invalid builtin %s encountered"),
/* Enter a macro having an expansion text as a definition. */
define_user_macro (string[0], number[0], string[1], number[1],
- SYMBOL_PUSHDEF);
+ SYMBOL_PUSHDEF, 0);
break;
case 'Q':
diff --git a/src/input.c b/src/input.c
index ee96f645..aaad0d7f 100644
--- a/src/input.c
+++ b/src/input.c
@@ -353,6 +353,39 @@ push_string_init (const char *file, int line)
return current_input;
}
+/*-------------------------------------------------------------.
+| Push the text macro definition in SYM onto the input stack.  |
+| Asserts that SYM has type TOKEN_TEXT and that the global     |
+| `next' is non-NULL.  Text of at most INPUT_INLINE_THRESHOLD  |
+| bytes is copied straight onto current_input.  Longer text    |
+| switches `next' from INPUT_STRING to INPUT_CHAIN if needed,  |
+| is added through make_text_link, and has the last chain      |
+| link stamped with the quote age recorded in SYM.             |
+`-------------------------------------------------------------*/
+void
+push_defn (symbol *sym)
+{
+ size_t len = SYMBOL_TEXT_LEN (sym);
+
+ assert (next && SYMBOL_TYPE (sym) == TOKEN_TEXT);
+
+ /* Speed consideration - for short enough tokens, the speed and
+ memory overhead of parsing another INPUT_CHAIN link outweighs the
+ time to inline the token text. In that case the definition is
+ copied directly onto current_input and no link is created. */
+ if (len <= INPUT_INLINE_THRESHOLD)
+ {
+ obstack_grow (current_input, SYMBOL_TEXT (sym), len);
+ return;
+ }
+
+ if (next->type == INPUT_STRING)
+ {
+ next->type = INPUT_CHAIN;
+ next->u.u_c.chain = next->u.u_c.end = NULL;
+ }
+ make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
+
+ /* TODO - optimize this to increment the symbol's reference counter,
+ then decrement it again upon rescan, rather than copying. For
+ now the text is copied onto current_input and make_text_link is
+ called again (presumably wrapping the text just grown into a new
+ link at the end of the chain - confirm against its definition).
+ The quote_age of the chain's last link is then set from SYM. */
+ obstack_grow (current_input, SYMBOL_TEXT (sym), len);
+ make_text_link (current_input, &next->u.u_c.chain, &next->u.u_c.end);
+ next->u.u_c.end->quote_age = SYMBOL_TEXT_QUOTE_AGE (sym);
+}
+
/*--------------------------------------------------------------------.
| This function allows gathering input from multiple locations, |
| rather than copying everything consecutively onto the input stack. |
@@ -1469,13 +1502,15 @@ append_quote_token (struct obstack *obs, token_data *td)
/* Speed consideration - for short enough tokens, the speed and
memory overhead of parsing another INPUT_CHAIN link outweighs the
- time to inline the token text. */
+ time to inline the token text. Also, if the quoted string does
+ not live in a back-reference, it must be copied. */
if (src_chain->type == CHAIN_STR
- && src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD)
+ && (src_chain->u.u_s.len <= INPUT_INLINE_THRESHOLD
+ || src_chain->u.u_s.level < 0))
{
- assert (src_chain->u.u_s.level >= 0);
obstack_grow (obs, src_chain->u.u_s.str, src_chain->u.u_s.len);
- adjust_refcount (src_chain->u.u_s.level, false);
+ if (src_chain->u.u_s.level >= 0)
+ adjust_refcount (src_chain->u.u_s.level, false);
return;
}
diff --git a/src/m4.c b/src/m4.c
index 26faef1f..ec264ce3 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -623,7 +623,7 @@ main (int argc, char *const *argv, char *const *envp)
const char *value = strchr (defines->arg, '=');
size_t len = value ? value - defines->arg : strlen (defines->arg);
define_user_macro (defines->arg, len, value ? value + 1 : "",
- value ? SIZE_MAX : 0, SYMBOL_INSERT);
+ value ? SIZE_MAX : 0, SYMBOL_INSERT, 0);
}
break;
diff --git a/src/m4.h b/src/m4.h
index fa9eb6d8..d2e20ff9 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -98,6 +98,7 @@ typedef struct string_pair string_pair;
/* These must come first. */
typedef struct token_data token_data;
typedef struct macro_arguments macro_arguments;
+typedef struct symbol symbol;
typedef void builtin_func (struct obstack *, int, macro_arguments *);
/* Gnulib's stdbool doesn't work with bool bitfields. For nicer
@@ -358,6 +359,7 @@ void append_macro (struct obstack *, builtin_func *, token_chain **,
token_chain **);
void push_macro (struct obstack *, builtin_func *);
struct obstack *push_string_init (const char *, int);
+void push_defn (symbol *);
bool push_token (token_data *, int, bool);
void push_quote_wrapper (void);
void push_string_finish (void);
@@ -441,12 +443,16 @@ struct symbol
#define SYMBOL_NAME(S) ((S)->name)
#define SYMBOL_NAME_LEN(S) ((S)->len)
#define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data))
-#define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
-#define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data))
+
+/* Only safe when SYMBOL_TYPE(S) == TOKEN_TEXT: */
+#define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
+#define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data))
+#define SYMBOL_TEXT_QUOTE_AGE(S) (TOKEN_DATA_QUOTE_AGE (&(S)->data))
+
+/* Only safe when SYMBOL_TYPE(S) == TOKEN_FUNC: */
#define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data))
typedef enum symbol_lookup symbol_lookup;
-typedef struct symbol symbol;
typedef void hack_symbol (symbol *, void *);
#define HASHMAX 509 /* default, overridden by -Hsize */
@@ -469,6 +475,7 @@ size_t adjust_refcount (int, bool);
bool arg_adjust_refcount (macro_arguments *, bool);
unsigned int arg_argc (macro_arguments *);
const call_info *arg_info (macro_arguments *);
+unsigned int arg_quote_age (macro_arguments *);
token_data_type arg_type (macro_arguments *, unsigned int);
const char *arg_text (macro_arguments *, unsigned int, bool);
bool arg_equal (macro_arguments *, unsigned int, unsigned int);
@@ -530,7 +537,7 @@ void define_builtin (const char *, size_t, const builtin *, symbol_lookup);
void set_macro_sequence (const char *);
void free_regex (void);
void define_user_macro (const char *, size_t, const char *, size_t,
- symbol_lookup);
+ symbol_lookup, unsigned int);
void undivert_all (void);
void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *);
void m4_placeholder (struct obstack *, int, macro_arguments *);
diff --git a/src/macro.c b/src/macro.c
index e3fa1095..f565f5e5 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -1,7 +1,7 @@
/* GNU m4 -- A simple macro processor
- Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008 Free
- Software Foundation, Inc.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008,
+ 2009 Free Software Foundation, Inc.
This file is part of GNU M4.
@@ -869,6 +869,15 @@ arg_info (macro_arguments *argv)
return argv->info;
}
+/* Given ARGV, return the quote age in effect when argument collection
+ completed, or zero if the arguments do not all share the same quote
+ age. This is a plain accessor for ARGV->quote_age; the stored
+ value is computed elsewhere. */
+unsigned int
+arg_quote_age (macro_arguments *argv)
+{
+ return argv->quote_age;
+}
+
/* Given ARGV, return the type of argument ARG. Arg 0 is always text,
and indices beyond argc are likewise treated as text. */
token_data_type