diff options
author | Eric Blake <ebb9@byu.net> | 2007-10-27 08:03:03 -0600 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-01-25 22:15:06 -0700 |
commit | 6ef07af4d24ef59c2a6939ef9b7265cb4d01e19c (patch) | |
tree | b223e556c2c98848f08ba6d89450a3bdcb111dfc | |
parent | c2c0a7ddc9f559d66a17184ea8be2c363dd4807c (diff) | |
download | m4-6ef07af4d24ef59c2a6939ef9b7265cb4d01e19c.tar.gz |
Stage 12: make token_chain a union, add string_pair.
* src/m4.h (STRING): Delete typedef.
(struct string_pair, enum token_chain_type): New types.
(struct token_chain): Reduce size via a union.
(ARG_LEN): New macro.
(ARG): Move here...
* src/builtin.c (ARG): ...from here.
(dump_args, define_macro, m4_dumpdef, m4_builtin, m4_indir)
(m4_defn, mkstemp_helper, m4_maketemp, m4_mkstemp, m4___file__)
(m4___program__, m4_m4wrap, m4_len, m4_index, m4_substr)
(m4_regexp, m4_patsubst): Adjust callers.
* src/input.c (rquote, lquote, bcomm, ecomm): Delete...
(curr_quote, curr_comm): ...replaced by these.
(make_text_link, push_token, pop_input, input_print, peek_input)
(next_char_1, input_init, set_quotes, set_comment, set_quote_age)
(next_token, peek_token): Adjust callers.
* src/macro.c (expand_macro, arg_token, arg_mark, arg_text)
(arg_equal, arg_len, make_argv_ref, push_arg, push_args):
Likewise.
* src/format.c (ARG_INT, ARG_LONG, ARG_STR, ARG_DOUBLE, format):
Likewise.
* src/freeze.c (produce_frozen_state): Likewise.
* src/debug.c (trace_format, trace_pre): Likewise.
(debug_decode): Don't lose partial traces prior to reducing
debugmode.
(cherry picked from commit d8324ac481f69682f6953ba3fb0c60cf67c7e8d7)
Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r-- | ChangeLog | 32 | ||||
-rw-r--r-- | src/builtin.c | 62 | ||||
-rw-r--r-- | src/debug.c | 39 | ||||
-rw-r--r-- | src/format.c | 10 | ||||
-rw-r--r-- | src/freeze.c | 23 | ||||
-rw-r--r-- | src/input.c | 185 | ||||
-rw-r--r-- | src/m4.h | 60 | ||||
-rw-r--r-- | src/macro.c | 103 |
8 files changed, 292 insertions, 222 deletions
@@ -1,3 +1,35 @@ +2008-01-26 Eric Blake <ebb9@byu.net> + + Stage 12: make token_chain a union, add string_pair. + Shrink size of symbol chains by using a union. Make passing quote + delimiters around more efficient. Other code cleanups. + Memory impact: slight improvement, due to smaller struct. + Speed impact: slight penalty, due to more bookkeeping. + * src/m4.h (STRING): Delete typedef. + (struct string_pair, enum token_chain_type): New types. + (struct token_chain): Reduce size via a union. + (ARG_LEN): New macro. + (ARG): Move here... + * src/builtin.c (ARG): ...from here. + (dump_args, define_macro, m4_dumpdef, m4_builtin, m4_indir) + (m4_defn, mkstemp_helper, m4_maketemp, m4_mkstemp, m4___file__) + (m4___program__, m4_m4wrap, m4_len, m4_index, m4_substr) + (m4_regexp, m4_patsubst): Adjust callers. + * src/input.c (rquote, lquote, bcomm, ecomm): Delete... + (curr_quote, curr_comm): ...replaced by these. + (make_text_link, push_token, pop_input, input_print, peek_input) + (next_char_1, input_init, set_quotes, set_comment, set_quote_age) + (next_token, peek_token): Adjust callers. + * src/macro.c (expand_macro, arg_token, arg_mark, arg_text) + (arg_equal, arg_len, make_argv_ref, push_arg, push_args): + Likewise. + * src/format.c (ARG_INT, ARG_LONG, ARG_STR, ARG_DOUBLE, format): + Likewise. + * src/freeze.c (produce_frozen_state): Likewise. + * src/debug.c (trace_format, trace_pre): Likewise. + (debug_decode): Don't lose partial traces prior to reducing + debugmode. + 2008-01-22 Eric Blake <ebb9@byu.net> Stage 11: full circle for single argument references. diff --git a/src/builtin.c b/src/builtin.c index 007ca553..c89ad44e 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -30,10 +30,6 @@ # include <sys/wait.h> #endif -/* Grab the text at argv index I. Assumes a macro_argument *argv is - in scope. */ -#define ARG(i) arg_text (argv, i) - /* Initialization of builtin and predefined macros. 
The table "builtin_tab" is both used for initialization, and by the "builtin" builtin. */ @@ -623,10 +619,10 @@ dump_args (struct obstack *obs, int start, macro_arguments *argv, else dump_sep = true; if (quoted) - obstack_grow (obs, lquote.string, lquote.length); - obstack_grow (obs, ARG (i), arg_len (argv, i)); + obstack_grow (obs, curr_quote.str1, curr_quote.len1); + obstack_grow (obs, ARG (i), ARG_LEN (i)); if (quoted) - obstack_grow (obs, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str2, curr_quote.len2); } } @@ -668,14 +664,14 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode) if (argc == 2) { - define_user_macro (ARG (1), arg_len (argv, 1), "", mode); + define_user_macro (ARG (1), ARG_LEN (1), "", mode); return; } switch (arg_type (argv, 2)) { case TOKEN_TEXT: - define_user_macro (ARG (1), arg_len (argv, 1), ARG (2), mode); + define_user_macro (ARG (1), ARG_LEN (1), ARG (2), mode); break; case TOKEN_FUNC: @@ -865,7 +861,8 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv) case TOKEN_TEXT: if (debug_level & DEBUG_TRACE_QUOTE) DEBUG_PRINT3 ("%s%s%s\n", - lquote.string, SYMBOL_TEXT (data.base[0]), rquote.string); + curr_quote.str1, SYMBOL_TEXT (data.base[0]), + curr_quote.str2); else DEBUG_PRINT1 ("%s\n", SYMBOL_TEXT (data.base[0])); break; @@ -916,7 +913,7 @@ m4_builtin (struct obstack *obs, int argc, macro_arguments *argv) m4_warn (0, me, _("undefined builtin `%s'"), name); else { - macro_arguments *new_argv = make_argv_ref (argv, name, arg_len (argv, 1), + macro_arguments *new_argv = make_argv_ref (argv, name, ARG_LEN (1), true, !bp->groks_macro_args); bp->func (obs, argc - 1, new_argv); } @@ -950,7 +947,7 @@ m4_indir (struct obstack *obs, int argc, macro_arguments *argv) m4_warn (0, me, _("undefined macro `%s'"), name); else { - macro_arguments *new_argv = make_argv_ref (argv, name, arg_len (argv, 1), + macro_arguments *new_argv = make_argv_ref (argv, name, ARG_LEN (1), true, !SYMBOL_MACRO_ARGS (s)); 
call_macro (s, argc - 1, new_argv, obs); } @@ -982,9 +979,9 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv) switch (SYMBOL_TYPE (s)) { case TOKEN_TEXT: - obstack_grow (obs, lquote.string, lquote.length); + obstack_grow (obs, curr_quote.str1, curr_quote.len1); obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s))); - obstack_grow (obs, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str2, curr_quote.len2); break; case TOKEN_FUNC: @@ -1429,13 +1426,13 @@ mkstemp_helper (struct obstack *obs, const char *me, const char *pattern, /* Guarantee that there are six trailing 'X' characters, even if the user forgot to supply them. Output must be quoted if successful. */ - obstack_grow (obs, lquote.string, lquote.length); + obstack_grow (obs, curr_quote.str1, curr_quote.len1); obstack_grow (obs, pattern, len); for (i = 0; len > 0 && i < 6; i++) if (pattern[--len] != 'X') break; obstack_grow0 (obs, "XXXXXX", 6 - i); - name = (char *) obstack_base (obs) + lquote.length; + name = (char *) obstack_base (obs) + curr_quote.len1; errno = 0; fd = mkstemp (name); @@ -1449,7 +1446,7 @@ mkstemp_helper (struct obstack *obs, const char *me, const char *pattern, close (fd); /* Remove NUL, then finish quote. 
*/ obstack_blank (obs, -1); - obstack_grow (obs, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str2, curr_quote.len2); } } @@ -1474,7 +1471,7 @@ m4_maketemp (struct obstack *obs, int argc, macro_arguments *argv) maketemp(XXXXXXXX) -> `X00nnnnn', where nnnnn is 16-bit pid */ const char *str = ARG (1); - size_t len = arg_len (argv, 1); + size_t len = ARG_LEN (1); size_t i; size_t len2; @@ -1495,7 +1492,7 @@ m4_maketemp (struct obstack *obs, int argc, macro_arguments *argv) } } else - mkstemp_helper (obs, me, ARG (1), arg_len (argv, 1)); + mkstemp_helper (obs, me, ARG (1), ARG_LEN (1)); } static void @@ -1505,7 +1502,7 @@ m4_mkstemp (struct obstack *obs, int argc, macro_arguments *argv) if (bad_argc (me, argc, 1, 1)) return; - mkstemp_helper (obs, me, ARG (1), arg_len (argv, 1)); + mkstemp_helper (obs, me, ARG (1), ARG_LEN (1)); } /*----------------------------------------. @@ -1532,9 +1529,9 @@ static void m4___file__ (struct obstack *obs, int argc, macro_arguments *argv) { bad_argc (ARG (0), argc, 0, 0); - obstack_grow (obs, lquote.string, lquote.length); + obstack_grow (obs, curr_quote.str1, curr_quote.len1); obstack_grow (obs, current_file, strlen (current_file)); - obstack_grow (obs, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str2, curr_quote.len2); } static void @@ -1548,9 +1545,9 @@ static void m4___program__ (struct obstack *obs, int argc, macro_arguments *argv) { bad_argc (ARG (0), argc, 0, 0); - obstack_grow (obs, lquote.string, lquote.length); + obstack_grow (obs, curr_quote.str1, curr_quote.len1); obstack_grow (obs, program_name, strlen (program_name)); - obstack_grow (obs, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str2, curr_quote.len2); } /* This section contains various macros for exiting, saving input until @@ -1601,7 +1598,7 @@ m4_m4wrap (struct obstack *obs, int argc, macro_arguments *argv) if (bad_argc (ARG (0), argc, 1, -1)) return; if (no_gnu_extensions) - obstack_grow (obs, ARG (1), 
arg_len (argv, 1)); + obstack_grow (obs, ARG (1), ARG_LEN (1)); else dump_args (obs, 1, argv, " ", false); obstack_1grow (obs, '\0'); @@ -1748,7 +1745,7 @@ m4_len (struct obstack *obs, int argc, macro_arguments *argv) { if (bad_argc (ARG (0), argc, 1, 1)) return; - shipout_int (obs, arg_len (argv, 1)); + shipout_int (obs, ARG_LEN (1)); } /*-------------------------------------------------------------------------. @@ -1777,8 +1774,7 @@ m4_index (struct obstack *obs, int argc, macro_arguments *argv) /* Rely on the optimizations guaranteed by gnulib's memmem module. */ - result = (char *) memmem (haystack, arg_len (argv, 1), - needle, arg_len (argv, 2)); + result = (char *) memmem (haystack, ARG_LEN (1), needle, ARG_LEN (2)); if (result) retval = result - haystack; @@ -1808,7 +1804,7 @@ m4_substr (struct obstack *obs, int argc, macro_arguments *argv) return; } - length = avail = arg_len (argv, 1); + length = avail = ARG_LEN (1); if (!numeric_arg (me, ARG (2), &start)) return; @@ -2074,14 +2070,14 @@ m4_regexp (struct obstack *obs, int argc, macro_arguments *argv) argc == 3 ? "" : "{", repl, argc == 3 ? "" : "}"); #endif /* DEBUG_REGEX */ - msg = compile_pattern (regexp, arg_len (argv, 2), &buf, &regs); + msg = compile_pattern (regexp, ARG_LEN (2), &buf, &regs); if (msg != NULL) { m4_warn (0, me, _("bad regular expression: `%s': %s"), regexp, msg); return; } - length = arg_len (argv, 1); + length = ARG_LEN (1); /* Avoid overhead of allocating regs if we won't use it. */ startpos = re_search (buf, victim, length, 0, length, argc == 3 ? 
NULL : regs); @@ -2142,14 +2138,14 @@ m4_patsubst (struct obstack *obs, int argc, macro_arguments *argv) xfprintf (trace_file, "p:{%s}:{%s}\n", regexp, repl); #endif /* DEBUG_REGEX */ - msg = compile_pattern (regexp, arg_len (argv, 2), &buf, &regs); + msg = compile_pattern (regexp, ARG_LEN (2), &buf, &regs); if (msg != NULL) { m4_warn (0, me, _("bad regular expression `%s': %s"), regexp, msg); return; } - length = arg_len (argv, 1); + length = ARG_LEN (1); offset = 0; matchpos = 0; diff --git a/src/debug.c b/src/debug.c index 2ca7a0d6..d6b2ddc9 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,7 +1,7 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1991, 1992, 1993, 1994, 2004, 2006, 2007 Free Software - Foundation, Inc. + Copyright (C) 1991, 1992, 1993, 1994, 2004, 2006, 2007, 2008 Free + Software Foundation, Inc. This file is part of GNU M4. @@ -110,12 +110,6 @@ debug_decode (const char *opts) } } } - - /* This is to avoid screwing up the trace output due to changes in the - debug_level. */ - - obstack_free (&trace, obstack_finish (&trace)); - return level; } @@ -283,11 +277,11 @@ trace_format (const char *fmt, ...) break; case 'l': - s = (debug_level & DEBUG_TRACE_QUOTE) ? lquote.string : ""; + s = (debug_level & DEBUG_TRACE_QUOTE) ? curr_quote.str1 : ""; break; case 'r': - s = (debug_level & DEBUG_TRACE_QUOTE) ? rquote.string : ""; + s = (debug_level & DEBUG_TRACE_QUOTE) ? curr_quote.str2 : ""; break; case 'd': @@ -309,6 +303,7 @@ trace_format (const char *fmt, ...) /*------------------------------------------------------------------. | Format the standard header attached to all tracing output lines. | +| ID is the current macro id. | `------------------------------------------------------------------*/ static void @@ -342,10 +337,10 @@ trace_flush (void) obstack_free (&trace, line); } -/*-------------------------------------------------------------. -| Do pre-argument-collction tracing for macro NAME. Used from | -| expand_macro (). 
| -`-------------------------------------------------------------*/ +/*----------------------------------------------------------------. +| Do pre-argument-collection tracing for macro NAME, with a given | +| ID. Used from expand_macro (). | +`----------------------------------------------------------------*/ void trace_prepre (const char *name, int id) @@ -355,10 +350,11 @@ trace_prepre (const char *name, int id) trace_flush (); } -/*-----------------------------------------------------------------------. -| Format the parts of a trace line, that can be made before the macro is | -| actually expanded. Used from expand_macro (). | -`-----------------------------------------------------------------------*/ +/*-----------------------------------------------------------------. +| Format the parts of a trace line that are known before the macro | +| is actually expanded. Called for the macro NAME with ID, and | +| arguments ARGV. Used from expand_macro (). | +`-----------------------------------------------------------------*/ void trace_pre (const char *name, int id, macro_arguments *argv) @@ -382,7 +378,7 @@ trace_pre (const char *name, int id, macro_arguments *argv) switch (arg_type (argv, i)) { case TOKEN_TEXT: - trace_format ("%l%S%r", arg_text (argv, i)); + trace_format ("%l%S%r", ARG (i)); break; case TOKEN_FUNC: @@ -412,8 +408,9 @@ trace_pre (const char *name, int id, macro_arguments *argv) } /*-------------------------------------------------------------------. -| Format the final part of a trace line and print it all. Used from | -| expand_macro (). | +| Format the final part of a trace line and print it all. Print | +| details for macro NAME with ID, given arguemnts ARGV and expansion | +| EXPANDED. Used from expand_macro (). 
| `-------------------------------------------------------------------*/ void diff --git a/src/format.c b/src/format.c index 9c9508db..6808ad5d 100644 --- a/src/format.c +++ b/src/format.c @@ -101,16 +101,16 @@ arg_double (const char *me, const char *str) } #define ARG_INT(i, argc, argv) \ - ((argc <= ++i) ? 0 : arg_int (me, arg_text (argv, i))) + ((argc <= ++i) ? 0 : arg_int (me, ARG (i))) #define ARG_LONG(i, argc, argv) \ - ((argc <= ++i) ? 0L : arg_long (me, arg_text (argv, i))) + ((argc <= ++i) ? 0L : arg_long (me, ARG (i))) #define ARG_STR(i, argc, argv) \ - ((argc <= ++i) ? "" : arg_text (argv, i)) + ((argc <= ++i) ? "" : ARG (i)) #define ARG_DOUBLE(i, argc, argv) \ - ((argc <= ++i) ? 0.0 : arg_double (me, arg_text (argv, i))) + ((argc <= ++i) ? 0.0 : arg_double (me, ARG (i))) /*------------------------------------------------------------------. @@ -124,7 +124,7 @@ arg_double (const char *me, const char *str) void format (struct obstack *obs, int argc, macro_arguments *argv) { - const char *me = arg_text (argv, 0); + const char *me = ARG (0); /* Macro name. */ const char *f; /* Format control string. */ const char *fmt; /* Position within f. */ char fstart[] = "%'+- 0#*.*hhd"; /* Current format spec. */ diff --git a/src/freeze.c b/src/freeze.c index 52a69d17..383d008b 100644 --- a/src/freeze.c +++ b/src/freeze.c @@ -1,6 +1,6 @@ /* GNU m4 -- A simple macro processor - Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007 + Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008 Free Software Foundation, Inc. This file is part of GNU M4. @@ -71,23 +71,16 @@ produce_frozen_state (const char *name) /* Dump quote delimiters. 
*/ - if (strcmp (lquote.string, DEF_LQUOTE) || strcmp (rquote.string, DEF_RQUOTE)) - { - xfprintf (file, "Q%d,%d\n", (int) lquote.length, (int) rquote.length); - fputs (lquote.string, file); - fputs (rquote.string, file); - fputc ('\n', file); - } + if (strcmp (curr_quote.str1, DEF_LQUOTE) + || strcmp (curr_quote.str2, DEF_RQUOTE)) + xfprintf (file, "Q%d,%d\n%s%s\n", (int) curr_quote.len1, + (int) curr_quote.len2, curr_quote.str1, curr_quote.str2); /* Dump comment delimiters. */ - if (strcmp (bcomm.string, DEF_BCOMM) || strcmp (ecomm.string, DEF_ECOMM)) - { - xfprintf (file, "C%d,%d\n", (int) bcomm.length, (int) ecomm.length); - fputs (bcomm.string, file); - fputs (ecomm.string, file); - fputc ('\n', file); - } + if (strcmp (curr_comm.str1, DEF_BCOMM) || strcmp (curr_comm.str2, DEF_ECOMM)) + xfprintf (file, "C%d,%d\n%s%s\n", (int) curr_comm.len1, + (int) curr_comm.len2, curr_comm.str1, curr_comm.str2); /* Dump all symbols. */ diff --git a/src/input.c b/src/input.c index 9f25e8fd..5890bd28 100644 --- a/src/input.c +++ b/src/input.c @@ -156,12 +156,10 @@ static bool input_change; #define CHAR_QUOTE 258 /* Character return for quoted string. */ /* Quote chars. */ -STRING rquote; -STRING lquote; +string_pair curr_quote; /* Comment chars. 
*/ -STRING bcomm; -STRING ecomm; +string_pair curr_comm; #ifdef ENABLE_CHANGEWORD @@ -219,13 +217,11 @@ make_text_link (struct obstack *obs, token_chain **start, token_chain **end) *start = chain; *end = chain; chain->next = NULL; + chain->type = CHAIN_STR; chain->quote_age = 0; - chain->str = str; - chain->len = len; - chain->level = -1; - chain->argv = NULL; - chain->index = 0; - chain->flatten = false; + chain->u.u_s.str = str; + chain->u.u_s.len = len; + chain->u.u_s.level = -1; } } @@ -363,13 +359,11 @@ push_token (token_data *token, int level) next->u.u_c.chain = chain; next->u.u_c.end = chain; chain->next = NULL; + chain->type = CHAIN_STR; chain->quote_age = TOKEN_DATA_QUOTE_AGE (token); - chain->str = TOKEN_DATA_TEXT (token); - chain->len = TOKEN_DATA_LEN (token); - chain->level = level; - chain->argv = NULL; - chain->index = 0; - chain->flatten = false; + chain->u.u_s.str = TOKEN_DATA_TEXT (token); + chain->u.u_s.len = TOKEN_DATA_LEN (token); + chain->u.u_s.level = level; if (level >= 0) { adjust_refcount (level, true); @@ -478,19 +472,20 @@ pop_input (bool cleanup) assert (!chain || !cleanup); while (chain) { - if (chain->str) + switch (chain->type) { - if (chain->len) + case CHAIN_STR: + if (chain->u.u_s.len) return false; - } - else - { + if (chain->u.u_s.level >= 0) + adjust_refcount (chain->u.u_s.level, false); + break; + case CHAIN_ARGV: /* TODO - peek into argv. */ - assert (!"implemented yet"); + default: + assert (!"pop_input"); abort (); } - if (chain->level >= 0) - adjust_refcount (chain->level, false); isp->u.u_c.chain = chain = chain->next; } break; @@ -601,8 +596,8 @@ input_print (struct obstack *obs, const input_block *input) while (chain) { /* TODO support argv refs as well. 
*/ - assert (chain->str); - if (obstack_print (obs, chain->str, chain->len, &maxlen)) + assert (chain->type == CHAIN_STR); + if (obstack_print (obs, chain->u.u_s.str, chain->u.u_s.len, &maxlen)) return; chain = chain->next; } @@ -659,15 +654,16 @@ peek_input (void) chain = block->u.u_c.chain; while (chain) { - if (chain->str) - { - if (chain->len) - return to_uchar (chain->str[0]); - } - else + switch (chain->type) { + case CHAIN_STR: + if (chain->u.u_s.len) + return to_uchar (*chain->u.u_s.str); + break; + case CHAIN_ARGV: /* TODO - peek into argv. */ - assert (!"implemented yet"); + default: + assert (!"peek_input"); abort (); } chain = chain->next; @@ -760,24 +756,25 @@ next_char_1 (bool allow_quote) { if (allow_quote && chain->quote_age == current_quote_age) return CHAR_QUOTE; - if (chain->str) + switch (chain->type) { - if (chain->len) + case CHAIN_STR: + if (chain->u.u_s.len) { /* Partial consumption invalidates quote age. */ chain->quote_age = 0; - chain->len--; - return to_uchar (*chain->str++); + chain->u.u_s.len--; + return to_uchar (*chain->u.u_s.str++); } - } - else - { + if (chain->u.u_s.level >= 0) + adjust_refcount (chain->u.u_s.level, false); + break; + case CHAIN_ARGV: /* TODO - read from argv. 
*/ - assert (!"implemented yet"); + default: + assert (!"next_char_1"); abort (); } - if (chain->level >= 0) - adjust_refcount (chain->level, false); isp->u.u_c.chain = chain = chain->next; } break; @@ -958,14 +955,14 @@ input_init (void) start_of_input_line = false; - lquote.string = xstrdup (DEF_LQUOTE); - lquote.length = strlen (lquote.string); - rquote.string = xstrdup (DEF_RQUOTE); - rquote.length = strlen (rquote.string); - bcomm.string = xstrdup (DEF_BCOMM); - bcomm.length = strlen (bcomm.string); - ecomm.string = xstrdup (DEF_ECOMM); - ecomm.length = strlen (ecomm.string); + curr_quote.str1 = xstrdup (DEF_LQUOTE); + curr_quote.len1 = strlen (curr_quote.str1); + curr_quote.str2 = xstrdup (DEF_RQUOTE); + curr_quote.len2 = strlen (curr_quote.str2); + curr_comm.str1 = xstrdup (DEF_BCOMM); + curr_comm.len1 = strlen (curr_comm.str1); + curr_comm.str2 = xstrdup (DEF_ECOMM); + curr_comm.len2 = strlen (curr_comm.str2); #ifdef ENABLE_CHANGEWORD set_word_regexp (NULL, user_word_regexp); @@ -999,15 +996,15 @@ set_quotes (const char *lq, const char *rq) else if (!rq || (*lq && !*rq)) rq = DEF_RQUOTE; - if (strcmp (lquote.string, lq) == 0 && strcmp (rquote.string, rq) == 0) + if (strcmp (curr_quote.str1, lq) == 0 && strcmp (curr_quote.str2, rq) == 0) return; - free (lquote.string); - free (rquote.string); - lquote.string = xstrdup (lq); - lquote.length = strlen (lquote.string); - rquote.string = xstrdup (rq); - rquote.length = strlen (rquote.string); + free (curr_quote.str1); + free (curr_quote.str2); + curr_quote.str1 = xstrdup (lq); + curr_quote.len1 = strlen (curr_quote.str1); + curr_quote.str2 = xstrdup (rq); + curr_quote.len2 = strlen (curr_quote.str2); set_quote_age (); } @@ -1032,15 +1029,15 @@ set_comment (const char *bc, const char *ec) else if (!ec || (*bc && !*ec)) ec = DEF_ECOMM; - if (strcmp (bcomm.string, bc) == 0 && strcmp (ecomm.string, ec) == 0) + if (strcmp (curr_comm.str1, bc) == 0 && strcmp (curr_comm.str2, ec) == 0) return; - free (bcomm.string); - 
free (ecomm.string); - bcomm.string = xstrdup (bc); - bcomm.length = strlen (bcomm.string); - ecomm.string = xstrdup (ec); - ecomm.length = strlen (ecomm.string); + free (curr_comm.str1); + free (curr_comm.str2); + curr_comm.str1 = xstrdup (bc); + curr_comm.len1 = strlen (curr_comm.str1); + curr_comm.str2 = xstrdup (ec); + curr_comm.len2 = strlen (curr_comm.str2); set_quote_age (); } @@ -1136,14 +1133,14 @@ set_quote_age (void) static const char unsafe[] = Letters "_0123456789(,) \t\n\r\f\v"; #undef Letters - if (lquote.length == 1 && rquote.length == 1 - && strpbrk(lquote.string, unsafe) == NULL - && strpbrk(rquote.string, unsafe) == NULL - && default_word_regexp && *lquote.string != *rquote.string - && *bcomm.string != '(' && *bcomm.string != ',' - && *bcomm.string != ')' && *bcomm.string != *lquote.string) - current_quote_age = (((*lquote.string & 0xff) << 8) - | (*rquote.string & 0xff)); + if (curr_quote.len1 == 1 && curr_quote.len2 == 1 + && strpbrk (curr_quote.str1, unsafe) == NULL + && strpbrk (curr_quote.str2, unsafe) == NULL + && default_word_regexp && *curr_quote.str1 != *curr_quote.str2 + && *curr_comm.str1 != '(' && *curr_comm.str1 != ',' + && *curr_comm.str1 != ')' && *curr_comm.str1 != *curr_quote.str1) + current_quote_age = (((*curr_quote.str1 & 0xff) << 8) + | (*curr_quote.str2 & 0xff)); else current_quote_age = 0; } @@ -1239,18 +1236,18 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) next_char (false); /* Consume character we already peeked at. 
*/ file = current_file; *line = current_line; - if (MATCH (ch, bcomm.string, true)) + if (MATCH (ch, curr_comm.str1, true)) { if (obs) obs_td = obs; - obstack_grow (obs_td, bcomm.string, bcomm.length); + obstack_grow (obs_td, curr_comm.str1, curr_comm.len1); while ((ch = next_char (false)) < CHAR_EOF - && !MATCH (ch, ecomm.string, true)) + && !MATCH (ch, curr_comm.str2, true)) obstack_1grow (obs_td, ch); if (ch != CHAR_EOF) { assert (ch < CHAR_EOF); - obstack_grow (obs_td, ecomm.string, ecomm.length); + obstack_grow (obs_td, curr_comm.str2, curr_comm.len2); } else /* Current_file changed to "" if we see CHAR_EOF, use the @@ -1306,7 +1303,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) #endif /* ENABLE_CHANGEWORD */ - else if (!MATCH (ch, lquote.string, true)) + else if (!MATCH (ch, curr_quote.str1, true)) { switch (ch) { @@ -1341,16 +1338,16 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) if (ch == CHAR_QUOTE) append_quote_token (obs, td); - else if (MATCH (ch, rquote.string, true)) + else if (MATCH (ch, curr_quote.str2, true)) { if (--quote_level == 0) break; - obstack_grow (obs_td, rquote.string, rquote.length); + obstack_grow (obs_td, curr_quote.str2, curr_quote.len2); } - else if (MATCH (ch, lquote.string, true)) + else if (MATCH (ch, curr_quote.str1, true)) { quote_level++; - obstack_grow (obs_td, lquote.string, lquote.length); + obstack_grow (obs_td, curr_quote.str1, curr_quote.len1); } else { @@ -1392,8 +1389,24 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller) { assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP && type == TOKEN_STRING); #ifdef DEBUG_INPUT - xfprintf (stderr, "next_token -> %s <chain>\n", - token_type_string (type)); + { + token_chain *chain; + size_t len = 0; + int links = 0; + chain = td->u.u_c.chain; + xfprintf (stderr, "next_token -> %s <chain> (", + token_type_string (type)); + while (chain) + { + assert (chain->type == CHAIN_STR); + 
xfprintf (stderr, "%s", chain->u.u_s.str); + len += chain->u.u_s.len; + links++; + chain = chain->next; + } + xfprintf (stderr, "), %d links, len %zu\n", + links, len); + } #endif /* DEBUG_INPUT */ } return type; @@ -1417,7 +1430,7 @@ peek_token (void) { result = TOKEN_MACDEF; } - else if (MATCH (ch, bcomm.string, false)) + else if (MATCH (ch, curr_comm.str1, false)) { result = TOKEN_STRING; } @@ -1429,7 +1442,7 @@ peek_token (void) { result = TOKEN_WORD; } - else if (MATCH (ch, lquote.string, false)) + else if (MATCH (ch, curr_quote.str1, false)) { result = TOKEN_STRING; } @@ -79,12 +79,15 @@ /* Various declarations. */ -struct string +/* Describes a pair of strings, such as begin and end quotes. */ +struct string_pair { - char *string; /* characters of the string */ - size_t length; /* length of the string */ + char *str1; + size_t len1; + char *str2; + size_t len2; }; -typedef struct string STRING; +typedef struct string_pair string_pair; /* Memory allocation. */ #define obstack_chunk_alloc xmalloc @@ -274,17 +277,40 @@ enum token_data_type TOKEN_COMP /* Composite argument, u.u_c is valid. */ }; -/* Composite tokens are built of a linked list of chains. */ +/* A link in a chain of token data. */ +enum token_chain_type +{ + CHAIN_STR, /* Link contains a string, u.u_s is valid. */ + /* TODO add CHAIN_FUNC. */ + CHAIN_ARGV /* Link contains a $@ reference, u.u_a is valid. */ +}; + +/* Composite tokens are built of a linked list of chains. Each link + of the chain is either a single text reference (ie. $1), or an argv + reference (ie. $@). */ struct token_chain { token_chain *next; /* Pointer to next link of chain. */ + enum token_chain_type type; /* Type of this link. */ unsigned int quote_age; /* Quote_age of this link of chain, or 0. */ - const char *str; /* NUL-terminated string if text, or NULL. */ - size_t len; /* Length of str, else 0. */ - int level; /* Expansion level of link content, or -1. */ - macro_arguments *argv; /* Reference to earlier $@. 
*/ - unsigned int index; /* Argument index within argv. */ - bool flatten; /* True to treat builtins as text. */ + union + { + struct + { + const char *str; /* Pointer to text. */ + size_t len; /* Remaining length of str. */ + int level; /* Expansion level of link content, or -1. */ + } + u_s; + struct + { + macro_arguments *argv; /* Reference to earlier $@. */ + unsigned int index; /* Argument index within argv. */ + bool flatten; /* True to treat builtins as text. */ + } + u_a; + } + u; }; /* The content of a token or macro argument. */ @@ -363,8 +389,8 @@ extern const char *current_file; extern int current_line; /* left and right quote, begin and end comment */ -extern STRING bcomm, ecomm; -extern STRING lquote, rquote; +extern string_pair curr_comm; +extern string_pair curr_quote; #define DEF_LQUOTE "`" #define DEF_RQUOTE "\'" @@ -465,6 +491,14 @@ void push_arg (struct obstack *, macro_arguments *, unsigned int); void push_args (struct obstack *, macro_arguments *, bool, bool); size_t adjust_refcount (int, bool); +/* Grab the text at argv index I. Assumes macro_argument *argv is in + scope, and aborts if the argument is not text. */ +#define ARG(i) arg_text (argv, i) + +/* Grab the text length at argv index I. Assumes macro_argument *argv + is in scope, and aborts if the argument is not text. */ +#define ARG_LEN(i) arg_len (argv, i) + /* File: builtin.c --- builtins. */ diff --git a/src/macro.c b/src/macro.c index 62af3981..d22226ea 100644 --- a/src/macro.c +++ b/src/macro.c @@ -673,8 +673,9 @@ expand_macro (symbol *sym) chain = argv->array[i]->u.u_c.chain; while (chain) { - if (chain->level >= 0) - adjust_refcount (chain->level, false); + assert (chain->type == CHAIN_STR); + if (chain->u.u_s.level >= 0) + adjust_refcount (chain->u.u_s.level, false); chain = chain->next; } } @@ -753,15 +754,17 @@ arg_token (macro_arguments *argv, unsigned int index) { token_chain *chain = token->u.u_c.chain; /* TODO - for now we support only a single-length $@ chain. 
*/ - assert (!chain->next && !chain->str); - if (index < chain->argv->argc - (chain->index - 1)) + assert (!chain->next && chain->type == CHAIN_ARGV); + if (index < chain->u.u_a.argv->argc - (chain->u.u_a.index - 1)) { - token = arg_token (chain->argv, chain->index - 1 + index); - if (chain->flatten && TOKEN_DATA_TYPE (token) == TOKEN_FUNC) + token = arg_token (chain->u.u_a.argv, + chain->u.u_a.index - 1 + index); + if (chain->u.u_a.flatten + && TOKEN_DATA_TYPE (token) == TOKEN_FUNC) token = &empty_token; break; } - index -= chain->argv->argc - chain->index; + index -= chain->u.u_a.argv->argc - chain->u.u_a.index; } else if (--index == 0) break; @@ -781,8 +784,8 @@ arg_mark (macro_arguments *argv) assert (argv->arraylen == 1 && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP && !argv->array[0]->u.u_c.chain->next - && !argv->array[0]->u.u_c.chain->str); - argv->array[0]->u.u_c.chain->argv->inuse = true; + && argv->array[0]->u.u_c.chain->type == CHAIN_ARGV); + argv->array[0]->u.u_c.chain->u.u_a.argv->inuse = true; } } @@ -820,7 +823,7 @@ arg_text (macro_arguments *argv, unsigned int index) { token_data *token; token_chain *chain; - struct obstack *obs; + struct obstack *obs; /* Scratch space; cleaned at end of macro_expand. 
*/ if (index == 0) return argv->argv0; @@ -838,8 +841,8 @@ arg_text (macro_arguments *argv, unsigned int index) obs = arg_scratch (); while (chain) { - assert (chain->str); - obstack_grow (obs, chain->str, chain->len); + assert (chain->type == CHAIN_STR); + obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); chain = chain->next; } obstack_1grow (obs, '\0'); @@ -879,8 +882,9 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT) { tmpa.next = NULL; - tmpa.str = TOKEN_DATA_TEXT (ta); - tmpa.len = TOKEN_DATA_LEN (ta); + tmpa.type = CHAIN_STR; + tmpa.u.u_s.str = TOKEN_DATA_TEXT (ta); + tmpa.u.u_s.len = TOKEN_DATA_LEN (ta); } else { @@ -890,8 +894,9 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT) { tmpb.next = NULL; - tmpb.str = TOKEN_DATA_TEXT (tb); - tmpb.len = TOKEN_DATA_LEN (tb); + tmpb.type = CHAIN_STR; + tmpb.u.u_s.str = TOKEN_DATA_TEXT (tb); + tmpb.u.u_s.len = TOKEN_DATA_LEN (tb); } else { @@ -903,32 +908,34 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb) while (ca && cb) { /* TODO support comparison against $@ refs. 
*/ - assert (ca->str && cb->str); - if (ca->len == cb->len) + assert (ca->type == CHAIN_STR && cb->type == CHAIN_STR); + if (ca->u.u_s.len == cb->u.u_s.len) { - if (memcmp (ca->str, cb->str, ca->len) != 0) + if (memcmp (ca->u.u_s.str, cb->u.u_s.str, ca->u.u_s.len) != 0) return false; ca = ca->next; cb = cb->next; } - else if (ca->len < cb->len) + else if (ca->u.u_s.len < cb->u.u_s.len) { - if (memcmp (ca->str, cb->str, ca->len) != 0) + if (memcmp (ca->u.u_s.str, cb->u.u_s.str, ca->u.u_s.len) != 0) return false; tmpb.next = cb->next; - tmpb.str = cb->str + ca->len; - tmpb.len = cb->len - ca->len; + tmpb.type = CHAIN_STR; + tmpb.u.u_s.str = cb->u.u_s.str + ca->u.u_s.len; + tmpb.u.u_s.len = cb->u.u_s.len - ca->u.u_s.len; ca = ca->next; cb = &tmpb; } else { - assert (ca->len > cb->len); - if (memcmp (ca->str, cb->str, cb->len) != 0) + assert (ca->u.u_s.len > cb->u.u_s.len); + if (memcmp (ca->u.u_s.str, cb->u.u_s.str, cb->u.u_s.len) != 0) return false; tmpa.next = ca->next; - tmpa.str = ca->str + cb->len; - tmpa.len = ca->len - cb->len; + tmpa.type = CHAIN_STR; + tmpa.u.u_s.str = ca->u.u_s.str + cb->u.u_s.len; + tmpa.u.u_s.len = ca->u.u_s.len - cb->u.u_s.len; ca = &tmpa; cb = cb->next; } @@ -979,8 +986,8 @@ arg_len (macro_arguments *argv, unsigned int index) len = 0; while (chain) { - assert (chain->str); - len += chain->len; + assert (chain->type == CHAIN_STR); + len += chain->u.u_s.len; chain = chain->next; } assert (len); @@ -1039,9 +1046,9 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len, assert (argv->arraylen == 1 && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP); chain = argv->array[0]->u.u_c.chain; - assert (!chain->next && !chain->str); - argv = chain->argv; - index += chain->index - 1; + assert (!chain->next && chain->type == CHAIN_ARGV); + argv = chain->u.u_a.argv; + index += chain->u.u_a.index - 1; } if (argv->argc <= index) { @@ -1065,13 +1072,11 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len, 
TOKEN_DATA_TYPE (token) = TOKEN_COMP; token->u.u_c.chain = token->u.u_c.end = chain; chain->next = NULL; + chain->type = CHAIN_ARGV; chain->quote_age = argv->quote_age; - chain->str = NULL; - chain->len = 0; - chain->level = expansion_level - 1; - chain->argv = argv; - chain->index = index; - chain->flatten = flatten; + chain->u.u_a.argv = argv; + chain->u.u_a.index = index; + chain->u.u_a.flatten = flatten; } new_argv->argc = argv->argc - (index - 1); new_argv->inuse = false; @@ -1111,8 +1116,8 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index) token_chain *chain = token->u.u_c.chain; while (chain) { - assert (chain->str); - obstack_grow (obs, chain->str, chain->len); + assert (chain->type == CHAIN_STR); + obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); chain = chain->next; } } @@ -1140,22 +1145,22 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) if (i + 1 == argv->argc) { if (quote) - obstack_grow (obs, lquote.string, lquote.length); + obstack_grow (obs, curr_quote.str1, curr_quote.len1); push_arg (obs, argv, i); if (quote) - obstack_grow (obs, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str2, curr_quote.len2); return; } /* Compute the separator in the scratch space. */ if (quote) { - obstack_grow (obs, lquote.string, lquote.length); - obstack_grow (scratch, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str1, curr_quote.len1); + obstack_grow (scratch, curr_quote.str2, curr_quote.len2); obstack_1grow (scratch, ','); - obstack_grow0 (scratch, lquote.string, lquote.length); + obstack_grow0 (scratch, curr_quote.str1, curr_quote.len1); sep = (char *) obstack_finish (scratch); - sep_len += lquote.length + rquote.length; + sep_len += curr_quote.len1 + curr_quote.len2; } /* TODO push entire $@ reference, rather than pushing each arg. 
*/ for ( ; i < argv->argc; i++) @@ -1175,14 +1180,14 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote) chain = token->u.u_c.chain; while (chain) { - assert (chain->str); - obstack_grow (obs, chain->str, chain->len); + assert (chain->type == CHAIN_STR); + obstack_grow (obs, chain->u.u_s.str, chain->u.u_s.len); chain = chain->next; } } } if (quote) - obstack_grow (obs, rquote.string, rquote.length); + obstack_grow (obs, curr_quote.str2, curr_quote.len2); if (inuse) arg_mark (argv); } |