diff options
author | Eric Blake <ebb9@byu.net> | 2008-01-14 17:25:13 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-08-03 19:43:51 -0600 |
commit | cb26d7cb8b438224908d53df59b1d394ba1928f8 (patch) | |
tree | 5fd98e6324ecac4eae890f7b3148f7bb3a9a533a | |
parent | 40c640f486bf7a99c6e16d91332f25872f501488 (diff) | |
download | m4-cb26d7cb8b438224908d53df59b1d394ba1928f8.tar.gz |
Stage26: allow NUL in macro definitions
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | doc/m4.texinfo | 97 | ||||
-rw-r--r-- | examples/foreachq4.m4 | 13 | ||||
-rw-r--r-- | examples/null.err | bin | 572 -> 713 bytes | |||
-rw-r--r-- | examples/null.m4 | bin | 6189 -> 6499 bytes | |||
-rw-r--r-- | examples/null.out | bin | 468 -> 510 bytes | |||
-rw-r--r-- | m4/gnulib-cache.m4 | 56 | ||||
-rw-r--r-- | src/builtin.c | 82 | ||||
-rw-r--r-- | src/freeze.c | 6 | ||||
-rw-r--r-- | src/input.c | 131 | ||||
-rw-r--r-- | src/m4.c | 2 | ||||
-rw-r--r-- | src/m4.h | 13 | ||||
-rw-r--r-- | src/macro.c | 35 |
13 files changed, 317 insertions, 121 deletions
diff --git a/configure.ac b/configure.ac index ea4e130c..5b02c561 100644 --- a/configure.ac +++ b/configure.ac @@ -32,6 +32,9 @@ AC_CONFIG_HEADERS([lib/config.h:lib/config.hin]) AC_PROG_CC M4_EARLY +# M4 is single-threaded; so we can optimize gnulib code by using this: +gl_DISABLE_THREADS + AC_CHECK_HEADERS_ONCE([siginfo.h sys/wait.h]) AC_CHECK_TYPES([siginfo_t], [], [], [[#include <signal.h> diff --git a/doc/m4.texinfo b/doc/m4.texinfo index d6b7b59f..c8bf7ee1 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -1021,6 +1021,27 @@ The comment delimiters can be changed to any string at any time, using the builtin macro @code{changecom}. @xref{Changecom}, for more information. +@ignore +@comment Detect regression in 1.4.10b in regards to reparsing comments. +@comment Not worth including in the manual. +@example +define(`e', `$@@')define(`q', ``$@@'')define(`foo', `bar') +@result{} +q(e(`one +',#two ' foo +)) +@result{}`one +@result{}',`#two bar +@result{}'' +changecom(`<', `>')define(`n', `$#') +@result{} +n(e(<`>, <'>)) +@result{}1 +len(e(<`>, ,<'>)) +@result{}12 +@end example +@end ignore + @node Other tokens @section Other kinds of input tokens @@ -2578,7 +2599,8 @@ m4_indir(`m4_divnum') Note that @code{indir} and @code{builtin} can be used to invoke builtins without arguments, even when they normally require parameters to be -recognized; but it will provoke a warning, and result in a void expansion. +recognized; but it will provoke a warning, and the expansion will behave +as though empty strings had been passed as the required arguments. 
@example builtin @@ -2592,6 +2614,13 @@ builtin(`builtin') builtin(`builtin',) @error{}m4:stdin:4: Warning: builtin: undefined builtin `' @result{} +builtin(`builtin', ``' +') +@error{}m4:stdin:5: Warning: builtin: undefined builtin ``\'\n' +@result{} +indir(`index') +@error{}m4:stdin:7: Warning: index: too few arguments: 0 < 2 +@result{} @end example @ignore @@ -3779,6 +3808,22 @@ indir(`my_defn', indir(`shift', `', `foo')) @result{}bar @end example +@ignore +@comment not worth including in the manual, but this tests a trace code +@comment path that was temporarily broken +@comment options: -de --trace ifelse +@example +$ @kbd{m4 -de --trace ifelse} +define(`e', `ifelse(`$1', `$2', `ifelse(`$1', `$2', `e(shift($@@))')')') +@result{} +e(`1', `1') +@error{}m4trace: -1- ifelse -> ifelse(`1', `1', `e(shift(`1',`1'))') +@error{}m4trace: -1- ifelse -> e(shift(`1',`1')) +@error{}m4trace: -1- ifelse ->@w{ } +@result{} +@end example +@end ignore + @node Debug Levels @section Controlling debugging output @@ -6990,6 +7035,22 @@ traceon(`undefined')dnl @c Make sure freezing is successful. +@example +ifdef(`__unix__', , + `errprint(` skipping: syscmd does not have unix semantics +')m4exit(`77')')dnl +changequote(`[', `]')dnl +syscmd([echo 'changequote([,])pushdef([divnum],[hi])dnl' \ + | ]__program__[ -F in.m4f \ + && echo 'divnum popdef([divnum])divnum' \ + | ]__program__[ -R in.m4f \ + && rm in.m4f])status sysval +@result{}hi 0 +@result{}status 0 +@end example + +@c Detect inability to freeze. 
+ @comment options: -F /none/such @comment status: 1 @example @@ -7006,13 +7067,13 @@ ifdef(`__unix__', , `errprint(` skipping: syscmd does not have unix semantics ')m4exit(`77')')dnl changequote(`[', `]')dnl -syscmd([printf 'define(-\0-,hi)changequote([,\0])changecom(--\0)dnl +syscmd([printf 'define(-\0-,\0-\0)changequote([,\0])changecom(--\0)dnl divert(1)undivert(null.out)' | ]__program__[ -F in.m4f \ - && printf 'errprint([divnum\0] #-- indir(-\0-))' \ + && printf 'errprint([divnum\0] #-- len(indir(-\0-)))' \ | ]__program__[ -R in.m4f \ && rm in.m4f])errprint([ ]sysval[ ])dnl -@error{}divnum #-- hi 0 +@error{}divnum #-- 3 0 @end example @end ignore @@ -7937,6 +7998,34 @@ include(`loop.m4')dnl @result{}10000 @end example +@comment foreach via forloop recursion + +@comment examples +@comment options: -Dlimit=10 -Dverbose -Dalt=4 +@example +$ @kbd {m4 -I examples -Dlimit=10 -Dverbose -Dalt=4} +include(`loop.m4')dnl +@result{} 1 2 3 4 5 6 7 8 9 10 +@end example + +@comment examples +@comment options: -Dlimit=2500 -Dalt=4 +@example +$ @kbd {m4 -I examples -Dlimit=2500 -Dalt=4} +include(`loop.m4')dnl +@end example + +@comment examples +@comment options: -Dlimit=10000 -Dalt=4 +@example +$ @kbd {m4 -I examples -Dlimit=10000 -Dalt=4} +define(`foo', `divert`'len(popdef(`_foreachq')_foreachq($@@))')dnl +define(`debug', `pushdef(`_foreachq', defn(`foo'))') +@result{} +include(`loop.m4')dnl +@result{}48894 +@end example + @end ignore @node Improved m4wrap diff --git a/examples/foreachq4.m4 b/examples/foreachq4.m4 new file mode 100644 index 00000000..3da64c92 --- /dev/null +++ b/examples/foreachq4.m4 @@ -0,0 +1,13 @@ +include(`forloop2.m4')dnl +divert(`-1') +# foreachq(x, `item_1, item_2, ..., item_n', stmt) +# quoted list, version based on forloop +define(`foreachq', +`ifelse(`$2', `', `', `_$0(`$1', `$3', $2)')') +define(`_foreachq', +`pushdef(`$1', forloop(`$1', `3', `$#', + `$0_(`1', `2', indir(`$1'))')`popdef( + `$1')')indir(`$1', $@)') +define(`_foreachq_', 
+``define(`$$1', `$$3')$$2`''') +divert`'dnl diff --git a/examples/null.err b/examples/null.err Binary files differ index 5f989ee6..897ce346 100644 --- a/examples/null.err +++ b/examples/null.err diff --git a/examples/null.m4 b/examples/null.m4 Binary files differ index de76742a..1823073d 100644 --- a/examples/null.m4 +++ b/examples/null.m4 diff --git a/examples/null.out b/examples/null.out Binary files differ index 5e90221e..dd834163 100644 --- a/examples/null.out +++ b/examples/null.out diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4 index dffdf8d1..5c03a924 100644 --- a/m4/gnulib-cache.m4 +++ b/m4/gnulib-cache.m4 @@ -15,11 +15,63 @@ # Specification in the form of a command-line invocation: -# gnulib-tool --import --dir=. 
--local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset binary-io c-stack clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 hash intprops memchr2 memmem mkstemp obstack obstack-printf-posix progname quote regex sigaction stdbool stdint stdlib-safer strsignal strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix # Specification in the form of a few gnulib-tool.m4 macro invocations: gl_LOCAL_DIR([local]) -gl_MODULES([announce-gen assert autobuild avltree-oset binary-io c-stack clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 intprops memchr2 memmem mkstemp obstack obstack-printf-posix progname quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix]) +gl_MODULES([ + announce-gen + assert + autobuild + avltree-oset + binary-io + c-stack + clean-temp + cloexec + close-stream + closein + config-h + error + fdl + fflush + flexmember + fopen-safer + freadptr + freadseek + fseeko + gendocs + getopt + git-version-gen + gnumakefile + gnupload + gpl-3.0 + hash + intprops + memchr2 + memmem + mkstemp + obstack + obstack-printf-posix + progname + quote + regex + sigaction + stdbool + stdint + stdlib-safer + strsignal + strtod + strtol + unlocked-io + vasnprintf-posix + verror + version-etc + version-etc-fsf + xalloc + xmemdup0 + xprintf + xvasprintf-posix +]) gl_AVOID([]) gl_SOURCE_BASE([lib]) gl_M4_BASE([m4]) diff --git a/src/builtin.c b/src/builtin.c index c171ea96..bcf7bb91 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -427,26 +427,32 
@@ free_regex (void) } } -/*-----------------------------------------------------------------. -| Define a predefined or user-defined macro, with name NAME of | -| length NAME_LEN, and expansion TEXT. MODE is SYMBOL_INSERT for | -| "define" or SYMBOL_PUSHDEF for "pushdef". This function is also | -| used from main (). | -`-----------------------------------------------------------------*/ +/*------------------------------------------------------------------. +| Define a predefined or user-defined macro, with name NAME of | +| length NAME_LEN, and expansion TEXT of length LEN. LEN may be | +| SIZE_MAX, to use the string length of TEXT instead. MODE is | +| SYMBOL_INSERT for "define" or SYMBOL_PUSHDEF for "pushdef". This | +| function is also used from main (). | +`------------------------------------------------------------------*/ void define_user_macro (const char *name, size_t name_len, const char *text, - symbol_lookup mode) + size_t len, symbol_lookup mode) { symbol *s; - char *defn = xstrdup (text ? text : ""); + char *defn; + assert (text); + if (len == SIZE_MAX) + len = strlen (text); + defn = xmemdup (text, len); s = lookup_symbol (name, name_len, mode); if (SYMBOL_TYPE (s) == TOKEN_TEXT) free (SYMBOL_TEXT (s)); SYMBOL_TYPE (s) = TOKEN_TEXT; SYMBOL_TEXT (s) = defn; + SYMBOL_TEXT_LEN (s) = len; SYMBOL_MACRO_ARGS (s) = true; /* Implement --warn-macro-sequence. 
*/ @@ -454,7 +460,6 @@ define_user_macro (const char *name, size_t name_len, const char *text, { regoff_t offset = 0; struct re_registers *regs = &macro_sequence_regs; - size_t len = strlen (defn); while (offset < len && (offset = re_search (&macro_sequence_buf, defn, len, offset, @@ -513,13 +518,13 @@ builtin_init (void) { if (pp->unix_name != NULL) define_user_macro (pp->unix_name, strlen (pp->unix_name), - pp->func, SYMBOL_INSERT); + pp->func, SIZE_MAX, SYMBOL_INSERT); } else { if (pp->gnu_name != NULL) define_user_macro (pp->gnu_name, strlen (pp->gnu_name), - pp->func, SYMBOL_INSERT); + pp->func, SIZE_MAX, SYMBOL_INSERT); } } @@ -628,7 +633,10 @@ ntoa (int32_t value, int radix) static void shipout_int (struct obstack *obs, int val) { - obstack_printf (obs, "%d", val); + const char *s; + + s = ntoa ((int32_t) val, 10); + obstack_grow (obs, s, strlen (s)); } @@ -670,7 +678,7 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode) if (argc == 2) { - define_user_macro (ARG (1), ARG_LEN (1), "", mode); + define_user_macro (ARG (1), ARG_LEN (1), "", 0, mode); return; } @@ -680,7 +688,8 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode) m4_warn (0, me, _("cannot concatenate builtins")); /* fallthru */ case TOKEN_TEXT: - define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true), mode); + define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true), + arg_len (argv, 2, true), mode); break; case TOKEN_FUNC: @@ -905,7 +914,8 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv) case TOKEN_TEXT: if (debug_level & DEBUG_TRACE_QUOTE) fwrite (curr_quote.str1, 1, curr_quote.len1, debug); - fputs (SYMBOL_TEXT (data.base[0]), debug); + fwrite (SYMBOL_TEXT (data.base[0]), 1, + SYMBOL_TEXT_LEN (data.base[0]), debug); if (debug_level & DEBUG_TRACE_QUOTE) fwrite (curr_quote.str2, 1, curr_quote.len2, debug); break; @@ -1040,7 +1050,7 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv) { case TOKEN_TEXT: obstack_grow 
(obs, curr_quote.str1, curr_quote.len1); - obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s))); + obstack_grow (obs, SYMBOL_TEXT (s), SYMBOL_TEXT_LEN (s)); obstack_grow (obs, curr_quote.str2, curr_quote.len2); break; @@ -1226,9 +1236,13 @@ m4_eval (struct obstack *obs, int argc, macro_arguments *argv) s++; } len = strlen (s); - if (min < len) - min = len; - obstack_printf (obs, "%.*d%s", min - len, 0, s); + if (len < min) + { + min -= len; + obstack_blank (obs, min); + memset ((char *) obstack_next_free (obs) - min, '0', min); + } + obstack_grow (obs, s, len); } static void @@ -1409,7 +1423,7 @@ m4_changeword (struct obstack *obs, int argc, macro_arguments *argv) if (bad_argc (me, argc, 1, 1)) return; - set_word_regexp (me, ARG (1)); + set_word_regexp (me, ARG (1), ARG_LEN (1)); } #endif /* ENABLE_CHANGEWORD */ @@ -2292,29 +2306,31 @@ void expand_user_macro (struct obstack *obs, symbol *sym, int argc, macro_arguments *argv) { - const char *text; + const char *text = SYMBOL_TEXT (sym); + size_t len = SYMBOL_TEXT_LEN (sym); int i; + const char *dollar = memchr (text, '$', len); - for (text = SYMBOL_TEXT (sym); *text != '\0';) + while (dollar) { - if (*text != '$') - { - obstack_1grow (obs, *text); - text++; - continue; - } - text++; - switch (*text) + obstack_grow (obs, text, dollar - text); + len -= dollar - text; + text = dollar; + if (len == 1) + break; + len--; + switch (*++text) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (no_gnu_extensions) { i = *text++ - '0'; + len--; } else { - for (i = 0; isdigit (to_uchar (*text)); text++) + for (i = 0; len && isdigit (to_uchar (*text)); text++, len--) i = i * 10 + (*text - '0'); } push_arg (obs, argv, i); @@ -2323,17 +2339,21 @@ expand_user_macro (struct obstack *obs, symbol *sym, case '#': /* number of arguments */ shipout_int (obs, argc - 1); text++; + len--; break; case '*': /* all arguments */ case '@': /* ... 
same, but quoted */ push_args (obs, argv, false, *text == '@'); text++; + len--; break; default: obstack_1grow (obs, '$'); break; } + dollar = memchr (text, '$', len); } + obstack_grow (obs, text, len); } diff --git a/src/freeze.c b/src/freeze.c index dd856337..5d4ac423 100644 --- a/src/freeze.c +++ b/src/freeze.c @@ -110,9 +110,9 @@ produce_frozen_state (const char *name) case TOKEN_TEXT: xfprintf (file, "T%d,%d\n", (int) SYMBOL_NAME_LEN (sym), - (int) strlen (SYMBOL_TEXT (sym))); + (int) SYMBOL_TEXT_LEN (sym)); fwrite (SYMBOL_NAME (sym), 1, SYMBOL_NAME_LEN (sym), file); - fputs (SYMBOL_TEXT (sym), file); + fwrite (SYMBOL_TEXT (sym), 1, SYMBOL_TEXT_LEN (sym), file); fputc ('\n', file); break; @@ -353,7 +353,7 @@ reload_frozen_state (const char *name) /* Enter a macro having an expansion text as a definition. */ - define_user_macro (string[0], number[0], string[1], + define_user_macro (string[0], number[0], string[1], number[1], SYMBOL_PUSHDEF); break; diff --git a/src/input.c b/src/input.c index 75f86146..a6853bf7 100644 --- a/src/input.c +++ b/src/input.c @@ -171,9 +171,6 @@ string_pair curr_comm; # define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*" -/* Table of characters that can start a word. */ -static char word_start[256]; - /* Current regular expression for detecting words. */ static struct re_pattern_buffer word_regexp; @@ -475,6 +472,7 @@ push_token (token_data *token, int level, bool inuse) destructively modifies the chain it is parsing. 
*/ chain = (token_chain *) obstack_copy (current_input, src_chain, sizeof *chain); + chain->next = NULL; if (chain->type == CHAIN_STR && chain->u.u_s.level == -1) { if (chain->u.u_s.len <= INPUT_INLINE_THRESHOLD || !inuse) @@ -711,6 +709,9 @@ pop_wrapup (void) obstack_free (&file_names, NULL); obstack_free (wrapup_stack, NULL); free (wrapup_stack); +#ifdef ENABLE_CHANGEWORD + regfree (&word_regexp); +#endif /* ENABLE_CHANGEWORD */ return false; } @@ -1197,11 +1198,12 @@ init_argv_token (struct obstack *obs, token_data *td) /*------------------------------------------------------------------. -| This function is for matching a string against a prefix of the | -| input stream. If the string S of length SLEN matches the input | -| and CONSUME is true, the input is discarded; otherwise any | -| characters read are pushed back again. The function is used only | -| when multicharacter quotes or comment delimiters are used. | +| If the string S of length SLEN matches the next characters of the | +| input stream, return true. If CONSUME, the first character has | +| already been matched. If a match is found and CONSUME is true, | +| the input is discarded; otherwise any characters read are pushed | +| back again. The function is used only when multicharacter quotes | +| or comment delimiters are used. | `------------------------------------------------------------------*/ static bool @@ -1212,6 +1214,11 @@ match_input (const char *s, size_t slen, bool consume) const char *t; bool result = false; + if (consume) + { + s++; + slen--; + } assert (slen); ch = peek_input (false); if (ch != to_uchar (*s)) @@ -1245,21 +1252,22 @@ match_input (const char *s, size_t slen, bool consume) return result; } -/*---------------------------------------------------------------. -| The macro MATCH() is used to match a string S of length SLEN | -| against the input. The first character is handled inline, for | -| speed. 
Hopefully, this will not hurt efficiency too much when | -| single character quotes and comment delimiters are used. If | -| CONSUME, then CH is the result of next_char, and a successful | -| match will discard the matched string. Otherwise, CH is the | -| result of peek_input, and the input stream is effectively | -| unchanged. | -`---------------------------------------------------------------*/ +/*--------------------------------------------------------------------. +| The macro MATCH() is used to match a string S of length SLEN | +| against the input. The first character is handled inline for | +| speed, and S[SLEN] must be safe to dereference (it is faster to do | +| character comparison prior to length checks). This improves | +| efficiency for the common case of single character quotes and | +| comment delimiters, while being safe for disabled delimiters as | +| well as longer delimiters. If CONSUME, then CH is the result of | +| next_char, and a successful match will discard the matched string. | +| Otherwise, CH is the result of peek_input, and the input stream is | +| effectively unchanged. | +`--------------------------------------------------------------------*/ #define MATCH(ch, s, slen, consume) \ - ((slen) && to_uchar ((s)[0]) == (ch) \ - && ((slen) == 1 \ - || (match_input ((s) + (consume), (slen) - (consume), consume)))) + (to_uchar ((s)[0]) == (ch) \ + && ((slen) >> 1 ? match_input (s, slen, consume) : (slen))) /*----------------------------------------------------------. 
@@ -1291,17 +1299,17 @@ input_init (void) start_of_input_line = false; - curr_quote.str1 = xmemdup (DEF_LQUOTE, 1); + curr_quote.str1 = xmemdup0 (DEF_LQUOTE, 1); curr_quote.len1 = 1; - curr_quote.str2 = xmemdup (DEF_RQUOTE, 1); + curr_quote.str2 = xmemdup0 (DEF_RQUOTE, 1); curr_quote.len2 = 1; - curr_comm.str1 = xmemdup (DEF_BCOMM, 1); + curr_comm.str1 = xmemdup0 (DEF_BCOMM, 1); curr_comm.len1 = 1; - curr_comm.str2 = xmemdup (DEF_ECOMM, 1); + curr_comm.str2 = xmemdup0 (DEF_ECOMM, 1); curr_comm.len2 = 1; #ifdef ENABLE_CHANGEWORD - set_word_regexp (NULL, user_word_regexp); + set_word_regexp (NULL, user_word_regexp, SIZE_MAX); #endif /* ENABLE_CHANGEWORD */ set_quote_age (); @@ -1345,9 +1353,10 @@ set_quotes (const char *lq, size_t lq_len, const char *rq, size_t rq_len) free (curr_quote.str1); free (curr_quote.str2); - curr_quote.str1 = xmemdup (lq, lq_len); + /* The use of xmemdup0 is essential for MATCH() to work. */ + curr_quote.str1 = xmemdup0 (lq, lq_len); curr_quote.len1 = lq_len; - curr_quote.str2 = xmemdup (rq, rq_len); + curr_quote.str2 = xmemdup0 (rq, rq_len); curr_quote.len2 = rq_len; set_quote_age (); } @@ -1387,29 +1396,34 @@ set_comment (const char *bc, size_t bc_len, const char *ec, size_t ec_len) free (curr_comm.str1); free (curr_comm.str2); - curr_comm.str1 = xmemdup (bc, bc_len); + /* The use of xmemdup0 is essential for MATCH() to work. */ + curr_comm.str1 = xmemdup0 (bc, bc_len); curr_comm.len1 = bc_len; - curr_comm.str2 = xmemdup (ec, ec_len); + curr_comm.str2 = xmemdup0 (ec, ec_len); curr_comm.len2 = ec_len; set_quote_age (); } #ifdef ENABLE_CHANGEWORD -/*-------------------------------------------------------------------. -| Set the regular expression for recognizing words to REGEXP, and | -| report errors on behalf of CALLER. If REGEXP is NULL, revert back | -| to the default parsing rules. | -`-------------------------------------------------------------------*/ +/*-----------------------------------------------------------------. 
+| Set the regular expression for recognizing words to REGEXP of | +| length LEN, and report errors on behalf of CALLER. If REGEXP is | +| NULL, revert back to the default parsing rules. If LEN is | +| SIZE_MAX, use strlen(REGEXP) instead. | +`-----------------------------------------------------------------*/ void -set_word_regexp (const call_info *caller, const char *regexp) +set_word_regexp (const call_info *caller, const char *regexp, size_t len) { - int i; const char *msg; struct re_pattern_buffer new_word_regexp; - if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP)) + if (len == SIZE_MAX) + len = strlen (regexp); + if (len == 0 + || (len == strlen (DEFAULT_WORD_REGEXP) + && !memcmp (regexp, DEFAULT_WORD_REGEXP, len))) { default_word_regexp = true; set_quote_age (); @@ -1418,30 +1432,30 @@ set_word_regexp (const call_info *caller, const char *regexp) /* Dry run to see whether the new expression is compilable. */ init_pattern_buffer (&new_word_regexp, NULL); - msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp); + msg = re_compile_pattern (regexp, len, &new_word_regexp); regfree (&new_word_regexp); if (msg != NULL) { - m4_warn (0, caller, _("bad regular expression `%s': %s"), regexp, msg); + m4_warn (0, caller, _("bad regular expression %s: %s"), + quotearg_style_mem (locale_quoting_style, regexp, len), msg); return; } - /* If compilation worked, retry using the word_regexp struct. - Can't rely on struct assigns working, so redo the compilation. */ - regfree (&word_regexp); - msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp); + /* If compilation worked, retry using the word_regexp struct. We + can't rely on struct assigns working, so redo the compilation. + The fastmap can be reused between compilations, and will be freed + by the final regfree. 
*/ + if (!word_regexp.fastmap) + word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1); + msg = re_compile_pattern (regexp, len, &word_regexp); assert (!msg); re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end); + if (re_compile_fastmap (&word_regexp)) + assert (false); default_word_regexp = false; set_quote_age (); - - for (i = 1; i < 256; i++) - { - char test = i; - word_start[i] = re_match (&word_regexp, &test, 1, 0, NULL) > 0; - } } #endif /* ENABLE_CHANGEWORD */ @@ -1687,7 +1701,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, assert (ch < CHAR_EOF); obstack_1grow (obs_td, ch); } - type = TOKEN_STRING; + type = TOKEN_COMMENT; } else if (default_word_regexp && (isalpha (ch) || ch == '_')) { @@ -1703,7 +1717,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, #ifdef ENABLE_CHANGEWORD - else if (!default_word_regexp && word_start[ch]) + else if (!default_word_regexp && word_regexp.fastmap[ch]) { obstack_1grow (&token_stack, ch); while (1) @@ -1829,7 +1843,8 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv, } else { - assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP && type == TOKEN_STRING); + assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP + && (type == TOKEN_STRING || type == TOKEN_COMMENT)); #ifdef DEBUG_INPUT { token_chain *chain; @@ -1887,11 +1902,11 @@ peek_token (void) } else if (MATCH (ch, curr_comm.str1, curr_comm.len1, false)) { - result = TOKEN_STRING; + result = TOKEN_COMMENT; } else if ((default_word_regexp && (isalpha (ch) || ch == '_')) #ifdef ENABLE_CHANGEWORD - || (!default_word_regexp && word_start[ch]) + || (!default_word_regexp && word_regexp.fastmap[ch]) #endif /* ENABLE_CHANGEWORD */ ) { @@ -1935,6 +1950,8 @@ token_type_string (token_type t) return "EOF"; case TOKEN_STRING: return "STRING"; + case TOKEN_COMMENT: + return "COMMENT"; case TOKEN_WORD: return "WORD"; case TOKEN_OPEN: @@ -1973,6 +1990,10 @@ print_token (const char *s, token_type t, 
token_data *td) xfprintf (stderr, "string:"); break; + case TOKEN_COMMENT: + xfprintf (stderr, "comment:"); + break; + case TOKEN_MACDEF: xfprintf (stderr, "macro: %p\n", TOKEN_DATA_FUNC (td)); break; @@ -620,7 +620,7 @@ main (int argc, char *const *argv, char *const *envp) const char *value = strchr (defines->arg, '='); size_t len = value ? value - defines->arg : strlen (defines->arg); define_user_macro (defines->arg, len, value ? value + 1 : "", - SYMBOL_INSERT); + value ? SIZE_MAX : 0, SYMBOL_INSERT); } break; @@ -218,7 +218,8 @@ typedef struct token_chain token_chain; enum token_type { TOKEN_EOF = 4,/* End of file, TOKEN_VOID. */ - TOKEN_STRING, /* Quoted string or comment, TOKEN_TEXT or TOKEN_COMP. */ + TOKEN_STRING, /* Quoted string, TOKEN_TEXT or TOKEN_COMP. */ + TOKEN_COMMENT,/* Comment, TOKEN_TEXT or TOKEN_COMP. */ TOKEN_WORD, /* An identifier, TOKEN_TEXT. */ TOKEN_OPEN, /* Active character `(', TOKEN_TEXT. */ TOKEN_COMMA, /* Active character `,', TOKEN_TEXT. */ @@ -380,7 +381,7 @@ extern string_pair curr_quote; void set_quotes (const char *, size_t, const char *, size_t); void set_comment (const char *, size_t, const char *, size_t); #ifdef ENABLE_CHANGEWORD -void set_word_regexp (const call_info *, const char *); +void set_word_regexp (const call_info *, const char *, size_t); #endif unsigned int quote_age (void); bool safe_quotes (void); @@ -440,6 +441,7 @@ struct symbol #define SYMBOL_NAME_LEN(S) ((S)->len) #define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data)) #define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data)) +#define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data)) #define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data)) typedef enum symbol_lookup symbol_lookup; @@ -470,7 +472,7 @@ token_data_type arg_type (macro_arguments *, unsigned int); const char *arg_text (macro_arguments *, unsigned int, bool); bool arg_equal (macro_arguments *, unsigned int, unsigned int); bool arg_empty (macro_arguments *, unsigned int); -size_t arg_len (macro_arguments *, 
unsigned int); +size_t arg_len (macro_arguments *, unsigned int, bool); builtin_func *arg_func (macro_arguments *, unsigned int); struct obstack *arg_scratch (void); bool arg_print (struct obstack *, macro_arguments *, unsigned int, @@ -490,7 +492,7 @@ void wrap_args (macro_arguments *); /* Grab the text length at argv index I. Assumes macro_argument *argv is in scope, and aborts if the argument is not text. */ -#define ARG_LEN(i) arg_len (argv, i) +#define ARG_LEN(i) arg_len (argv, i, false) /* File: builtin.c --- builtins. */ @@ -526,7 +528,8 @@ bool bad_argc (const call_info *, int, unsigned int, unsigned int); void define_builtin (const char *, size_t, const builtin *, symbol_lookup); void set_macro_sequence (const char *); void free_regex (void); -void define_user_macro (const char *, size_t, const char *, symbol_lookup); +void define_user_macro (const char *, size_t, const char *, size_t, + symbol_lookup); void undivert_all (void); void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *); void m4_placeholder (struct obstack *, int, macro_arguments *); diff --git a/src/macro.c b/src/macro.c index afb6c246..e3fa1095 100644 --- a/src/macro.c +++ b/src/macro.c @@ -260,8 +260,7 @@ expand_token (struct obstack *obs, token_type t, token_data *td, int line, bool first) { symbol *sym; - bool result; - int ch; + bool result = false; switch (t) { /* TOKSW */ @@ -278,6 +277,7 @@ expand_token (struct obstack *obs, token_type t, token_data *td, int line, provided, the string was already expanded into it during next_token. */ result = first || safe_quotes (); + case TOKEN_COMMENT: if (obs) return result; break; @@ -295,8 +295,9 @@ expand_token (struct obstack *obs, token_type t, token_data *td, int line, numeric, then behavior of safe_quotes is applicable. Otherwise, assume these characters have a high likelihood of use in quote delimiters. 
*/ - ch = to_uchar (*TOKEN_DATA_TEXT (td)); - result = (isspace (ch) || isdigit (ch)) && safe_quotes (); + result = *TOKEN_DATA_TEXT (td) != *curr_quote.str2 && safe_quotes (); + if (result) + assert (*TOKEN_DATA_TEXT (td) != *curr_quote.str1); break; case TOKEN_WORD: @@ -420,6 +421,7 @@ expand_argument (struct obstack *obs, token_data *argp, case TOKEN_WORD: case TOKEN_STRING: + case TOKEN_COMMENT: case TOKEN_MACDEF: if (!expand_token (obs, t, &td, line, first)) age = 0; @@ -1115,9 +1117,10 @@ arg_empty (macro_arguments *argv, unsigned int arg) } /* Given ARGV, return the length of argument ARG. Abort if the - argument is not text. Indices beyond argc return 0. */ + argument is not text. Indices beyond argc return 0. If FLATTEN, + builtins are ignored. */ size_t -arg_len (macro_arguments *argv, unsigned int arg) +arg_len (macro_arguments *argv, unsigned int arg, bool flatten) { token_data *token; token_chain *chain; @@ -1130,7 +1133,7 @@ arg_len (macro_arguments *argv, unsigned int arg) } if (arg >= argv->argc) return 0; - token = arg_token (argv, arg, NULL, false); + token = arg_token (argv, arg, NULL, flatten); switch (TOKEN_DATA_TYPE (token)) { case TOKEN_TEXT: @@ -1150,9 +1153,8 @@ arg_len (macro_arguments *argv, unsigned int arg) len += chain->u.u_s.len; break; case CHAIN_FUNC: - // TODO concatenate builtins - assert (!"implemented"); - abort (); + assert (flatten); + break; case CHAIN_ARGV: i = chain->u.u_a.index; limit = chain->u.u_a.argv->argc - i - chain->u.u_a.skip_last; @@ -1163,15 +1165,8 @@ arg_len (macro_arguments *argv, unsigned int arg) len += (quotes->len1 + quotes->len2) * limit; len += limit - 1; while (limit--) - { - // TODO handle builtin concatenation - if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, NULL, - false)) == TOKEN_FUNC) - assert (argv->flatten); - else - len += arg_len (chain->u.u_a.argv, i); - i++; - } + len += arg_len (chain->u.u_a.argv, i++, + flatten || chain->u.u_a.flatten); break; default: assert (!"arg_len"); @@ 
-1179,7 +1174,7 @@ arg_len (macro_arguments *argv, unsigned int arg) } chain = chain->next; } - assert (len); + assert (len || flatten); return len; case TOKEN_FUNC: default: |