diff options
author | Eric Blake <ebb9@byu.net> | 2008-12-02 22:51:14 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-12-02 22:51:14 -0700 |
commit | 41d0c77062c8046730101d82b56f682edc70957e (patch) | |
tree | c4f887b503240ede7246dd10c2dfba838189ed8b | |
parent | c6628fa51d6c7756f38d1dfaa2055656446ee93a (diff) | |
download | m4-41d0c77062c8046730101d82b56f682edc70957e.tar.gz |
Stage 27: Allow embedded NUL in text processing macros.
* modules/m4.c (m4_expand_ranges): Don't append extra bytes.
(translit): Manage NUL bytes.
* modules/format.c (format): Likewise.
* modules/gnu.c (substitute, regexp_substitute): Likewise.
(m4_resyntax_encode_safe): Add parameter.
(regexp, patsubst, renamesyms): Update callers.
(regexp_compile): Adjust error message.
* modules/evalparse.c (m4_evaluate): Use consistent message.
(end_text): New variable.
(eval_init_lex): Add parameter.
(eval_lex): Detect embedded NUL.
* src/freeze.c (reload_frozen_state): Likewise.
* doc/m4.texinfo (Format): Update to cover new behavior.
(Eval): Mention that result is unquoted.
* tests/freeze.at (reloading nul): Enhance test.
* tests/null.m4: Likewise.
* tests/null.err: Update expected output.
* tests/null.out: Likewise.
* tests/options.at (--regexp-syntax): Likewise.
Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r-- | ChangeLog | 28 | ||||
-rw-r--r-- | doc/m4.texinfo | 13 | ||||
-rw-r--r-- | modules/evalparse.c | 15 | ||||
-rw-r--r-- | modules/format.c | 43 | ||||
-rw-r--r-- | modules/gnu.c | 127 | ||||
-rw-r--r-- | modules/m4.c | 57 | ||||
-rw-r--r-- | src/freeze.c | 7 | ||||
-rw-r--r-- | tests/freeze.at | 6 | ||||
-rw-r--r-- | tests/null.err | bin | 639 -> 1318 bytes | |||
-rw-r--r-- | tests/null.m4 | bin | 7223 -> 7559 bytes | |||
-rw-r--r-- | tests/null.out | bin | 548 -> 607 bytes | |||
-rw-r--r-- | tests/options.at | 8 |
12 files changed, 204 insertions, 100 deletions
@@ -1,3 +1,31 @@ +2008-12-02 Eric Blake <ebb9@byu.net> + + Stage 27: Allow embedded NUL in text processing macros. + Pass NUL through regular expressions, format, and translit, and + diagnose it in eval and changeresyntax. Improve warning + capabilities of format. + Memory impact: none. + Speed impact: none noticed. + * modules/m4.c (m4_expand_ranges): Don't append extra bytes. + (translit): Manage NUL bytes. + * modules/format.c (format): Likewise. + * modules/gnu.c (substitute, regexp_substitute): Likewise. + (m4_resyntax_encode_safe): Add parameter. + (regexp, patsubst, renamesyms): Update callers. + (regexp_compile): Adjust error message. + * modules/evalparse.c (m4_evaluate): Use consistent message. + (end_text): New variable. + (eval_init_lex): Add parameter. + (eval_lex): Detect embedded NUL. + * src/freeze.c (reload_frozen_state): Likewise. + * doc/m4.texinfo (Format): Update to cover new behavior. + (Eval): Mention that result is unquoted. + * tests/freeze.at (reloading nul): Enhance test. + * tests/null.m4: Likewise. + * tests/null.err: Update expected output. + * tests/null.out: Likewise. + * tests/options.at (--regexp-syntax): Likewise. + 2008-11-28 Eric Blake <ebb9@byu.net> Resync NEWS with branches. diff --git a/doc/m4.texinfo b/doc/m4.texinfo index 0287a60b..bee9aec9 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -7288,7 +7288,7 @@ Likewise, escape sequences are not yet recognized. @example format(`%p', `0') @error{}m4:stdin:1: Warning: format: unrecognized specifier in `%p' -@result{} +@result{}p format(`%*d', `') @error{}m4:stdin:2: Warning: format: empty string treated as 0 @error{}m4:stdin:2: Warning: format: too few arguments: 2 < 3 @@ -7605,7 +7605,9 @@ expansion. The default radix is 10; this is also the case if @var{radix} is the empty string. A warning results if the radix is outside the range of 1 through 36, inclusive. The result of @code{eval} is always taken to be signed. 
No radix prefix is output, and for -radices greater than 10, the digits are lower case. The @var{width} +radices greater than 10, the digits are lower case (although some +other implementations use upper case). The output is unquoted, and +subject to further macro expansion. The @var{width} argument specifies the minimum output width, excluding any negative sign. The result is zero-padded to extend the expansion to the requested width. A warning results if the width is negative. If @@ -7636,8 +7638,13 @@ eval(`1', , `-1') @error{}m4:stdin:10: Warning: eval: negative width: -1 @result{} eval() -@error{}m4:stdin:11: Warning: eval: empty string treated as zero +@error{}m4:stdin:11: Warning: eval: empty string treated as 0 +@result{}0 +eval(` ') +@error{}m4:stdin:12: Warning: eval: empty string treated as 0 @result{}0 +define(`a', `hi')eval(` 10 ', `16') +@result{}hi @end example @node Mpeval diff --git a/modules/evalparse.c b/modules/evalparse.c index 8ad7182b..9927e13a 100644 --- a/modules/evalparse.c +++ b/modules/evalparse.c @@ -99,10 +99,15 @@ static const char *eval_text; can back up, if we have read too much. */ static const char *last_text; +/* Detect when to end parsing. */ +static const char *end_text; + +/* Prime the lexer at the start of TEXT, with length LEN. 
*/ static void -eval_init_lex (const char *text) +eval_init_lex (const char *text, size_t len) { eval_text = text; + end_text = text + len; last_text = NULL; } @@ -119,12 +124,12 @@ eval_undo (void) static eval_token eval_lex (number *val) { - while (isspace (to_uchar (*eval_text))) + while (eval_text != end_text && isspace (to_uchar (*eval_text))) eval_text++; last_text = eval_text; - if (*eval_text == '\0') + if (eval_text == end_text) return EOTEXT; if (isdigit (to_uchar (*eval_text))) @@ -915,13 +920,13 @@ m4_evaluate (m4 *context, m4_obstack *obs, size_t argc, m4_macro_args *argv) } numb_initialise (); - eval_init_lex (str); + eval_init_lex (str, M4ARGLEN (1)); numb_init (val); et = eval_lex (&val); if (et == EOTEXT) { - m4_warn (context, 0, me, _("empty string treated as zero")); + m4_warn (context, 0, me, _("empty string treated as 0")); numb_set (val, numb_ZERO); } else diff --git a/modules/format.c b/modules/format.c index e2a1a423..af983cdc 100644 --- a/modules/format.c +++ b/modules/format.c @@ -123,11 +123,12 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) { const m4_call_info *me = m4_arg_info (argv); const char *f; /* Format control string. */ + size_t f_len; /* Length of f. */ const char *fmt; /* Position within f. */ char fstart[] = "%'+- 0#*.*hhd"; /* Current format spec. */ char *p; /* Position within fstart. */ unsigned char c; /* A simple character. */ - int i = 0; /* Index within argc used so far. */ + int i = 1; /* Index within argc used so far. */ bool valid_format = true; /* True if entire format string ok. */ /* Flags. */ @@ -156,25 +157,24 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) int result = 0; enum {CHAR, INT, LONG, DOUBLE, STR} datatype; - f = fmt = ARG_STR (i, argc, argv); + f = fmt = M4ARG (1); + f_len = M4ARGLEN (1); + assert (!f[f_len]); /* Requiring a terminating NUL makes parsing simpler. 
*/ memset (ok, 0, sizeof ok); - while (true) + while (f_len--) { - while ((c = *fmt++) != '%') + c = *fmt++; + if (c != '%') { - if (c == '\0') - { - if (valid_format) - m4_bad_argc (context, argc, me, i, i, true); - return; - } obstack_1grow (obs, c); + continue; } if (*fmt == '%') { obstack_1grow (obs, '%'); fmt++; + f_len--; continue; } @@ -225,7 +225,7 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) break; } } - while (!(flags & DONE) && fmt++); + while (!(flags & DONE) && (f_len--, fmt++)); if (flags & THOUSANDS) *p++ = '\''; if (flags & PLUS) @@ -247,12 +247,14 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) { width = ARG_INT (i, argc, argv); fmt++; + f_len--; } else while (isdigit ((unsigned char) *fmt)) { width = 10 * width + *fmt - '0'; fmt++; + f_len--; } /* Maximum precision; an explicit negative precision is the same @@ -263,10 +265,12 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) if (*fmt == '.') { ok['c'] = 0; + f_len--; if (*(++fmt) == '*') { prec = ARG_INT (i, argc, argv); ++fmt; + f_len--; } else { @@ -275,6 +279,7 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) { prec = 10 * prec + *fmt - '0'; fmt++; + f_len--; } } } @@ -285,30 +290,34 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) *p++ = 'l'; lflag = 1; fmt++; + f_len--; ok['c'] = ok['s'] = 0; } else if (*fmt == 'h') { *p++ = 'h'; fmt++; + f_len--; if (*fmt == 'h') { *p++ = 'h'; fmt++; + f_len--; } ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = ok['f'] = ok['F'] = ok['g'] = ok['G'] = ok['s'] = 0; } - c = *fmt++; - if (c > sizeof ok || !ok[c]) + c = *fmt; + if (c > sizeof ok || !ok[c] || !f_len) { - m4_warn (context, 0, me, _("unrecognized specifier in `%s'"), f); + m4_warn (context, 0, me, _("unrecognized specifier in %s"), + quotearg_style_mem (locale_quoting_style, f, M4ARGLEN (1))); valid_format = false; - if (c == '\0') - fmt--; continue; } + fmt++; + f_len--; /* 
Specifiers. We don't yet recognize C, S, n, or p. */ switch (c) @@ -382,4 +391,6 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv) we constructed fstart, the result should not be negative. */ assert (0 <= result); } + if (valid_format) + m4_bad_argc (context, argc, me, i, i, true); } diff --git a/modules/gnu.c b/modules/gnu.c index fd557eb7..8ad17225 100644 --- a/modules/gnu.c +++ b/modules/gnu.c @@ -167,8 +167,8 @@ regexp_compile (m4 *context, const m4_call_info *caller, const char *regexp, if (msg != NULL) { - m4_error (context, 0, 0, caller, _("bad regular expression `%s': %s"), - regexp, msg); + m4_warn (context, 0, caller, _("bad regular expression %s: %s"), + quotearg_style_mem (locale_quoting_style, regexp, len), msg); regfree (pat); free (pat); return NULL; @@ -225,28 +225,38 @@ regexp_search (m4_pattern_buffer *buf, const char *string, const int size, /* Function to perform substitution by regular expressions. Used by the builtins regexp, patsubst and renamesyms. The changed text is - placed on the obstack OBS. The substitution is REPL, with \& - substituted by this part of VICTIM matched by the last whole - regular expression, and \N substituted by the text matched by the - Nth parenthesized sub-expression in BUF. Any warnings are issued - on behalf of CALLER. BUF may be NULL for the empty regex. */ + placed on the obstack OBS. The substitution is REPL of length + REPL_LEN, with \& substituted by this part of VICTIM matched by the + last whole regular expression, and \N substituted by the text + matched by the Nth parenthesized sub-expression in BUF. Any + warnings are issued on behalf of CALLER. BUF may be NULL for the + empty regex. 
*/ static void substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller, - const char *victim, const char *repl, m4_pattern_buffer *buf) + const char *victim, const char *repl, size_t repl_len, + m4_pattern_buffer *buf) { int ch; - for (;;) + while (repl_len--) { - while ((ch = *repl++) != '\\') + ch = *repl++; + if (ch != '\\') { - if (ch == '\0') - return; obstack_1grow (obs, ch); + continue; + } + if (!repl_len) + { + m4_warn (context, 0, caller, + _("trailing \\ ignored in replacement")); + return; } - switch ((ch = *repl++)) + ch = *repl++; + repl_len--; + switch (ch) { case '&': if (buf) @@ -265,11 +275,6 @@ substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller, buf->regs.end[ch] - buf->regs.start[ch]); break; - case '\0': - m4_warn (context, 0, caller, - _("trailing \\ ignored in replacement")); - return; - default: obstack_1grow (obs, ch); break; @@ -278,18 +283,19 @@ substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller, } -/* For each match against compiled REGEXP (held in BUF -- as returned - by regexp_compile) in VICTIM, substitute REPLACE. Non-matching - characters are copied verbatim, and the result copied to the - obstack. Errors are reported on behalf of CALLER. Return true if - a substitution was made. If OPTIMIZE is set, don't worry about - copying the input if no changes are made. */ +/* For each match against REGEXP of length REGEXP_LEN (precompiled in + BUF as returned by regexp_compile) in VICTIM of length LEN, + substitute REPLACE of length REPL_LEN. Non-matching characters are + copied verbatim, and the result copied to the obstack. Errors are + reported on behalf of CALLER. Return true if a substitution was + made. If OPTIMIZE is set, don't worry about copying the input if + no changes are made. 
*/ static bool regexp_substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller, const char *victim, size_t len, const char *regexp, - m4_pattern_buffer *buf, const char *replace, - bool optimize) + size_t regexp_len, m4_pattern_buffer *buf, + const char *replace, size_t repl_len, bool optimize) { regoff_t matchpos = 0; /* start position of match */ size_t offset = 0; /* current match offset */ @@ -309,7 +315,9 @@ regexp_substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller, if (matchpos == -2) m4_error (context, 0, 0, caller, - _("error matching regular expression `%s'"), regexp); + _("problem matching regular expression %s"), + quotearg_style_mem (locale_quoting_style, regexp, + regexp_len)); else if (offset < len && subst) obstack_grow (obs, victim + offset, len - offset); break; @@ -322,7 +330,7 @@ regexp_substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller, /* Handle the part of the string that was covered by the match. */ - substitute (context, obs, caller, victim, replace, buf); + substitute (context, obs, caller, victim, replace, repl_len, buf); subst = true; /* Update the offset to the end of the match. If the regexp @@ -465,18 +473,24 @@ M4BUILTIN_HANDLER (builtin) } -/* Change the current regexp syntax to SPEC, or report failure on - behalf of CALLER. Currently this affects the builtins: `patsubst', - `regexp' and `renamesyms'. */ +/* Change the current regexp syntax to SPEC of length LEN, or report + failure on behalf of CALLER. Currently this affects the builtins: + `patsubst', `regexp' and `renamesyms'. 
*/ static int m4_resyntax_encode_safe (m4 *context, const m4_call_info *caller, - const char *spec) + const char *spec, size_t len) { - int resyntax = m4_regexp_syntax_encode (spec); + int resyntax; + + if (strlen (spec) < len) + resyntax = -1; + else + resyntax = m4_regexp_syntax_encode (spec); if (resyntax < 0) - m4_warn (context, 0, caller, _("bad syntax-spec: `%s'"), spec); + m4_warn (context, 0, caller, _("bad syntax-spec: %s"), + quotearg_style_mem (locale_quoting_style, spec, len)); return resyntax; } @@ -488,7 +502,7 @@ m4_resyntax_encode_safe (m4 *context, const m4_call_info *caller, M4BUILTIN_HANDLER (changeresyntax) { int resyntax = m4_resyntax_encode_safe (context, m4_arg_info (argv), - M4ARG (1)); + M4ARG (1), M4ARGLEN (1)); if (resyntax >= 0) m4_set_regexp_syntax_opt (context, resyntax); @@ -749,31 +763,32 @@ M4BUILTIN_HANDLER (patsubst) m4_pattern_buffer *buf; /* compiled regular expression */ int resyntax; - pattern = M4ARG (2); - replace = M4ARG (3); - resyntax = m4_get_regexp_syntax_opt (context); if (argc >= 5) /* additional args ignored */ { - resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4)); + resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4), + M4ARGLEN (4)); if (resyntax < 0) return; } /* The empty regex matches everywhere, but if there is no replacement, we need not waste time with it. */ - if (!*pattern && !*replace) + if (m4_arg_empty (argv, 2) && m4_arg_empty (argv, 3)) { m4_push_arg (context, obs, argv, 1); return; } + pattern = M4ARG (2); + replace = M4ARG (3); + buf = regexp_compile (context, me, pattern, M4ARGLEN (2), resyntax); if (!buf) return; - regexp_substitute (context, obs, me, M4ARG (1), M4ARGLEN (1), - pattern, buf, replace, false); + regexp_substitute (context, obs, me, M4ARG (1), M4ARGLEN (1), pattern, + M4ARGLEN (2), buf, replace, M4ARGLEN (3), false); } @@ -810,7 +825,7 @@ M4BUILTIN_HANDLER (regexp) is a valid RESYNTAX, yet we want `regexp(aab, a*, )' to return an empty string as per M4 1.4.x. 
*/ - if ((*replace == '\0') || (resyntax < 0)) + if (m4_arg_empty (argv, 3) || (resyntax < 0)) /* regexp(VICTIM, REGEXP, REPLACEMENT) */ resyntax = m4_get_regexp_syntax_opt (context); else @@ -820,7 +835,8 @@ M4BUILTIN_HANDLER (regexp) else if (argc >= 5) { /* regexp(VICTIM, REGEXP, REPLACEMENT, RESYNTAX) */ - resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4)); + resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4), + M4ARGLEN (4)); if (resyntax < 0) return; } @@ -828,11 +844,11 @@ M4BUILTIN_HANDLER (regexp) /* regexp(VICTIM, REGEXP) */ replace = NULL; - if (!*pattern) + if (m4_arg_empty (argv, 2)) { /* The empty regex matches everything. */ if (replace) - substitute (context, obs, me, M4ARG (1), replace, NULL); + substitute (context, obs, me, M4ARG (1), replace, M4ARGLEN (3), NULL); else m4_shipout_int (obs, 0); return; @@ -848,15 +864,16 @@ M4BUILTIN_HANDLER (regexp) if (startpos == -2) { - m4_error (context, 0, 0, me, _("error matching regular expression `%s'"), - pattern); + m4_error (context, 0, 0, me, _("problem matching regular expression %s"), + quotearg_style_mem (locale_quoting_style, pattern, + M4ARGLEN (2))); return; } if (replace == NULL) m4_shipout_int (obs, startpos); else if (startpos >= 0) - substitute (context, obs, me, victim, replace, buf); + substitute (context, obs, me, victim, replace, M4ARGLEN (3), buf); } @@ -874,7 +891,9 @@ M4BUILTIN_HANDLER (renamesyms) { const m4_call_info *me = m4_arg_info (argv); const char *regexp; /* regular expression string */ + size_t regexp_len; const char *replace; /* replacement expression string */ + size_t replace_len; m4_pattern_buffer *buf; /* compiled regular expression */ @@ -883,17 +902,20 @@ M4BUILTIN_HANDLER (renamesyms) int resyntax; regexp = M4ARG (1); + regexp_len = M4ARGLEN (1); replace = M4ARG (2); + replace_len = M4ARGLEN (2); resyntax = m4_get_regexp_syntax_opt (context); if (argc >= 4) { - resyntax = m4_resyntax_encode_safe (context, me, M4ARG (3)); + resyntax = 
m4_resyntax_encode_safe (context, me, M4ARG (3), + M4ARGLEN (3)); if (resyntax < 0) return; } - buf = regexp_compile (context, me, regexp, M4ARGLEN (1), resyntax); + buf = regexp_compile (context, me, regexp, regexp_len, resyntax); if (!buf) return; @@ -905,7 +927,8 @@ M4BUILTIN_HANDLER (renamesyms) const m4_string *key = &data.base[0]; if (regexp_substitute (context, data.obs, me, key->str, key->len, - regexp, buf, replace, true)) + regexp, regexp_len, buf, replace, replace_len, + true)) { size_t newlen = obstack_object_size (data.obs); m4_symbol_rename (M4SYMTAB, key->str, key->len, diff --git a/modules/m4.c b/modules/m4.c index e9695a32..f78a177d 100644 --- a/modules/m4.c +++ b/modules/m4.c @@ -998,8 +998,7 @@ m4_expand_ranges (const char *s, size_t *len, m4_obstack *obs) obstack_1grow (obs, *s); } *len = obstack_object_size (obs); - /* FIXME - use obstack_finish once translit is updated. */ - return (char *) obstack_copy0 (obs, "", 0); + return (char *) obstack_finish (obs); } /* The macro "translit" translates all characters in the first @@ -1018,7 +1017,9 @@ M4BUILTIN_HANDLER (translit) char found[UCHAR_MAX + 1] = {0}; unsigned char ch; - if (argc <= 2) + enum { ASIS, REPLACE, DELETE }; + + if (m4_arg_empty (argv, 1) || m4_arg_empty (argv, 2)) { m4_push_arg (context, obs, argv, 1); return; @@ -1026,7 +1027,7 @@ M4BUILTIN_HANDLER (translit) from = M4ARG (2); from_len = M4ARGLEN (2); - if (strchr (from, '-') != NULL) + if (memchr (from, '-', from_len) != NULL) { from = m4_expand_ranges (from, &from_len, m4_arg_scratch (context)); assert (from); @@ -1034,35 +1035,57 @@ M4BUILTIN_HANDLER (translit) to = M4ARG (3); to_len = M4ARGLEN (3); - if (strchr (to, '-') != NULL) + if (memchr (to, '-', to_len) != NULL) { to = m4_expand_ranges (to, &to_len, m4_arg_scratch (context)); assert (to); } - /* Calling strchr(from) for each character in data is quadratic, + /* Calling memchr(from) for each character in data is quadratic, since both strings can be arbitrarily long. 
Instead, create a from-to mapping in one pass of from, then use that map in one pass of data, for linear behavior. Traditional behavior is that only the first instance of a character in from is consulted, hence the found map. */ - for ( ; (ch = *from) != '\0'; from++) + while (from_len--) { - if (!found[ch]) + ch = *from++; + if (found[ch] == ASIS) + { + if (to_len) + { + found[ch] = REPLACE; + map[ch] = *to; + } + else + found[ch] = DELETE; + } + if (to_len) { - found[ch] = 1; - map[ch] = *to; + to++; + to_len--; } - if (*to != '\0') - to++; } - for (data = M4ARG (1); (ch = *data) != '\0'; data++) + data = M4ARG (1); + from_len = M4ARGLEN (1); + while (from_len--) { - if (!found[ch]) - obstack_1grow (obs, ch); - else if (map[ch]) - obstack_1grow (obs, map[ch]); + ch = *data++; + switch (found[ch]) + { + case ASIS: + obstack_1grow (obs, ch); + break; + case REPLACE: + obstack_1grow (obs, map[ch]); + break; + case DELETE: + break; + default: + assert (!"translit"); + abort (); + } } } diff --git a/src/freeze.c b/src/freeze.c index 5d5b4eee..3008f272 100644 --- a/src/freeze.c +++ b/src/freeze.c @@ -634,7 +634,7 @@ ill-formed frozen file, version 2 directive `%c' encountered"), 'd'); if (m4_debug_decode (context, string[0]) < 0) m4_error (context, EXIT_FAILURE, 0, NULL, - _("unknown debug mode `%s'"), + _("unknown debug mode %s"), quotearg_style_mem (locale_quoting_style, string[0], number[0])); break; @@ -751,10 +751,11 @@ ill-formed frozen file, version 2 directive `%c' encountered"), 'R'); m4_set_regexp_syntax_opt (context, m4_regexp_syntax_encode (string[0])); - if (m4_get_regexp_syntax_opt (context) < 0) + if (m4_get_regexp_syntax_opt (context) < 0 + || strlen (string[0]) < number[0]) { m4_error (context, EXIT_FAILURE, 0, NULL, - _("unknown regexp syntax code `%s'"), + _("bad syntax-spec %s"), quotearg_style_mem (locale_quoting_style, string[0], number[0])); } diff --git a/tests/freeze.at b/tests/freeze.at index 9b8c9463..693ae543 100644 --- a/tests/freeze.at +++ 
b/tests/freeze.at @@ -409,6 +409,12 @@ AT_CHECK_M4([-R frozen.m4f unfrozen.m4], [0], [stdout], [experr], [], [ ]) AT_CHECK([cat out1 stdout], [0], [expout]) +dnl Check that unexpected embedded NULs are recognized. +printf '# bogus frozen file\nV2\nR4\ngnu\0\n' > bogus.m4f +AT_CHECK_M4([-R bogus.m4f], [1], [], +[[m4:bogus.m4f:4: bad syntax-spec `gnu\0' +]]) + AT_CLEANUP ]) diff --git a/tests/null.err b/tests/null.err Binary files differ index 74ec09d3..7b9f7981 100644 --- a/tests/null.err +++ b/tests/null.err diff --git a/tests/null.m4 b/tests/null.m4 Binary files differ index 77b6e67d..f7a15875 100644 --- a/tests/null.m4 +++ b/tests/null.m4 diff --git a/tests/null.out b/tests/null.out Binary files differ index 5f6df390..97f80dd5 100644 --- a/tests/null.out +++ b/tests/null.out diff --git a/tests/options.at b/tests/options.at index 9331a211..dce43f87 100644 --- a/tests/options.at +++ b/tests/options.at @@ -714,8 +714,8 @@ AT_CHECK_M4([--regexp-syntax=unknown in], [1], [], AT_CHECK_M4([--regexp-syntax= in], [0], [[0 ]]) -AT_CHECK_M4([-rEXTENDED in], [1], [[ -]], [[m4:in:1: regexp: bad regular expression `(': Unmatched ( or \( +AT_CHECK_M4([-rEXTENDED in], [0], [[ +]], [[m4:in:1: Warning: regexp: bad regular expression `(': Unmatched ( or \( ]]) AT_CHECK_M4([-rgnu-m4 in], [0], [[0 @@ -725,9 +725,9 @@ AT_CHECK_M4([-r"gnu M4" in], [0], [[0 ]]) dnl Test behavior of -r intermixed with files -AT_CHECK_M4([-rEXTENDED in --regexp-syntax in], [1], [[ +AT_CHECK_M4([-rEXTENDED in --regexp-syntax in], [0], [[ 0 -]], [[m4:in:1: regexp: bad regular expression `(': Unmatched ( or \( +]], [[m4:in:1: Warning: regexp: bad regular expression `(': Unmatched ( or \( ]]) AT_CLEANUP |