summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Eric Blake <ebb9@byu.net> 2008-12-02 22:51:14 -0700
committer Eric Blake <ebb9@byu.net> 2008-12-02 22:51:14 -0700
commit 41d0c77062c8046730101d82b56f682edc70957e (patch)
tree c4f887b503240ede7246dd10c2dfba838189ed8b
parent c6628fa51d6c7756f38d1dfaa2055656446ee93a (diff)
download m4-41d0c77062c8046730101d82b56f682edc70957e.tar.gz
Stage 27: Allow embedded NUL in text processing macros.
* modules/m4.c (m4_expand_ranges): Don't append extra bytes.
(translit): Manage NUL bytes.
* modules/format.c (format): Likewise.
* modules/gnu.c (substitute, regexp_substitute): Likewise.
(m4_resyntax_encode_safe): Add parameter.
(regexp, patsubst, renamesyms): Update callers.
(regexp_compile): Adjust error message.
* modules/evalparse.c (m4_evaluate): Use consistent message.
(end_text): New variable.
(eval_init_lex): Add parameter.
(eval_lex): Detect embedded NUL.
* src/freeze.c (reload_frozen_state): Likewise.
* doc/m4.texinfo (Format): Update to cover new behavior.
(Eval): Mention that result is unquoted.
* tests/freeze.at (reloading nul): Enhance test.
* tests/null.m4: Likewise.
* tests/null.err: Update expected output.
* tests/null.out: Likewise.
* tests/options.at (--regexp-syntax): Likewise.

Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r-- ChangeLog 28
-rw-r--r-- doc/m4.texinfo 13
-rw-r--r-- modules/evalparse.c 15
-rw-r--r-- modules/format.c 43
-rw-r--r-- modules/gnu.c 127
-rw-r--r-- modules/m4.c 57
-rw-r--r-- src/freeze.c 7
-rw-r--r-- tests/freeze.at 6
-rw-r--r-- tests/null.err bin 639 -> 1318 bytes
-rw-r--r-- tests/null.m4 bin 7223 -> 7559 bytes
-rw-r--r-- tests/null.out bin 548 -> 607 bytes
-rw-r--r-- tests/options.at 8
12 files changed, 204 insertions, 100 deletions
diff --git a/ChangeLog b/ChangeLog
index b350524a..69849a95 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,31 @@
+2008-12-02 Eric Blake <ebb9@byu.net>
+
+ Stage 27: Allow embedded NUL in text processing macros.
+ Pass NUL through regular expressions, format, and translit, and
+ diagnose it in eval and changeresyntax. Improve warning
+ capabilities of format.
+ Memory impact: none.
+ Speed impact: none noticed.
+ * modules/m4.c (m4_expand_ranges): Don't append extra bytes.
+ (translit): Manage NUL bytes.
+ * modules/format.c (format): Likewise.
+ * modules/gnu.c (substitute, regexp_substitute): Likewise.
+ (m4_resyntax_encode_safe): Add parameter.
+ (regexp, patsubst, renamesyms): Update callers.
+ (regexp_compile): Adjust error message.
+ * modules/evalparse.c (m4_evaluate): Use consistent message.
+ (end_text): New variable.
+ (eval_init_lex): Add parameter.
+ (eval_lex): Detect embedded NUL.
+ * src/freeze.c (reload_frozen_state): Likewise.
+ * doc/m4.texinfo (Format): Update to cover new behavior.
+ (Eval): Mention that result is unquoted.
+ * tests/freeze.at (reloading nul): Enhance test.
+ * tests/null.m4: Likewise.
+ * tests/null.err: Update expected output.
+ * tests/null.out: Likewise.
+ * tests/options.at (--regexp-syntax): Likewise.
+
2008-11-28 Eric Blake <ebb9@byu.net>
Resync NEWS with branches.
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 0287a60b..bee9aec9 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -7288,7 +7288,7 @@ Likewise, escape sequences are not yet recognized.
@example
format(`%p', `0')
@error{}m4:stdin:1: Warning: format: unrecognized specifier in `%p'
-@result{}
+@result{}p
format(`%*d', `')
@error{}m4:stdin:2: Warning: format: empty string treated as 0
@error{}m4:stdin:2: Warning: format: too few arguments: 2 < 3
@@ -7605,7 +7605,9 @@ expansion. The default radix is 10; this is also the case if
@var{radix} is the empty string. A warning results if the radix is
outside the range of 1 through 36, inclusive. The result of @code{eval}
is always taken to be signed. No radix prefix is output, and for
-radices greater than 10, the digits are lower case. The @var{width}
+radices greater than 10, the digits are lower case (although some
+other implementations use upper case). The output is unquoted, and
+subject to further macro expansion. The @var{width}
argument specifies the minimum output width, excluding any negative
sign. The result is zero-padded to extend the expansion to the
requested width. A warning results if the width is negative. If
@@ -7636,8 +7638,13 @@ eval(`1', , `-1')
@error{}m4:stdin:10: Warning: eval: negative width: -1
@result{}
eval()
-@error{}m4:stdin:11: Warning: eval: empty string treated as zero
+@error{}m4:stdin:11: Warning: eval: empty string treated as 0
+@result{}0
+eval(` ')
+@error{}m4:stdin:12: Warning: eval: empty string treated as 0
@result{}0
+define(`a', `hi')eval(` 10 ', `16')
+@result{}hi
@end example
@node Mpeval
diff --git a/modules/evalparse.c b/modules/evalparse.c
index 8ad7182b..9927e13a 100644
--- a/modules/evalparse.c
+++ b/modules/evalparse.c
@@ -99,10 +99,15 @@ static const char *eval_text;
can back up, if we have read too much. */
static const char *last_text;
+/* Detect when to end parsing. */
+static const char *end_text;
+
+/* Prime the lexer at the start of TEXT, with length LEN. */
static void
-eval_init_lex (const char *text)
+eval_init_lex (const char *text, size_t len)
{
eval_text = text;
+ end_text = text + len;
last_text = NULL;
}
@@ -119,12 +124,12 @@ eval_undo (void)
static eval_token
eval_lex (number *val)
{
- while (isspace (to_uchar (*eval_text)))
+ while (eval_text != end_text && isspace (to_uchar (*eval_text)))
eval_text++;
last_text = eval_text;
- if (*eval_text == '\0')
+ if (eval_text == end_text)
return EOTEXT;
if (isdigit (to_uchar (*eval_text)))
@@ -915,13 +920,13 @@ m4_evaluate (m4 *context, m4_obstack *obs, size_t argc, m4_macro_args *argv)
}
numb_initialise ();
- eval_init_lex (str);
+ eval_init_lex (str, M4ARGLEN (1));
numb_init (val);
et = eval_lex (&val);
if (et == EOTEXT)
{
- m4_warn (context, 0, me, _("empty string treated as zero"));
+ m4_warn (context, 0, me, _("empty string treated as 0"));
numb_set (val, numb_ZERO);
}
else
diff --git a/modules/format.c b/modules/format.c
index e2a1a423..af983cdc 100644
--- a/modules/format.c
+++ b/modules/format.c
@@ -123,11 +123,12 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
{
const m4_call_info *me = m4_arg_info (argv);
const char *f; /* Format control string. */
+ size_t f_len; /* Length of f. */
const char *fmt; /* Position within f. */
char fstart[] = "%'+- 0#*.*hhd"; /* Current format spec. */
char *p; /* Position within fstart. */
unsigned char c; /* A simple character. */
- int i = 0; /* Index within argc used so far. */
+ int i = 1; /* Index within argc used so far. */
bool valid_format = true; /* True if entire format string ok. */
/* Flags. */
@@ -156,25 +157,24 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
int result = 0;
enum {CHAR, INT, LONG, DOUBLE, STR} datatype;
- f = fmt = ARG_STR (i, argc, argv);
+ f = fmt = M4ARG (1);
+ f_len = M4ARGLEN (1);
+ assert (!f[f_len]); /* Requiring a terminating NUL makes parsing simpler. */
memset (ok, 0, sizeof ok);
- while (true)
+ while (f_len--)
{
- while ((c = *fmt++) != '%')
+ c = *fmt++;
+ if (c != '%')
{
- if (c == '\0')
- {
- if (valid_format)
- m4_bad_argc (context, argc, me, i, i, true);
- return;
- }
obstack_1grow (obs, c);
+ continue;
}
if (*fmt == '%')
{
obstack_1grow (obs, '%');
fmt++;
+ f_len--;
continue;
}
@@ -225,7 +225,7 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
break;
}
}
- while (!(flags & DONE) && fmt++);
+ while (!(flags & DONE) && (f_len--, fmt++));
if (flags & THOUSANDS)
*p++ = '\'';
if (flags & PLUS)
@@ -247,12 +247,14 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
{
width = ARG_INT (i, argc, argv);
fmt++;
+ f_len--;
}
else
while (isdigit ((unsigned char) *fmt))
{
width = 10 * width + *fmt - '0';
fmt++;
+ f_len--;
}
/* Maximum precision; an explicit negative precision is the same
@@ -263,10 +265,12 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
if (*fmt == '.')
{
ok['c'] = 0;
+ f_len--;
if (*(++fmt) == '*')
{
prec = ARG_INT (i, argc, argv);
++fmt;
+ f_len--;
}
else
{
@@ -275,6 +279,7 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
{
prec = 10 * prec + *fmt - '0';
fmt++;
+ f_len--;
}
}
}
@@ -285,30 +290,34 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
*p++ = 'l';
lflag = 1;
fmt++;
+ f_len--;
ok['c'] = ok['s'] = 0;
}
else if (*fmt == 'h')
{
*p++ = 'h';
fmt++;
+ f_len--;
if (*fmt == 'h')
{
*p++ = 'h';
fmt++;
+ f_len--;
}
ok['a'] = ok['A'] = ok['c'] = ok['e'] = ok['E'] = ok['f'] = ok['F']
= ok['g'] = ok['G'] = ok['s'] = 0;
}
- c = *fmt++;
- if (c > sizeof ok || !ok[c])
+ c = *fmt;
+ if (c > sizeof ok || !ok[c] || !f_len)
{
- m4_warn (context, 0, me, _("unrecognized specifier in `%s'"), f);
+ m4_warn (context, 0, me, _("unrecognized specifier in %s"),
+ quotearg_style_mem (locale_quoting_style, f, M4ARGLEN (1)));
valid_format = false;
- if (c == '\0')
- fmt--;
continue;
}
+ fmt++;
+ f_len--;
/* Specifiers. We don't yet recognize C, S, n, or p. */
switch (c)
@@ -382,4 +391,6 @@ format (m4 *context, m4_obstack *obs, int argc, m4_macro_args *argv)
we constructed fstart, the result should not be negative. */
assert (0 <= result);
}
+ if (valid_format)
+ m4_bad_argc (context, argc, me, i, i, true);
}
diff --git a/modules/gnu.c b/modules/gnu.c
index fd557eb7..8ad17225 100644
--- a/modules/gnu.c
+++ b/modules/gnu.c
@@ -167,8 +167,8 @@ regexp_compile (m4 *context, const m4_call_info *caller, const char *regexp,
if (msg != NULL)
{
- m4_error (context, 0, 0, caller, _("bad regular expression `%s': %s"),
- regexp, msg);
+ m4_warn (context, 0, caller, _("bad regular expression %s: %s"),
+ quotearg_style_mem (locale_quoting_style, regexp, len), msg);
regfree (pat);
free (pat);
return NULL;
@@ -225,28 +225,38 @@ regexp_search (m4_pattern_buffer *buf, const char *string, const int size,
/* Function to perform substitution by regular expressions. Used by
the builtins regexp, patsubst and renamesyms. The changed text is
- placed on the obstack OBS. The substitution is REPL, with \&
- substituted by this part of VICTIM matched by the last whole
- regular expression, and \N substituted by the text matched by the
- Nth parenthesized sub-expression in BUF. Any warnings are issued
- on behalf of CALLER. BUF may be NULL for the empty regex. */
+ placed on the obstack OBS. The substitution is REPL of length
+ REPL_LEN, with \& substituted by this part of VICTIM matched by the
+ last whole regular expression, and \N substituted by the text
+ matched by the Nth parenthesized sub-expression in BUF. Any
+ warnings are issued on behalf of CALLER. BUF may be NULL for the
+ empty regex. */
static void
substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller,
- const char *victim, const char *repl, m4_pattern_buffer *buf)
+ const char *victim, const char *repl, size_t repl_len,
+ m4_pattern_buffer *buf)
{
int ch;
- for (;;)
+ while (repl_len--)
{
- while ((ch = *repl++) != '\\')
+ ch = *repl++;
+ if (ch != '\\')
{
- if (ch == '\0')
- return;
obstack_1grow (obs, ch);
+ continue;
+ }
+ if (!repl_len)
+ {
+ m4_warn (context, 0, caller,
+ _("trailing \\ ignored in replacement"));
+ return;
}
- switch ((ch = *repl++))
+ ch = *repl++;
+ repl_len--;
+ switch (ch)
{
case '&':
if (buf)
@@ -265,11 +275,6 @@ substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller,
buf->regs.end[ch] - buf->regs.start[ch]);
break;
- case '\0':
- m4_warn (context, 0, caller,
- _("trailing \\ ignored in replacement"));
- return;
-
default:
obstack_1grow (obs, ch);
break;
@@ -278,18 +283,19 @@ substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller,
}
-/* For each match against compiled REGEXP (held in BUF -- as returned
- by regexp_compile) in VICTIM, substitute REPLACE. Non-matching
- characters are copied verbatim, and the result copied to the
- obstack. Errors are reported on behalf of CALLER. Return true if
- a substitution was made. If OPTIMIZE is set, don't worry about
- copying the input if no changes are made. */
+/* For each match against REGEXP of length REGEXP_LEN (precompiled in
+ BUF as returned by regexp_compile) in VICTIM of length LEN,
+ substitute REPLACE of length REPL_LEN. Non-matching characters are
+ copied verbatim, and the result copied to the obstack. Errors are
+ reported on behalf of CALLER. Return true if a substitution was
+ made. If OPTIMIZE is set, don't worry about copying the input if
+ no changes are made. */
static bool
regexp_substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller,
const char *victim, size_t len, const char *regexp,
- m4_pattern_buffer *buf, const char *replace,
- bool optimize)
+ size_t regexp_len, m4_pattern_buffer *buf,
+ const char *replace, size_t repl_len, bool optimize)
{
regoff_t matchpos = 0; /* start position of match */
size_t offset = 0; /* current match offset */
@@ -309,7 +315,9 @@ regexp_substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller,
if (matchpos == -2)
m4_error (context, 0, 0, caller,
- _("error matching regular expression `%s'"), regexp);
+ _("problem matching regular expression %s"),
+ quotearg_style_mem (locale_quoting_style, regexp,
+ regexp_len));
else if (offset < len && subst)
obstack_grow (obs, victim + offset, len - offset);
break;
@@ -322,7 +330,7 @@ regexp_substitute (m4 *context, m4_obstack *obs, const m4_call_info *caller,
/* Handle the part of the string that was covered by the match. */
- substitute (context, obs, caller, victim, replace, buf);
+ substitute (context, obs, caller, victim, replace, repl_len, buf);
subst = true;
/* Update the offset to the end of the match. If the regexp
@@ -465,18 +473,24 @@ M4BUILTIN_HANDLER (builtin)
}
-/* Change the current regexp syntax to SPEC, or report failure on
- behalf of CALLER. Currently this affects the builtins: `patsubst',
- `regexp' and `renamesyms'. */
+/* Change the current regexp syntax to SPEC of length LEN, or report
+ failure on behalf of CALLER. Currently this affects the builtins:
+ `patsubst', `regexp' and `renamesyms'. */
static int
m4_resyntax_encode_safe (m4 *context, const m4_call_info *caller,
- const char *spec)
+ const char *spec, size_t len)
{
- int resyntax = m4_regexp_syntax_encode (spec);
+ int resyntax;
+
+ if (strlen (spec) < len)
+ resyntax = -1;
+ else
+ resyntax = m4_regexp_syntax_encode (spec);
if (resyntax < 0)
- m4_warn (context, 0, caller, _("bad syntax-spec: `%s'"), spec);
+ m4_warn (context, 0, caller, _("bad syntax-spec: %s"),
+ quotearg_style_mem (locale_quoting_style, spec, len));
return resyntax;
}
@@ -488,7 +502,7 @@ m4_resyntax_encode_safe (m4 *context, const m4_call_info *caller,
M4BUILTIN_HANDLER (changeresyntax)
{
int resyntax = m4_resyntax_encode_safe (context, m4_arg_info (argv),
- M4ARG (1));
+ M4ARG (1), M4ARGLEN (1));
if (resyntax >= 0)
m4_set_regexp_syntax_opt (context, resyntax);
@@ -749,31 +763,32 @@ M4BUILTIN_HANDLER (patsubst)
m4_pattern_buffer *buf; /* compiled regular expression */
int resyntax;
- pattern = M4ARG (2);
- replace = M4ARG (3);
-
resyntax = m4_get_regexp_syntax_opt (context);
if (argc >= 5) /* additional args ignored */
{
- resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4));
+ resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4),
+ M4ARGLEN (4));
if (resyntax < 0)
return;
}
/* The empty regex matches everywhere, but if there is no
replacement, we need not waste time with it. */
- if (!*pattern && !*replace)
+ if (m4_arg_empty (argv, 2) && m4_arg_empty (argv, 3))
{
m4_push_arg (context, obs, argv, 1);
return;
}
+ pattern = M4ARG (2);
+ replace = M4ARG (3);
+
buf = regexp_compile (context, me, pattern, M4ARGLEN (2), resyntax);
if (!buf)
return;
- regexp_substitute (context, obs, me, M4ARG (1), M4ARGLEN (1),
- pattern, buf, replace, false);
+ regexp_substitute (context, obs, me, M4ARG (1), M4ARGLEN (1), pattern,
+ M4ARGLEN (2), buf, replace, M4ARGLEN (3), false);
}
@@ -810,7 +825,7 @@ M4BUILTIN_HANDLER (regexp)
is a valid RESYNTAX, yet we want `regexp(aab, a*, )' to return
an empty string as per M4 1.4.x. */
- if ((*replace == '\0') || (resyntax < 0))
+ if (m4_arg_empty (argv, 3) || (resyntax < 0))
/* regexp(VICTIM, REGEXP, REPLACEMENT) */
resyntax = m4_get_regexp_syntax_opt (context);
else
@@ -820,7 +835,8 @@ M4BUILTIN_HANDLER (regexp)
else if (argc >= 5)
{
/* regexp(VICTIM, REGEXP, REPLACEMENT, RESYNTAX) */
- resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4));
+ resyntax = m4_resyntax_encode_safe (context, me, M4ARG (4),
+ M4ARGLEN (4));
if (resyntax < 0)
return;
}
@@ -828,11 +844,11 @@ M4BUILTIN_HANDLER (regexp)
/* regexp(VICTIM, REGEXP) */
replace = NULL;
- if (!*pattern)
+ if (m4_arg_empty (argv, 2))
{
/* The empty regex matches everything. */
if (replace)
- substitute (context, obs, me, M4ARG (1), replace, NULL);
+ substitute (context, obs, me, M4ARG (1), replace, M4ARGLEN (3), NULL);
else
m4_shipout_int (obs, 0);
return;
@@ -848,15 +864,16 @@ M4BUILTIN_HANDLER (regexp)
if (startpos == -2)
{
- m4_error (context, 0, 0, me, _("error matching regular expression `%s'"),
- pattern);
+ m4_error (context, 0, 0, me, _("problem matching regular expression %s"),
+ quotearg_style_mem (locale_quoting_style, pattern,
+ M4ARGLEN (2)));
return;
}
if (replace == NULL)
m4_shipout_int (obs, startpos);
else if (startpos >= 0)
- substitute (context, obs, me, victim, replace, buf);
+ substitute (context, obs, me, victim, replace, M4ARGLEN (3), buf);
}
@@ -874,7 +891,9 @@ M4BUILTIN_HANDLER (renamesyms)
{
const m4_call_info *me = m4_arg_info (argv);
const char *regexp; /* regular expression string */
+ size_t regexp_len;
const char *replace; /* replacement expression string */
+ size_t replace_len;
m4_pattern_buffer *buf; /* compiled regular expression */
@@ -883,17 +902,20 @@ M4BUILTIN_HANDLER (renamesyms)
int resyntax;
regexp = M4ARG (1);
+ regexp_len = M4ARGLEN (1);
replace = M4ARG (2);
+ replace_len = M4ARGLEN (2);
resyntax = m4_get_regexp_syntax_opt (context);
if (argc >= 4)
{
- resyntax = m4_resyntax_encode_safe (context, me, M4ARG (3));
+ resyntax = m4_resyntax_encode_safe (context, me, M4ARG (3),
+ M4ARGLEN (3));
if (resyntax < 0)
return;
}
- buf = regexp_compile (context, me, regexp, M4ARGLEN (1), resyntax);
+ buf = regexp_compile (context, me, regexp, regexp_len, resyntax);
if (!buf)
return;
@@ -905,7 +927,8 @@ M4BUILTIN_HANDLER (renamesyms)
const m4_string *key = &data.base[0];
if (regexp_substitute (context, data.obs, me, key->str, key->len,
- regexp, buf, replace, true))
+ regexp, regexp_len, buf, replace, replace_len,
+ true))
{
size_t newlen = obstack_object_size (data.obs);
m4_symbol_rename (M4SYMTAB, key->str, key->len,
diff --git a/modules/m4.c b/modules/m4.c
index e9695a32..f78a177d 100644
--- a/modules/m4.c
+++ b/modules/m4.c
@@ -998,8 +998,7 @@ m4_expand_ranges (const char *s, size_t *len, m4_obstack *obs)
obstack_1grow (obs, *s);
}
*len = obstack_object_size (obs);
- /* FIXME - use obstack_finish once translit is updated. */
- return (char *) obstack_copy0 (obs, "", 0);
+ return (char *) obstack_finish (obs);
}
/* The macro "translit" translates all characters in the first
@@ -1018,7 +1017,9 @@ M4BUILTIN_HANDLER (translit)
char found[UCHAR_MAX + 1] = {0};
unsigned char ch;
- if (argc <= 2)
+ enum { ASIS, REPLACE, DELETE };
+
+ if (m4_arg_empty (argv, 1) || m4_arg_empty (argv, 2))
{
m4_push_arg (context, obs, argv, 1);
return;
@@ -1026,7 +1027,7 @@ M4BUILTIN_HANDLER (translit)
from = M4ARG (2);
from_len = M4ARGLEN (2);
- if (strchr (from, '-') != NULL)
+ if (memchr (from, '-', from_len) != NULL)
{
from = m4_expand_ranges (from, &from_len, m4_arg_scratch (context));
assert (from);
@@ -1034,35 +1035,57 @@ M4BUILTIN_HANDLER (translit)
to = M4ARG (3);
to_len = M4ARGLEN (3);
- if (strchr (to, '-') != NULL)
+ if (memchr (to, '-', to_len) != NULL)
{
to = m4_expand_ranges (to, &to_len, m4_arg_scratch (context));
assert (to);
}
- /* Calling strchr(from) for each character in data is quadratic,
+ /* Calling memchr(from) for each character in data is quadratic,
since both strings can be arbitrarily long. Instead, create a
from-to mapping in one pass of from, then use that map in one
pass of data, for linear behavior. Traditional behavior is that
only the first instance of a character in from is consulted,
hence the found map. */
- for ( ; (ch = *from) != '\0'; from++)
+ while (from_len--)
{
- if (!found[ch])
+ ch = *from++;
+ if (found[ch] == ASIS)
+ {
+ if (to_len)
+ {
+ found[ch] = REPLACE;
+ map[ch] = *to;
+ }
+ else
+ found[ch] = DELETE;
+ }
+ if (to_len)
{
- found[ch] = 1;
- map[ch] = *to;
+ to++;
+ to_len--;
}
- if (*to != '\0')
- to++;
}
- for (data = M4ARG (1); (ch = *data) != '\0'; data++)
+ data = M4ARG (1);
+ from_len = M4ARGLEN (1);
+ while (from_len--)
{
- if (!found[ch])
- obstack_1grow (obs, ch);
- else if (map[ch])
- obstack_1grow (obs, map[ch]);
+ ch = *data++;
+ switch (found[ch])
+ {
+ case ASIS:
+ obstack_1grow (obs, ch);
+ break;
+ case REPLACE:
+ obstack_1grow (obs, map[ch]);
+ break;
+ case DELETE:
+ break;
+ default:
+ assert (!"translit");
+ abort ();
+ }
}
}
diff --git a/src/freeze.c b/src/freeze.c
index 5d5b4eee..3008f272 100644
--- a/src/freeze.c
+++ b/src/freeze.c
@@ -634,7 +634,7 @@ ill-formed frozen file, version 2 directive `%c' encountered"), 'd');
if (m4_debug_decode (context, string[0]) < 0)
m4_error (context, EXIT_FAILURE, 0, NULL,
- _("unknown debug mode `%s'"),
+ _("unknown debug mode %s"),
quotearg_style_mem (locale_quoting_style, string[0],
number[0]));
break;
@@ -751,10 +751,11 @@ ill-formed frozen file, version 2 directive `%c' encountered"), 'R');
m4_set_regexp_syntax_opt (context,
m4_regexp_syntax_encode (string[0]));
- if (m4_get_regexp_syntax_opt (context) < 0)
+ if (m4_get_regexp_syntax_opt (context) < 0
+ || strlen (string[0]) < number[0])
{
m4_error (context, EXIT_FAILURE, 0, NULL,
- _("unknown regexp syntax code `%s'"),
+ _("bad syntax-spec %s"),
quotearg_style_mem (locale_quoting_style, string[0],
number[0]));
}
diff --git a/tests/freeze.at b/tests/freeze.at
index 9b8c9463..693ae543 100644
--- a/tests/freeze.at
+++ b/tests/freeze.at
@@ -409,6 +409,12 @@ AT_CHECK_M4([-R frozen.m4f unfrozen.m4], [0], [stdout], [experr], [], [ ])
AT_CHECK([cat out1 stdout], [0], [expout])
+dnl Check that unexpected embedded NULs are recognized.
+printf '# bogus frozen file\nV2\nR4\ngnu\0\n' > bogus.m4f
+AT_CHECK_M4([-R bogus.m4f], [1], [],
+[[m4:bogus.m4f:4: bad syntax-spec `gnu\0'
+]])
+
AT_CLEANUP
])
diff --git a/tests/null.err b/tests/null.err
index 74ec09d3..7b9f7981 100644
--- a/tests/null.err
+++ b/tests/null.err
Binary files differ
diff --git a/tests/null.m4 b/tests/null.m4
index 77b6e67d..f7a15875 100644
--- a/tests/null.m4
+++ b/tests/null.m4
Binary files differ
diff --git a/tests/null.out b/tests/null.out
index 5f6df390..97f80dd5 100644
--- a/tests/null.out
+++ b/tests/null.out
Binary files differ
diff --git a/tests/options.at b/tests/options.at
index 9331a211..dce43f87 100644
--- a/tests/options.at
+++ b/tests/options.at
@@ -714,8 +714,8 @@ AT_CHECK_M4([--regexp-syntax=unknown in], [1], [],
AT_CHECK_M4([--regexp-syntax= in], [0], [[0
]])
-AT_CHECK_M4([-rEXTENDED in], [1], [[
-]], [[m4:in:1: regexp: bad regular expression `(': Unmatched ( or \(
+AT_CHECK_M4([-rEXTENDED in], [0], [[
+]], [[m4:in:1: Warning: regexp: bad regular expression `(': Unmatched ( or \(
]])
AT_CHECK_M4([-rgnu-m4 in], [0], [[0
@@ -725,9 +725,9 @@ AT_CHECK_M4([-r"gnu M4" in], [0], [[0
]])
dnl Test behavior of -r intermixed with files
-AT_CHECK_M4([-rEXTENDED in --regexp-syntax in], [1], [[
+AT_CHECK_M4([-rEXTENDED in --regexp-syntax in], [0], [[
0
-]], [[m4:in:1: regexp: bad regular expression `(': Unmatched ( or \(
+]], [[m4:in:1: Warning: regexp: bad regular expression `(': Unmatched ( or \(
]])
AT_CLEANUP