summary refs log tree commit diff
diff options
context:
space:
mode:
author Eric Blake <ebb9@byu.net> 2008-01-14 17:25:13 -0700
committer Eric Blake <ebb9@byu.net> 2008-08-03 19:43:51 -0600
commit cb26d7cb8b438224908d53df59b1d394ba1928f8 (patch)
tree 5fd98e6324ecac4eae890f7b3148f7bb3a9a533a
parent 40c640f486bf7a99c6e16d91332f25872f501488 (diff)
download m4-cb26d7cb8b438224908d53df59b1d394ba1928f8.tar.gz
Stage26: allow NUL in macro definitions
-rw-r--r-- configure.ac 3
-rw-r--r-- doc/m4.texinfo 97
-rw-r--r-- examples/foreachq4.m4 13
-rw-r--r-- examples/null.err bin 572 -> 713 bytes
-rw-r--r-- examples/null.m4 bin 6189 -> 6499 bytes
-rw-r--r-- examples/null.out bin 468 -> 510 bytes
-rw-r--r-- m4/gnulib-cache.m4 56
-rw-r--r-- src/builtin.c 82
-rw-r--r-- src/freeze.c 6
-rw-r--r-- src/input.c 131
-rw-r--r-- src/m4.c 2
-rw-r--r-- src/m4.h 13
-rw-r--r-- src/macro.c 35
13 files changed, 317 insertions, 121 deletions
diff --git a/configure.ac b/configure.ac
index ea4e130c..5b02c561 100644
--- a/configure.ac
+++ b/configure.ac
@@ -32,6 +32,9 @@ AC_CONFIG_HEADERS([lib/config.h:lib/config.hin])
AC_PROG_CC
M4_EARLY
+# M4 is single-threaded; so we can optimize gnulib code by using this:
+gl_DISABLE_THREADS
+
AC_CHECK_HEADERS_ONCE([siginfo.h sys/wait.h])
AC_CHECK_TYPES([siginfo_t], [], [],
[[#include <signal.h>
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index d6b7b59f..c8bf7ee1 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -1021,6 +1021,27 @@ The comment delimiters can be changed to any string at any time, using
the builtin macro @code{changecom}. @xref{Changecom}, for more
information.
+@ignore
+@comment Detect regression in 1.4.10b in regards to reparsing comments.
+@comment Not worth including in the manual.
+@example
+define(`e', `$@@')define(`q', ``$@@'')define(`foo', `bar')
+@result{}
+q(e(`one
+',#two ' foo
+))
+@result{}`one
+@result{}',`#two bar
+@result{}''
+changecom(`<', `>')define(`n', `$#')
+@result{}
+n(e(<`>, <'>))
+@result{}1
+len(e(<`>, ,<'>))
+@result{}12
+@end example
+@end ignore
+
@node Other tokens
@section Other kinds of input tokens
@@ -2578,7 +2599,8 @@ m4_indir(`m4_divnum')
Note that @code{indir} and @code{builtin} can be used to invoke builtins
without arguments, even when they normally require parameters to be
-recognized; but it will provoke a warning, and result in a void expansion.
+recognized; but it will provoke a warning, and the expansion will behave
+as though empty strings had been passed as the required arguments.
@example
builtin
@@ -2592,6 +2614,13 @@ builtin(`builtin')
builtin(`builtin',)
@error{}m4:stdin:4: Warning: builtin: undefined builtin `'
@result{}
+builtin(`builtin', ``'
+')
+@error{}m4:stdin:5: Warning: builtin: undefined builtin ``\'\n'
+@result{}
+indir(`index')
+@error{}m4:stdin:7: Warning: index: too few arguments: 0 < 2
+@result{}
@end example
@ignore
@@ -3779,6 +3808,22 @@ indir(`my_defn', indir(`shift', `', `foo'))
@result{}bar
@end example
+@ignore
+@comment not worth including in the manual, but this tests a trace code
+@comment path that was temporarily broken
+@comment options: -de --trace ifelse
+@example
+$ @kbd{m4 -de --trace ifelse}
+define(`e', `ifelse(`$1', `$2', `ifelse(`$1', `$2', `e(shift($@@))')')')
+@result{}
+e(`1', `1')
+@error{}m4trace: -1- ifelse -> ifelse(`1', `1', `e(shift(`1',`1'))')
+@error{}m4trace: -1- ifelse -> e(shift(`1',`1'))
+@error{}m4trace: -1- ifelse ->@w{ }
+@result{}
+@end example
+@end ignore
+
@node Debug Levels
@section Controlling debugging output
@@ -6990,6 +7035,22 @@ traceon(`undefined')dnl
@c Make sure freezing is successful.
+@example
+ifdef(`__unix__', ,
+ `errprint(` skipping: syscmd does not have unix semantics
+')m4exit(`77')')dnl
+changequote(`[', `]')dnl
+syscmd([echo 'changequote([,])pushdef([divnum],[hi])dnl' \
+ | ]__program__[ -F in.m4f \
+ && echo 'divnum popdef([divnum])divnum' \
+ | ]__program__[ -R in.m4f \
+ && rm in.m4f])status sysval
+@result{}hi 0
+@result{}status 0
+@end example
+
+@c Detect inability to freeze.
+
@comment options: -F /none/such
@comment status: 1
@example
@@ -7006,13 +7067,13 @@ ifdef(`__unix__', ,
`errprint(` skipping: syscmd does not have unix semantics
')m4exit(`77')')dnl
changequote(`[', `]')dnl
-syscmd([printf 'define(-\0-,hi)changequote([,\0])changecom(--\0)dnl
+syscmd([printf 'define(-\0-,\0-\0)changequote([,\0])changecom(--\0)dnl
divert(1)undivert(null.out)' | ]__program__[ -F in.m4f \
- && printf 'errprint([divnum\0] #-- indir(-\0-))' \
+ && printf 'errprint([divnum\0] #-- len(indir(-\0-)))' \
| ]__program__[ -R in.m4f \
&& rm in.m4f])errprint([ ]sysval[
])dnl
-@error{}divnum #-- hi 0
+@error{}divnum #-- 3 0
@end example
@end ignore
@@ -7937,6 +7998,34 @@ include(`loop.m4')dnl
@result{}10000
@end example
+@comment foreach via forloop recursion
+
+@comment examples
+@comment options: -Dlimit=10 -Dverbose -Dalt=4
+@example
+$ @kbd{m4 -I examples -Dlimit=10 -Dverbose -Dalt=4}
+include(`loop.m4')dnl
+@result{} 1 2 3 4 5 6 7 8 9 10
+@end example
+
+@comment examples
+@comment options: -Dlimit=2500 -Dalt=4
+@example
+$ @kbd{m4 -I examples -Dlimit=2500 -Dalt=4}
+include(`loop.m4')dnl
+@end example
+
+@comment examples
+@comment options: -Dlimit=10000 -Dalt=4
+@example
+$ @kbd{m4 -I examples -Dlimit=10000 -Dalt=4}
+define(`foo', `divert`'len(popdef(`_foreachq')_foreachq($@@))')dnl
+define(`debug', `pushdef(`_foreachq', defn(`foo'))')
+@result{}
+include(`loop.m4')dnl
+@result{}48894
+@end example
+
@end ignore
@node Improved m4wrap
diff --git a/examples/foreachq4.m4 b/examples/foreachq4.m4
new file mode 100644
index 00000000..3da64c92
--- /dev/null
+++ b/examples/foreachq4.m4
@@ -0,0 +1,13 @@
+include(`forloop2.m4')dnl
+divert(`-1')
+# foreachq(x, `item_1, item_2, ..., item_n', stmt)
+# quoted list, version based on forloop
+define(`foreachq',
+`ifelse(`$2', `', `', `_$0(`$1', `$3', $2)')')
+define(`_foreachq',
+`pushdef(`$1', forloop(`$1', `3', `$#',
+ `$0_(`1', `2', indir(`$1'))')`popdef(
+ `$1')')indir(`$1', $@)')
+define(`_foreachq_',
+``define(`$$1', `$$3')$$2`''')
+divert`'dnl
diff --git a/examples/null.err b/examples/null.err
index 5f989ee6..897ce346 100644
--- a/examples/null.err
+++ b/examples/null.err
Binary files differ
diff --git a/examples/null.m4 b/examples/null.m4
index de76742a..1823073d 100644
--- a/examples/null.m4
+++ b/examples/null.m4
Binary files differ
diff --git a/examples/null.out b/examples/null.out
index 5e90221e..dd834163 100644
--- a/examples/null.out
+++ b/examples/null.out
Binary files differ
diff --git a/m4/gnulib-cache.m4 b/m4/gnulib-cache.m4
index dffdf8d1..5c03a924 100644
--- a/m4/gnulib-cache.m4
+++ b/m4/gnulib-cache.m4
@@ -15,11 +15,63 @@
# Specification in the form of a command-line invocation:
-# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset binary-io c-stack clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 intprops memchr2 memmem mkstemp obstack obstack-printf-posix progname quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix
+# gnulib-tool --import --dir=. --local-dir=local --lib=libm4 --source-base=lib --m4-base=m4 --doc-base=doc --aux-dir=build-aux --with-tests --no-libtool --macro-prefix=M4 announce-gen assert autobuild avltree-oset binary-io c-stack clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 hash intprops memchr2 memmem mkstemp obstack obstack-printf-posix progname quote regex sigaction stdbool stdint stdlib-safer strsignal strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix
# Specification in the form of a few gnulib-tool.m4 macro invocations:
gl_LOCAL_DIR([local])
-gl_MODULES([announce-gen assert autobuild avltree-oset binary-io c-stack clean-temp cloexec close-stream closein config-h error fdl fflush flexmember fopen-safer freadptr freadseek fseeko gendocs getopt git-version-gen gnumakefile gnupload gpl-3.0 intprops memchr2 memmem mkstemp obstack obstack-printf-posix progname quote regex stdbool stdint stdlib-safer strtod strtol unlocked-io vasnprintf-posix verror version-etc version-etc-fsf xalloc xmemdup0 xprintf xvasprintf-posix])
+gl_MODULES([
+ announce-gen
+ assert
+ autobuild
+ avltree-oset
+ binary-io
+ c-stack
+ clean-temp
+ cloexec
+ close-stream
+ closein
+ config-h
+ error
+ fdl
+ fflush
+ flexmember
+ fopen-safer
+ freadptr
+ freadseek
+ fseeko
+ gendocs
+ getopt
+ git-version-gen
+ gnumakefile
+ gnupload
+ gpl-3.0
+ hash
+ intprops
+ memchr2
+ memmem
+ mkstemp
+ obstack
+ obstack-printf-posix
+ progname
+ quote
+ regex
+ sigaction
+ stdbool
+ stdint
+ stdlib-safer
+ strsignal
+ strtod
+ strtol
+ unlocked-io
+ vasnprintf-posix
+ verror
+ version-etc
+ version-etc-fsf
+ xalloc
+ xmemdup0
+ xprintf
+ xvasprintf-posix
+])
gl_AVOID([])
gl_SOURCE_BASE([lib])
gl_M4_BASE([m4])
diff --git a/src/builtin.c b/src/builtin.c
index c171ea96..bcf7bb91 100644
--- a/src/builtin.c
+++ b/src/builtin.c
@@ -427,26 +427,32 @@ free_regex (void)
}
}
-/*-----------------------------------------------------------------.
-| Define a predefined or user-defined macro, with name NAME of |
-| length NAME_LEN, and expansion TEXT. MODE is SYMBOL_INSERT for |
-| "define" or SYMBOL_PUSHDEF for "pushdef". This function is also |
-| used from main (). |
-`-----------------------------------------------------------------*/
+/*------------------------------------------------------------------.
+| Define a predefined or user-defined macro, with name NAME of |
+| length NAME_LEN, and expansion TEXT of length LEN. LEN may be |
+| SIZE_MAX, to use the string length of TEXT instead. MODE is |
+| SYMBOL_INSERT for "define" or SYMBOL_PUSHDEF for "pushdef". This |
+| function is also used from main (). |
+`------------------------------------------------------------------*/
void
define_user_macro (const char *name, size_t name_len, const char *text,
- symbol_lookup mode)
+ size_t len, symbol_lookup mode)
{
symbol *s;
- char *defn = xstrdup (text ? text : "");
+ char *defn;
+ assert (text);
+ if (len == SIZE_MAX)
+ len = strlen (text);
+ defn = xmemdup (text, len);
s = lookup_symbol (name, name_len, mode);
if (SYMBOL_TYPE (s) == TOKEN_TEXT)
free (SYMBOL_TEXT (s));
SYMBOL_TYPE (s) = TOKEN_TEXT;
SYMBOL_TEXT (s) = defn;
+ SYMBOL_TEXT_LEN (s) = len;
SYMBOL_MACRO_ARGS (s) = true;
/* Implement --warn-macro-sequence. */
@@ -454,7 +460,6 @@ define_user_macro (const char *name, size_t name_len, const char *text,
{
regoff_t offset = 0;
struct re_registers *regs = &macro_sequence_regs;
- size_t len = strlen (defn);
while (offset < len
&& (offset = re_search (&macro_sequence_buf, defn, len, offset,
@@ -513,13 +518,13 @@ builtin_init (void)
{
if (pp->unix_name != NULL)
define_user_macro (pp->unix_name, strlen (pp->unix_name),
- pp->func, SYMBOL_INSERT);
+ pp->func, SIZE_MAX, SYMBOL_INSERT);
}
else
{
if (pp->gnu_name != NULL)
define_user_macro (pp->gnu_name, strlen (pp->gnu_name),
- pp->func, SYMBOL_INSERT);
+ pp->func, SIZE_MAX, SYMBOL_INSERT);
}
}
@@ -628,7 +633,10 @@ ntoa (int32_t value, int radix)
static void
shipout_int (struct obstack *obs, int val)
{
- obstack_printf (obs, "%d", val);
+ const char *s;
+
+ s = ntoa ((int32_t) val, 10);
+ obstack_grow (obs, s, strlen (s));
}
@@ -670,7 +678,7 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode)
if (argc == 2)
{
- define_user_macro (ARG (1), ARG_LEN (1), "", mode);
+ define_user_macro (ARG (1), ARG_LEN (1), "", 0, mode);
return;
}
@@ -680,7 +688,8 @@ define_macro (int argc, macro_arguments *argv, symbol_lookup mode)
m4_warn (0, me, _("cannot concatenate builtins"));
/* fallthru */
case TOKEN_TEXT:
- define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true), mode);
+ define_user_macro (ARG (1), ARG_LEN (1), arg_text (argv, 2, true),
+ arg_len (argv, 2, true), mode);
break;
case TOKEN_FUNC:
@@ -905,7 +914,8 @@ m4_dumpdef (struct obstack *obs, int argc, macro_arguments *argv)
case TOKEN_TEXT:
if (debug_level & DEBUG_TRACE_QUOTE)
fwrite (curr_quote.str1, 1, curr_quote.len1, debug);
- fputs (SYMBOL_TEXT (data.base[0]), debug);
+ fwrite (SYMBOL_TEXT (data.base[0]), 1,
+ SYMBOL_TEXT_LEN (data.base[0]), debug);
if (debug_level & DEBUG_TRACE_QUOTE)
fwrite (curr_quote.str2, 1, curr_quote.len2, debug);
break;
@@ -1040,7 +1050,7 @@ m4_defn (struct obstack *obs, int argc, macro_arguments *argv)
{
case TOKEN_TEXT:
obstack_grow (obs, curr_quote.str1, curr_quote.len1);
- obstack_grow (obs, SYMBOL_TEXT (s), strlen (SYMBOL_TEXT (s)));
+ obstack_grow (obs, SYMBOL_TEXT (s), SYMBOL_TEXT_LEN (s));
obstack_grow (obs, curr_quote.str2, curr_quote.len2);
break;
@@ -1226,9 +1236,13 @@ m4_eval (struct obstack *obs, int argc, macro_arguments *argv)
s++;
}
len = strlen (s);
- if (min < len)
- min = len;
- obstack_printf (obs, "%.*d%s", min - len, 0, s);
+ if (len < min)
+ {
+ min -= len;
+ obstack_blank (obs, min);
+ memset ((char *) obstack_next_free (obs) - min, '0', min);
+ }
+ obstack_grow (obs, s, len);
}
static void
@@ -1409,7 +1423,7 @@ m4_changeword (struct obstack *obs, int argc, macro_arguments *argv)
if (bad_argc (me, argc, 1, 1))
return;
- set_word_regexp (me, ARG (1));
+ set_word_regexp (me, ARG (1), ARG_LEN (1));
}
#endif /* ENABLE_CHANGEWORD */
@@ -2292,29 +2306,31 @@ void
expand_user_macro (struct obstack *obs, symbol *sym,
int argc, macro_arguments *argv)
{
- const char *text;
+ const char *text = SYMBOL_TEXT (sym);
+ size_t len = SYMBOL_TEXT_LEN (sym);
int i;
+ const char *dollar = memchr (text, '$', len);
- for (text = SYMBOL_TEXT (sym); *text != '\0';)
+ while (dollar)
{
- if (*text != '$')
- {
- obstack_1grow (obs, *text);
- text++;
- continue;
- }
- text++;
- switch (*text)
+ obstack_grow (obs, text, dollar - text);
+ len -= dollar - text;
+ text = dollar;
+ if (len == 1)
+ break;
+ len--;
+ switch (*++text)
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
if (no_gnu_extensions)
{
i = *text++ - '0';
+ len--;
}
else
{
- for (i = 0; isdigit (to_uchar (*text)); text++)
+ for (i = 0; len && isdigit (to_uchar (*text)); text++, len--)
i = i * 10 + (*text - '0');
}
push_arg (obs, argv, i);
@@ -2323,17 +2339,21 @@ expand_user_macro (struct obstack *obs, symbol *sym,
case '#': /* number of arguments */
shipout_int (obs, argc - 1);
text++;
+ len--;
break;
case '*': /* all arguments */
case '@': /* ... same, but quoted */
push_args (obs, argv, false, *text == '@');
text++;
+ len--;
break;
default:
obstack_1grow (obs, '$');
break;
}
+ dollar = memchr (text, '$', len);
}
+ obstack_grow (obs, text, len);
}
diff --git a/src/freeze.c b/src/freeze.c
index dd856337..5d4ac423 100644
--- a/src/freeze.c
+++ b/src/freeze.c
@@ -110,9 +110,9 @@ produce_frozen_state (const char *name)
case TOKEN_TEXT:
xfprintf (file, "T%d,%d\n",
(int) SYMBOL_NAME_LEN (sym),
- (int) strlen (SYMBOL_TEXT (sym)));
+ (int) SYMBOL_TEXT_LEN (sym));
fwrite (SYMBOL_NAME (sym), 1, SYMBOL_NAME_LEN (sym), file);
- fputs (SYMBOL_TEXT (sym), file);
+ fwrite (SYMBOL_TEXT (sym), 1, SYMBOL_TEXT_LEN (sym), file);
fputc ('\n', file);
break;
@@ -353,7 +353,7 @@ reload_frozen_state (const char *name)
/* Enter a macro having an expansion text as a definition. */
- define_user_macro (string[0], number[0], string[1],
+ define_user_macro (string[0], number[0], string[1], number[1],
SYMBOL_PUSHDEF);
break;
diff --git a/src/input.c b/src/input.c
index 75f86146..a6853bf7 100644
--- a/src/input.c
+++ b/src/input.c
@@ -171,9 +171,6 @@ string_pair curr_comm;
# define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
-/* Table of characters that can start a word. */
-static char word_start[256];
-
/* Current regular expression for detecting words. */
static struct re_pattern_buffer word_regexp;
@@ -475,6 +472,7 @@ push_token (token_data *token, int level, bool inuse)
destructively modifies the chain it is parsing. */
chain = (token_chain *) obstack_copy (current_input, src_chain,
sizeof *chain);
+ chain->next = NULL;
if (chain->type == CHAIN_STR && chain->u.u_s.level == -1)
{
if (chain->u.u_s.len <= INPUT_INLINE_THRESHOLD || !inuse)
@@ -711,6 +709,9 @@ pop_wrapup (void)
obstack_free (&file_names, NULL);
obstack_free (wrapup_stack, NULL);
free (wrapup_stack);
+#ifdef ENABLE_CHANGEWORD
+ regfree (&word_regexp);
+#endif /* ENABLE_CHANGEWORD */
return false;
}
@@ -1197,11 +1198,12 @@ init_argv_token (struct obstack *obs, token_data *td)
/*------------------------------------------------------------------.
-| This function is for matching a string against a prefix of the |
-| input stream. If the string S of length SLEN matches the input |
-| and CONSUME is true, the input is discarded; otherwise any |
-| characters read are pushed back again. The function is used only |
-| when multicharacter quotes or comment delimiters are used. |
+| If the string S of length SLEN matches the next characters of the |
+| input stream, return true. If CONSUME, the first character has |
+| already been matched. If a match is found and CONSUME is true, |
+| the input is discarded; otherwise any characters read are pushed |
+| back again. The function is used only when multicharacter quotes |
+| or comment delimiters are used. |
`------------------------------------------------------------------*/
static bool
@@ -1212,6 +1214,11 @@ match_input (const char *s, size_t slen, bool consume)
const char *t;
bool result = false;
+ if (consume)
+ {
+ s++;
+ slen--;
+ }
assert (slen);
ch = peek_input (false);
if (ch != to_uchar (*s))
@@ -1245,21 +1252,22 @@ match_input (const char *s, size_t slen, bool consume)
return result;
}
-/*---------------------------------------------------------------.
-| The macro MATCH() is used to match a string S of length SLEN |
-| against the input. The first character is handled inline, for |
-| speed. Hopefully, this will not hurt efficiency too much when |
-| single character quotes and comment delimiters are used. If |
-| CONSUME, then CH is the result of next_char, and a successful |
-| match will discard the matched string. Otherwise, CH is the |
-| result of peek_input, and the input stream is effectively |
-| unchanged. |
-`---------------------------------------------------------------*/
+/*--------------------------------------------------------------------.
+| The macro MATCH() is used to match a string S of length SLEN |
+| against the input. The first character is handled inline for |
+| speed, and S[SLEN] must be safe to dereference (it is faster to do |
+| character comparison prior to length checks). This improves |
+| efficiency for the common case of single character quotes and |
+| comment delimiters, while being safe for disabled delimiters as |
+| well as longer delimiters. If CONSUME, then CH is the result of |
+| next_char, and a successful match will discard the matched string. |
+| Otherwise, CH is the result of peek_input, and the input stream is |
+| effectively unchanged. |
+`--------------------------------------------------------------------*/
#define MATCH(ch, s, slen, consume) \
- ((slen) && to_uchar ((s)[0]) == (ch) \
- && ((slen) == 1 \
- || (match_input ((s) + (consume), (slen) - (consume), consume))))
+ (to_uchar ((s)[0]) == (ch) \
+ && ((slen) >> 1 ? match_input (s, slen, consume) : (slen)))
/*----------------------------------------------------------.
@@ -1291,17 +1299,17 @@ input_init (void)
start_of_input_line = false;
- curr_quote.str1 = xmemdup (DEF_LQUOTE, 1);
+ curr_quote.str1 = xmemdup0 (DEF_LQUOTE, 1);
curr_quote.len1 = 1;
- curr_quote.str2 = xmemdup (DEF_RQUOTE, 1);
+ curr_quote.str2 = xmemdup0 (DEF_RQUOTE, 1);
curr_quote.len2 = 1;
- curr_comm.str1 = xmemdup (DEF_BCOMM, 1);
+ curr_comm.str1 = xmemdup0 (DEF_BCOMM, 1);
curr_comm.len1 = 1;
- curr_comm.str2 = xmemdup (DEF_ECOMM, 1);
+ curr_comm.str2 = xmemdup0 (DEF_ECOMM, 1);
curr_comm.len2 = 1;
#ifdef ENABLE_CHANGEWORD
- set_word_regexp (NULL, user_word_regexp);
+ set_word_regexp (NULL, user_word_regexp, SIZE_MAX);
#endif /* ENABLE_CHANGEWORD */
set_quote_age ();
@@ -1345,9 +1353,10 @@ set_quotes (const char *lq, size_t lq_len, const char *rq, size_t rq_len)
free (curr_quote.str1);
free (curr_quote.str2);
- curr_quote.str1 = xmemdup (lq, lq_len);
+ /* The use of xmemdup0 is essential for MATCH() to work. */
+ curr_quote.str1 = xmemdup0 (lq, lq_len);
curr_quote.len1 = lq_len;
- curr_quote.str2 = xmemdup (rq, rq_len);
+ curr_quote.str2 = xmemdup0 (rq, rq_len);
curr_quote.len2 = rq_len;
set_quote_age ();
}
@@ -1387,29 +1396,34 @@ set_comment (const char *bc, size_t bc_len, const char *ec, size_t ec_len)
free (curr_comm.str1);
free (curr_comm.str2);
- curr_comm.str1 = xmemdup (bc, bc_len);
+ /* The use of xmemdup0 is essential for MATCH() to work. */
+ curr_comm.str1 = xmemdup0 (bc, bc_len);
curr_comm.len1 = bc_len;
- curr_comm.str2 = xmemdup (ec, ec_len);
+ curr_comm.str2 = xmemdup0 (ec, ec_len);
curr_comm.len2 = ec_len;
set_quote_age ();
}
#ifdef ENABLE_CHANGEWORD
-/*-------------------------------------------------------------------.
-| Set the regular expression for recognizing words to REGEXP, and |
-| report errors on behalf of CALLER. If REGEXP is NULL, revert back |
-| to the default parsing rules. |
-`-------------------------------------------------------------------*/
+/*-----------------------------------------------------------------.
+| Set the regular expression for recognizing words to REGEXP of |
+| length LEN, and report errors on behalf of CALLER. If REGEXP is |
+| NULL, revert back to the default parsing rules. If LEN is |
+| SIZE_MAX, use strlen(REGEXP) instead. |
+`-----------------------------------------------------------------*/
void
-set_word_regexp (const call_info *caller, const char *regexp)
+set_word_regexp (const call_info *caller, const char *regexp, size_t len)
{
- int i;
const char *msg;
struct re_pattern_buffer new_word_regexp;
- if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP))
+ if (len == SIZE_MAX)
+ len = strlen (regexp);
+ if (len == 0
+ || (len == strlen (DEFAULT_WORD_REGEXP)
+ && !memcmp (regexp, DEFAULT_WORD_REGEXP, len)))
{
default_word_regexp = true;
set_quote_age ();
@@ -1418,30 +1432,30 @@ set_word_regexp (const call_info *caller, const char *regexp)
/* Dry run to see whether the new expression is compilable. */
init_pattern_buffer (&new_word_regexp, NULL);
- msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp);
+ msg = re_compile_pattern (regexp, len, &new_word_regexp);
regfree (&new_word_regexp);
if (msg != NULL)
{
- m4_warn (0, caller, _("bad regular expression `%s': %s"), regexp, msg);
+ m4_warn (0, caller, _("bad regular expression %s: %s"),
+ quotearg_style_mem (locale_quoting_style, regexp, len), msg);
return;
}
- /* If compilation worked, retry using the word_regexp struct.
- Can't rely on struct assigns working, so redo the compilation. */
- regfree (&word_regexp);
- msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
+ /* If compilation worked, retry using the word_regexp struct. We
+ can't rely on struct assigns working, so redo the compilation.
+ The fastmap can be reused between compilations, and will be freed
+ by the final regfree. */
+ if (!word_regexp.fastmap)
+ word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1);
+ msg = re_compile_pattern (regexp, len, &word_regexp);
assert (!msg);
re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
+ if (re_compile_fastmap (&word_regexp))
+ assert (false);
default_word_regexp = false;
set_quote_age ();
-
- for (i = 1; i < 256; i++)
- {
- char test = i;
- word_start[i] = re_match (&word_regexp, &test, 1, 0, NULL) > 0;
- }
}
#endif /* ENABLE_CHANGEWORD */
@@ -1687,7 +1701,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
assert (ch < CHAR_EOF);
obstack_1grow (obs_td, ch);
}
- type = TOKEN_STRING;
+ type = TOKEN_COMMENT;
}
else if (default_word_regexp && (isalpha (ch) || ch == '_'))
{
@@ -1703,7 +1717,7 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
#ifdef ENABLE_CHANGEWORD
- else if (!default_word_regexp && word_start[ch])
+ else if (!default_word_regexp && word_regexp.fastmap[ch])
{
obstack_1grow (&token_stack, ch);
while (1)
@@ -1829,7 +1843,8 @@ next_token (token_data *td, int *line, struct obstack *obs, bool allow_argv,
}
else
{
- assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP && type == TOKEN_STRING);
+ assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP
+ && (type == TOKEN_STRING || type == TOKEN_COMMENT));
#ifdef DEBUG_INPUT
{
token_chain *chain;
@@ -1887,11 +1902,11 @@ peek_token (void)
}
else if (MATCH (ch, curr_comm.str1, curr_comm.len1, false))
{
- result = TOKEN_STRING;
+ result = TOKEN_COMMENT;
}
else if ((default_word_regexp && (isalpha (ch) || ch == '_'))
#ifdef ENABLE_CHANGEWORD
- || (!default_word_regexp && word_start[ch])
+ || (!default_word_regexp && word_regexp.fastmap[ch])
#endif /* ENABLE_CHANGEWORD */
)
{
@@ -1935,6 +1950,8 @@ token_type_string (token_type t)
return "EOF";
case TOKEN_STRING:
return "STRING";
+ case TOKEN_COMMENT:
+ return "COMMENT";
case TOKEN_WORD:
return "WORD";
case TOKEN_OPEN:
@@ -1973,6 +1990,10 @@ print_token (const char *s, token_type t, token_data *td)
xfprintf (stderr, "string:");
break;
+ case TOKEN_COMMENT:
+ xfprintf (stderr, "comment:");
+ break;
+
case TOKEN_MACDEF:
xfprintf (stderr, "macro: %p\n", TOKEN_DATA_FUNC (td));
break;
diff --git a/src/m4.c b/src/m4.c
index e2ef4bc3..6b59a8a4 100644
--- a/src/m4.c
+++ b/src/m4.c
@@ -620,7 +620,7 @@ main (int argc, char *const *argv, char *const *envp)
const char *value = strchr (defines->arg, '=');
size_t len = value ? value - defines->arg : strlen (defines->arg);
define_user_macro (defines->arg, len, value ? value + 1 : "",
- SYMBOL_INSERT);
+ value ? SIZE_MAX : 0, SYMBOL_INSERT);
}
break;
diff --git a/src/m4.h b/src/m4.h
index c79a5618..553a654c 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -218,7 +218,8 @@ typedef struct token_chain token_chain;
enum token_type
{
TOKEN_EOF = 4,/* End of file, TOKEN_VOID. */
- TOKEN_STRING, /* Quoted string or comment, TOKEN_TEXT or TOKEN_COMP. */
+ TOKEN_STRING, /* Quoted string, TOKEN_TEXT or TOKEN_COMP. */
+ TOKEN_COMMENT,/* Comment, TOKEN_TEXT or TOKEN_COMP. */
TOKEN_WORD, /* An identifier, TOKEN_TEXT. */
TOKEN_OPEN, /* Active character `(', TOKEN_TEXT. */
TOKEN_COMMA, /* Active character `,', TOKEN_TEXT. */
@@ -380,7 +381,7 @@ extern string_pair curr_quote;
void set_quotes (const char *, size_t, const char *, size_t);
void set_comment (const char *, size_t, const char *, size_t);
#ifdef ENABLE_CHANGEWORD
-void set_word_regexp (const call_info *, const char *);
+void set_word_regexp (const call_info *, const char *, size_t);
#endif
unsigned int quote_age (void);
bool safe_quotes (void);
@@ -440,6 +441,7 @@ struct symbol
#define SYMBOL_NAME_LEN(S) ((S)->len)
#define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data))
#define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
+#define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data))
#define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data))
typedef enum symbol_lookup symbol_lookup;
@@ -470,7 +472,7 @@ token_data_type arg_type (macro_arguments *, unsigned int);
const char *arg_text (macro_arguments *, unsigned int, bool);
bool arg_equal (macro_arguments *, unsigned int, unsigned int);
bool arg_empty (macro_arguments *, unsigned int);
-size_t arg_len (macro_arguments *, unsigned int);
+size_t arg_len (macro_arguments *, unsigned int, bool);
builtin_func *arg_func (macro_arguments *, unsigned int);
struct obstack *arg_scratch (void);
bool arg_print (struct obstack *, macro_arguments *, unsigned int,
@@ -490,7 +492,7 @@ void wrap_args (macro_arguments *);
/* Grab the text length at argv index I. Assumes macro_argument *argv
is in scope, and aborts if the argument is not text. */
-#define ARG_LEN(i) arg_len (argv, i)
+#define ARG_LEN(i) arg_len (argv, i, false)
/* File: builtin.c --- builtins. */
@@ -526,7 +528,8 @@ bool bad_argc (const call_info *, int, unsigned int, unsigned int);
void define_builtin (const char *, size_t, const builtin *, symbol_lookup);
void set_macro_sequence (const char *);
void free_regex (void);
-void define_user_macro (const char *, size_t, const char *, symbol_lookup);
+void define_user_macro (const char *, size_t, const char *, size_t,
+ symbol_lookup);
void undivert_all (void);
void expand_user_macro (struct obstack *, symbol *, int, macro_arguments *);
void m4_placeholder (struct obstack *, int, macro_arguments *);
diff --git a/src/macro.c b/src/macro.c
index afb6c246..e3fa1095 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -260,8 +260,7 @@ expand_token (struct obstack *obs, token_type t, token_data *td, int line,
bool first)
{
symbol *sym;
- bool result;
- int ch;
+ bool result = false;
switch (t)
{ /* TOKSW */
@@ -278,6 +277,7 @@ expand_token (struct obstack *obs, token_type t, token_data *td, int line,
provided, the string was already expanded into it during
next_token. */
result = first || safe_quotes ();
+ case TOKEN_COMMENT:
if (obs)
return result;
break;
@@ -295,8 +295,9 @@ expand_token (struct obstack *obs, token_type t, token_data *td, int line,
numeric, then behavior of safe_quotes is applicable.
Otherwise, assume these characters have a high likelihood of
use in quote delimiters. */
- ch = to_uchar (*TOKEN_DATA_TEXT (td));
- result = (isspace (ch) || isdigit (ch)) && safe_quotes ();
+ result = *TOKEN_DATA_TEXT (td) != *curr_quote.str2 && safe_quotes ();
+ if (result)
+ assert (*TOKEN_DATA_TEXT (td) != *curr_quote.str1);
break;
case TOKEN_WORD:
@@ -420,6 +421,7 @@ expand_argument (struct obstack *obs, token_data *argp,
case TOKEN_WORD:
case TOKEN_STRING:
+ case TOKEN_COMMENT:
case TOKEN_MACDEF:
if (!expand_token (obs, t, &td, line, first))
age = 0;
@@ -1115,9 +1117,10 @@ arg_empty (macro_arguments *argv, unsigned int arg)
}
/* Given ARGV, return the length of argument ARG. Abort if the
- argument is not text. Indices beyond argc return 0. */
+ argument is not text. Indices beyond argc return 0. If FLATTEN,
+ builtins are ignored. */
size_t
-arg_len (macro_arguments *argv, unsigned int arg)
+arg_len (macro_arguments *argv, unsigned int arg, bool flatten)
{
token_data *token;
token_chain *chain;
@@ -1130,7 +1133,7 @@ arg_len (macro_arguments *argv, unsigned int arg)
}
if (arg >= argv->argc)
return 0;
- token = arg_token (argv, arg, NULL, false);
+ token = arg_token (argv, arg, NULL, flatten);
switch (TOKEN_DATA_TYPE (token))
{
case TOKEN_TEXT:
@@ -1150,9 +1153,8 @@ arg_len (macro_arguments *argv, unsigned int arg)
len += chain->u.u_s.len;
break;
case CHAIN_FUNC:
- // TODO concatenate builtins
- assert (!"implemented");
- abort ();
+ assert (flatten);
+ break;
case CHAIN_ARGV:
i = chain->u.u_a.index;
limit = chain->u.u_a.argv->argc - i - chain->u.u_a.skip_last;
@@ -1163,15 +1165,8 @@ arg_len (macro_arguments *argv, unsigned int arg)
len += (quotes->len1 + quotes->len2) * limit;
len += limit - 1;
while (limit--)
- {
- // TODO handle builtin concatenation
- if (TOKEN_DATA_TYPE (arg_token (chain->u.u_a.argv, i, NULL,
- false)) == TOKEN_FUNC)
- assert (argv->flatten);
- else
- len += arg_len (chain->u.u_a.argv, i);
- i++;
- }
+ len += arg_len (chain->u.u_a.argv, i++,
+ flatten || chain->u.u_a.flatten);
break;
default:
assert (!"arg_len");
@@ -1179,7 +1174,7 @@ arg_len (macro_arguments *argv, unsigned int arg)
}
chain = chain->next;
}
- assert (len);
+ assert (len || flatten);
return len;
case TOKEN_FUNC:
default: