summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Blake <ebb9@byu.net>2009-02-14 10:14:34 -0700
committerEric Blake <ebb9@byu.net>2009-02-16 06:37:15 -0700
commite5632a42071a39b1e6988533aeb2aeab16188b85 (patch)
tree185faffe4b1d5b60f3dfc547f60f346ec4420ade
parent1e2cb352077020f928c9e6c700880276ea79d729 (diff)
downloadm4-e5632a42071a39b1e6988533aeb2aeab16188b85.tar.gz
Revamp changesyntax vs. changequote interactions.
* m4/m4module.h (M4_SYNTAX_VALUE): Delete unused macro. (M4_SYNTAX_SUSPECT): New macro. * m4/m4private.h (struct m4_syntax_table): Add suspect field. * m4/syntax.c (check_is_single_quotes, check_is_single_comments) (check_is_macro_escaped): Delete, by inlining body... (m4_set_syntax): ...into here. Improves handling between changesyntax and changequote/changecom. (add_syntax_set, subtract_syntax_set, set_syntax_set): Simplify, and let suspect field track needed cleanup. (m4_set_quotes, m4_set_comment): Adjust meaning of is_single_quotes and is_single_comment flags to always be true if only one delimiter exists, regardless of its length. Ensure that the syntax categories M4_SYNTAX_LQUOTE and M4_SYNTAX_BCOMM are only used on 1-byte delimiters. (add_syntax_attribute, remove_syntax_attribute): Change signature to allow the use of fewer casts. Adjust the suspect field when necessary. (m4_reset_syntax, set_quote_age): Adjust callers. * m4/input.c (m4__next_token, m4__next_token_is_open): Simplify callers. * doc/m4.texinfo (Changesyntax): Update documentation and tests. Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r--ChangeLog23
-rw-r--r--doc/m4.texinfo88
-rw-r--r--m4/input.c23
-rw-r--r--m4/m4module.h6
-rw-r--r--m4/m4private.h12
-rw-r--r--m4/syntax.c438
6 files changed, 298 insertions, 292 deletions
diff --git a/ChangeLog b/ChangeLog
index 796c720c..8f776197 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,28 @@
2009-02-16 Eric Blake <ebb9@byu.net>
+ Revamp changesyntax vs. changequote interactions.
+ * m4/m4module.h (M4_SYNTAX_VALUE): Delete unused macro.
+ (M4_SYNTAX_SUSPECT): New macro.
+ * m4/m4private.h (struct m4_syntax_table): Add suspect field.
+ * m4/syntax.c (check_is_single_quotes, check_is_single_comments)
+ (check_is_macro_escaped): Delete, by inlining body...
+ (m4_set_syntax): ...into here. Improves handling between
+ changesyntax and changequote/changecom.
+ (add_syntax_set, subtract_syntax_set, set_syntax_set): Simplify,
+ and let suspect field track needed cleanup.
+ (m4_set_quotes, m4_set_comment): Adjust meaning of
+ is_single_quotes and is_single_comment flags to always be true if
+ only one delimiter exists, regardless of its length. Ensure that
+ the syntax categories M4_SYNTAX_LQUOTE and M4_SYNTAX_BCOMM are
+ only used on 1-byte delimiters.
+ (add_syntax_attribute, remove_syntax_attribute): Change signature
+ to allow the use of fewer casts. Adjust the suspect field when
+ necessary.
+ (m4_reset_syntax, set_quote_age): Adjust callers.
+ * m4/input.c (m4__next_token, m4__next_token_is_open): Simplify
+ callers.
+ * doc/m4.texinfo (Changesyntax): Update documentation and tests.
+
Improve changesyntax documentation.
* doc/m4.texinfo (Changesyntax): Merge two tables into one
multitable.
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 3d20d741..5c098388 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -5703,16 +5703,24 @@ a@@a
@result{}ATESTa
@end example
-@comment FIXME - improve this wording
-There is obviously an overlap with @code{changecom} and
-@code{changequote}. Comment delimiters and quotes can now be defined in
-two different ways. To avoid incompatibilities, if the quotes are set
-with @code{changequote}, all other characters marked in the syntax table
-as quotes will revert to their normal syntax categories, leaving only
-one set of defined quotes as before. If the quotes are set with
-@code{changesyntax}, it is possible to result in multiple sets of
-quotes. This applies to comment delimiters as well, @i{mutatis
-mutandis}.
+There is obviously an overlap between @code{changesyntax} and
+@code{changequote}, since there are now two ways to modify quote
+delimiters. To avoid incompatibilities, if the quotes are modified by
+@code{changequote}, any characters previously set to either quote
+delimiter by @code{changesyntax} are first demoted to the other category
+(@samp{O}), so the result is only a single set of quotes. In the other
+direction, if quotes were already disabled, or if both the start and end
+delimiter set by @code{changequote} are single bytes, then
+@code{changesyntax} preserves those settings. But if either delimiter
+occupies multiple bytes, @code{changesyntax} first disables both
+delimiters. Quotes can be disabled via @code{changesyntax} by emptying
+the left quote basic category (@samp{L}). Meanwhile, the right quote
+context category (@samp{R}) will never be empty; if a
+@code{changesyntax} action would otherwise leave that category empty,
+then the default end delimiter from @code{changequote} (@samp{'}) is
+used; thus, it is never possible to get @code{m4} in a state where a
+quoted string cannot be terminated. These interactions apply to comment
+delimiters as well, @i{mutatis mutandis} with @code{changecom}.
@example
define(`test', `TEST')
@@ -5720,20 +5728,33 @@ define(`test', `TEST')
dnl Add additional single-byte delimiters.
changesyntax(`L+<', `R+>')
@result{}
-<test>
-@result{}test
-`test'
-@result{}test
-[test]
-@result{}[TEST]
+<test> `test' [test] <<test>>
+@result{}test test [TEST] <test>
+dnl Use standard interface, overriding changesyntax settings.
changequote(<[>, `]')
@result{}
-<test>
-@result{}<TEST>
-`test'
-@result{}`TEST'
-[test]
-@result{}test
+<test> `test' [test] <<test>>
+@result{}<TEST> `TEST' test <<TEST>>
+dnl Introduce multi-byte delimiters.
+changequote([<<], [>>])
+@result{}
+<test> `test' [test] <<test>>
+@result{}<TEST> `TEST' [TEST] test
+dnl Change end quote, effectively disabling quotes.
+changesyntax(<<R]>>)
+@result{}
+<test> `test' [test] <<test>>
+@result{}<TEST> `TEST' [TEST] <<TEST>>
+dnl Change beginning quote, make ] normal, thus making ' end quote.
+changesyntax(L`, R-])
+@result{}
+<test> `test' [test] <<test>>
+@result{}<TEST> test [TEST] <<TEST>>
+dnl Set multi-byte quote; unrelated changes don't impact it.
+changequote(`<<', `>>')changesyntax(<<@@\>>)
+@result{}
+<\test> `\test' [\test] <<\test>>
+@result{}<TEST> `TEST' [TEST] \test
@end example
If several characters are assigned to a category that forms single
@@ -5748,29 +5769,6 @@ eval@{2**4-1; 2: 8>
@result{}00001111
@end example
-On the other hand, a multi-character start-quote sequence, which can
-only be created by @code{changequote}, will only be matched by the
-corresponding end-quote sequence. The same goes for comment delimiters.
-
-@example
-define(`test', `==$1==')
-@result{}
-changequote(`<<', `>>')
-@result{}
-changesyntax(<<L[>>, <<R]>>)
-@result{}
-test(<<testing]>>)
-@result{}==testing]==
-test([testing>>])
-@result{}==testing>>==
-test([<<testing>>])
-@result{}==testing==
-@end example
-
-@noindent
-Note how it is possible to have both long and short quotes, if
-@code{changequote} is used before @code{changesyntax}.
-
The syntax table is initialized to be backwards compatible, so if you
never call @code{changesyntax}, nothing will have changed.
diff --git a/m4/input.c b/m4/input.c
index ba2e467a..6c761d01 100644
--- a/m4/input.c
+++ b/m4/input.c
@@ -1640,9 +1640,8 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
obstack_1grow (obs_safe, ch);
}
}
- else if (!m4_is_syntax_single_quotes (M4SYNTAX)
- && MATCH (context, ch, context->syntax->quote.str1,
- context->syntax->quote.len1, true))
+ else if (MATCH (context, ch, context->syntax->quote.str1,
+ context->syntax->quote.len1, true))
{ /* QUOTED STRING, LONGER QUOTES */
if (obs)
obs_safe = obs;
@@ -1719,9 +1718,8 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
type = (m4_get_discard_comments_opt (context)
? M4_TOKEN_NONE : M4_TOKEN_COMMENT);
}
- else if (!m4_is_syntax_single_comments (M4SYNTAX)
- && MATCH (context, ch, context->syntax->comm.str1,
- context->syntax->comm.len1, true))
+ else if (MATCH (context, ch, context->syntax->comm.str1,
+ context->syntax->comm.len1, true))
{ /* COMMENT, LONGER DELIM */
if (obs && !m4_get_discard_comments_opt (context))
obs_safe = obs;
@@ -1779,8 +1777,7 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
obstack_1grow (&token_stack, ch);
type = M4_TOKEN_CLOSE;
}
- else if (m4_is_syntax_single_quotes (M4SYNTAX)
- && m4_is_syntax_single_comments (M4SYNTAX))
+ else if (m4__safe_quotes (M4SYNTAX))
{ /* EVERYTHING ELSE (SHORT QUOTES AND COMMENTS) */
assert (ch < CHAR_EOF);
obstack_1grow (&token_stack, ch);
@@ -1882,12 +1879,10 @@ m4__next_token_is_open (m4 *context)
|| m4_has_syntax (M4SYNTAX, ch, (M4_SYNTAX_BCOMM | M4_SYNTAX_ESCAPE
| M4_SYNTAX_ALPHA | M4_SYNTAX_LQUOTE
| M4_SYNTAX_ACTIVE))
- || (!m4_is_syntax_single_comments (M4SYNTAX)
- && MATCH (context, ch, context->syntax->comm.str1,
- context->syntax->comm.len1, false))
- || (!m4_is_syntax_single_quotes (M4SYNTAX)
- && MATCH (context, ch, context->syntax->quote.str1,
- context->syntax->quote.len1, false)))
+ || (MATCH (context, ch, context->syntax->comm.str1,
+ context->syntax->comm.len1, false))
+ || (MATCH (context, ch, context->syntax->quote.str1,
+ context->syntax->quote.len1, false)))
return false;
return m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OPEN);
}
diff --git a/m4/m4module.h b/m4/m4module.h
index 07f8c1a5..c94f56ab 100644
--- a/m4/m4module.h
+++ b/m4/m4module.h
@@ -484,8 +484,12 @@ enum {
M4_SYNTAX_ECOMM = 1 << 15
};
+/* Mask of attribute syntax categories. */
#define M4_SYNTAX_MASKS (M4_SYNTAX_RQUOTE | M4_SYNTAX_ECOMM)
-#define M4_SYNTAX_VALUE (~(M4_SYNTAX_RQUOTE | M4_SYNTAX_ECOMM))
+/* Mask of basic syntax categories where any change requires a
+ recomputation of the overall syntax characteristics. */
+#define M4_SYNTAX_SUSPECT (M4_SYNTAX_LQUOTE | M4_SYNTAX_BCOMM \
+ | M4_SYNTAX_ESCAPE)
#define m4_syntab(S, C) ((S)->table[(C)])
/* Determine if character C matches any of the bitwise-or'd syntax
diff --git a/m4/m4private.h b/m4/m4private.h
index 49fba3b1..4f269796 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -1,6 +1,6 @@
/* GNU m4 -- A simple macro processor
Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1998, 1999, 2004,
- 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+ 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU M4.
@@ -472,17 +472,19 @@ struct m4_syntax_table {
m4_string_pair quote; /* Quote delimiters. */
m4_string_pair comm; /* Comment delimiters. */
- /* True iff strlen(lquote) == strlen(rquote) == 1 and lquote is not
- interfering with macro names. */
+ /* True iff only one start and end quote delimiter exist. */
bool_bitfield is_single_quotes : 1;
- /* True iff strlen(bcomm) == strlen(ecomm) == 1 and bcomm is not
- interfering with macros or quotes. */
+ /* True iff only one start and end comment delimiter exist. */
bool_bitfield is_single_comments : 1;
/* True iff some character has M4_SYNTAX_ESCAPE. */
bool_bitfield is_macro_escaped : 1;
+ /* True iff a changesyntax call has impacted something that requires
+ cleanup at the end. */
+ bool_bitfield suspect : 1;
+
/* Track the number of changesyntax calls. This saturates at
0xffff, so the idea is that most users won't be changing the
syntax that frequently; perhaps in the future we will cache
diff --git a/m4/syntax.c b/m4/syntax.c
index 1fb48158..213d7908 100644
--- a/m4/syntax.c
+++ b/m4/syntax.c
@@ -1,6 +1,6 @@
/* GNU m4 -- A simple macro processor
Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2002, 2004, 2006,
- 2007, 2008 Free Software Foundation, Inc.
+ 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GNU M4.
@@ -31,6 +31,7 @@
according to a syntax table. The character groups are (definitions
are all in m4.h, those marked with a * are not yet in use):
+ Basic (all characters fall in one of these mutually exclusive bins)
M4_SYNTAX_IGNORE *Character to be deleted from input as if not present
M4_SYNTAX_OTHER Any character with no special meaning to m4
M4_SYNTAX_SPACE Whitespace (ignored when leading macro arguments)
@@ -46,12 +47,12 @@
M4_SYNTAX_ALPHA Alphabetic characters (can start macro names)
M4_SYNTAX_NUM Numeric characters (can form macro names)
- M4_SYNTAX_LQUOTE A single characters left quote
- M4_SYNTAX_BCOMM A single characters begin comment delimiter
+ M4_SYNTAX_LQUOTE A single character left quote
+ M4_SYNTAX_BCOMM A single character begin comment delimiter
- (These are bit masks)
- M4_SYNTAX_RQUOTE A single characters right quote
- M4_SYNTAX_ECOMM A single characters end comment delimiter
+ Attribute (these are context sensitive, and exist in addition to basic)
+ M4_SYNTAX_RQUOTE A single character right quote
+ M4_SYNTAX_ECOMM A single character end comment delimiter
Besides adding new facilities, the use of a syntax table will reduce
the number of calls to next_token (). Now groups of OTHER, NUM and
@@ -65,15 +66,10 @@
"changesyntax" allows the the user to change the category of any
character.
- Default '\n' is both ECOMM and SPACE, depending on the context. To
- solve the problem of quotes and comments that have diffent syntax
- code based on the context, the RQUOTE and ECOMM codes are bit
- masks to add to an ordinary code. If a character is made a quote it
- will be recognised if the basis code does not have precedence.
-
- When changing quotes and comment delimiters only the bits are
- removed, and the characters are therefore reverted to its old
- category code.
+ By default, '\n' is both ECOMM and SPACE, depending on the context.
+ Hence we have basic categories (mutually exclusive, can introduce a
+ context, and can be empty sets), and attribute categories
+ (additive, only recognized in context, and will never be empty).
The precedence as implemented by next_token () is:
@@ -100,13 +96,27 @@
a string is parsed equally whether there is a $ or not. These characters
are instead used during user macro expansion.
- M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM do not start tokens. */
-static bool check_is_single_quotes (m4_syntax_table *);
-static bool check_is_single_comments (m4_syntax_table *);
-static bool check_is_macro_escaped (m4_syntax_table *);
-static int add_syntax_attribute (m4_syntax_table *, int, int);
-static int remove_syntax_attribute (m4_syntax_table *, int, int);
+ M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM do not start tokens.
+
+ There are several optimizations that can be performed depending on
+ known states of the syntax table. For example, when searching for
+ quotes, if there is only a single start quote and end quote
+ delimiter, we can use memchr2 and search a word at a time, instead
+ of performing a table lookup a byte at a time. The is_single_*
+ flags track whether quotes and comments have a single delimiter
+ (always the case if changequote/changecom were used, and
+ potentially the case after changesyntax). Since we frequently need
+ to access quotes, we store the oldest valid quote outside the
+ lookup table; the suspect flag tracks whether a cleanup pass is
+ needed to restore our invariants. On the other hand, coalescing
+ multiple M4_SYNTAX_OTHER bytes could form a delimiter, so many
+ optimizations must be disabled if a multi-byte delimiter exists;
+ this is handled by m4__safe_quotes. Meanwhile, quotes and comments
+ can be disabled if the leading delimiter is length 0. */
+
+static int add_syntax_attribute (m4_syntax_table *, char, int);
+static int remove_syntax_attribute (m4_syntax_table *, char, int);
static void set_quote_age (m4_syntax_table *, bool, bool);
m4_syntax_table *
@@ -217,35 +227,44 @@ m4_syntax_code (char ch)
/* Functions to manipulate the syntax table. */
static int
-add_syntax_attribute (m4_syntax_table *syntax, int ch, int code)
+add_syntax_attribute (m4_syntax_table *syntax, char ch, int code)
{
+ int c = to_uchar (ch);
if (code & M4_SYNTAX_MASKS)
- syntax->table[ch] |= code;
+ {
+ syntax->table[c] |= code;
+ syntax->suspect = true;
+ }
else
- syntax->table[ch] = (syntax->table[ch] & M4_SYNTAX_MASKS) | code;
+ {
+ if ((code & (M4_SYNTAX_SUSPECT)) != 0
+ || m4_has_syntax (syntax, c, M4_SYNTAX_SUSPECT))
+ syntax->suspect = true;
+ syntax->table[c] = ((syntax->table[c] & M4_SYNTAX_MASKS) | code);
+ }
#ifdef DEBUG_SYNTAX
- xfprintf(stderr, "Set syntax %o %c = %04X\n",
- ch, isprint(ch) ? ch : '-',
- syntax->table[ch]);
+ xfprintf(stderr, "Set syntax %o %c = %04X\n", c, isprint(c) ? c : '-',
+ syntax->table[c]);
#endif
- return syntax->table[ch];
+ return syntax->table[c];
}
static int
-remove_syntax_attribute (m4_syntax_table *syntax, int ch, int code)
+remove_syntax_attribute (m4_syntax_table *syntax, char ch, int code)
{
+ int c = to_uchar (ch);
assert (code & M4_SYNTAX_MASKS);
- syntax->table[ch] &= ~code;
+ syntax->table[c] &= ~code;
+ syntax->suspect = true;
#ifdef DEBUG_SYNTAX
- xfprintf(stderr, "Unset syntax %o %c = %04X\n",
- ch, isprint(ch) ? ch : '-',
- syntax->table[ch]);
+ xfprintf(stderr, "Unset syntax %o %c = %04X\n", c, isprint(c) ? c : '-',
+ syntax->table[c]);
#endif
- return syntax->table[ch];
+ return syntax->table[c];
}
/* Add the set CHARS of length LEN to syntax category CODE, removing
@@ -254,21 +273,8 @@ static void
add_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len,
int code)
{
- int ch;
-
- if (!len)
- return;
-
- if (code == M4_SYNTAX_ESCAPE)
- syntax->is_macro_escaped = true;
-
- /* Adding doesn't affect single-quote or single-comment. */
-
while (len--)
- {
- ch = to_uchar (*chars++);
- add_syntax_attribute (syntax, ch, code);
- }
+ add_syntax_attribute (syntax, *chars++, code);
}
/* Remove the set CHARS of length LEN from syntax category CODE,
@@ -277,43 +283,14 @@ static void
subtract_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len,
int code)
{
- int ch;
-
- if (!len)
- return;
-
while (len--)
{
- ch = to_uchar (*chars++);
+ char ch = *chars++;
if ((code & M4_SYNTAX_MASKS) != 0)
remove_syntax_attribute (syntax, ch, code);
else if (m4_has_syntax (syntax, ch, code))
add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER);
}
-
- /* Check for any cleanup needed. */
- switch (code)
- {
- case M4_SYNTAX_ESCAPE:
- if (syntax->is_macro_escaped)
- check_is_macro_escaped (syntax);
- break;
-
- case M4_SYNTAX_LQUOTE:
- case M4_SYNTAX_RQUOTE:
- if (syntax->is_single_quotes)
- check_is_single_quotes (syntax);
- break;
-
- case M4_SYNTAX_BCOMM:
- case M4_SYNTAX_ECOMM:
- if (syntax->is_single_comments)
- check_is_single_comments (syntax);
- break;
-
- default:
- break;
- }
}
/* Make the set CHARS of length LEN become syntax category CODE,
@@ -330,21 +307,16 @@ set_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len,
OTHER. */
for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
- if (code == M4_SYNTAX_RQUOTE || code == M4_SYNTAX_ECOMM)
+ if ((code & M4_SYNTAX_MASKS) != 0)
remove_syntax_attribute (syntax, ch, code);
else if (m4_has_syntax (syntax, ch, code))
add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER);
}
while (len--)
{
- ch = to_uchar (*chars++);
+ ch = *chars++;
add_syntax_attribute (syntax, ch, code);
}
-
- /* Check for any cleanup needed. */
- check_is_macro_escaped (syntax);
- check_is_single_quotes (syntax);
- check_is_single_comments (syntax);
}
/* Reset syntax category CODE to its default state, sending all other
@@ -375,9 +347,6 @@ reset_syntax_set (m4_syntax_table *syntax, int code)
else if (syntax->orig[ch] == code || m4_has_syntax (syntax, ch, code))
add_syntax_attribute (syntax, ch, syntax->orig[ch]);
}
- check_is_macro_escaped (syntax);
- check_is_single_quotes (syntax);
- check_is_single_comments (syntax);
}
/* Reset the syntax table to its default state. */
@@ -403,10 +372,8 @@ m4_reset_syntax (m4_syntax_table *syntax)
syntax->comm.str2 = xmemdup0 (DEF_ECOMM, 1);
syntax->comm.len2 = 1;
- add_syntax_attribute (syntax, to_uchar (syntax->quote.str2[0]),
- M4_SYNTAX_RQUOTE);
- add_syntax_attribute (syntax, to_uchar (syntax->comm.str2[0]),
- M4_SYNTAX_ECOMM);
+ add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE);
+ add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM);
syntax->is_single_quotes = true;
syntax->is_single_comments = true;
@@ -431,6 +398,7 @@ m4_set_syntax (m4_syntax_table *syntax, char key, char action,
{
return -1;
}
+ syntax->suspect = false;
switch (action)
{
case '+':
@@ -449,134 +417,159 @@ m4_set_syntax (m4_syntax_table *syntax, char key, char action,
default:
assert (false);
}
- set_quote_age (syntax, false, true);
- m4__quote_uncache (syntax);
- return code;
-}
-static bool
-check_is_single_quotes (m4_syntax_table *syntax)
-{
- int ch;
- int lquote = -1;
- int rquote = -1;
-
- if (! syntax->is_single_quotes)
- return false;
- assert (syntax->quote.len1 == 1 && syntax->quote.len2 == 1);
-
- if (m4_has_syntax (syntax, *syntax->quote.str1, M4_SYNTAX_LQUOTE)
- && m4_has_syntax (syntax, *syntax->quote.str2, M4_SYNTAX_RQUOTE))
- return true;
-
- /* The most recent action invalidated our current lquote/rquote. If
- we still have exactly one character performing those roles based
- on the syntax table, then update lquote/rquote accordingly.
- Otherwise, keep lquote/rquote, but we no longer have single
- quotes. */
- for (ch = UCHAR_MAX + 1; --ch >= 0; )
+ /* Check for any cleanup needed. */
+ if (syntax->suspect)
{
- if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
+ int ch;
+ int lquote = -1;
+ int rquote = -1;
+ int bcomm = -1;
+ int ecomm = -1;
+ if (m4_has_syntax (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE))
{
- if (lquote == -1)
- lquote = ch;
- else
+ assert (syntax->quote.len1 == 1);
+ lquote = to_uchar (syntax->quote.str1[0]);
+ }
+ if (m4_has_syntax (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE))
+ {
+ assert (syntax->quote.len2 == 1);
+ rquote = to_uchar (syntax->quote.str2[0]);
+ }
+ if (m4_has_syntax (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM))
+ {
+ assert (syntax->comm.len1 == 1);
+ bcomm = to_uchar (syntax->comm.str1[0]);
+ }
+ if (m4_has_syntax (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM))
+ {
+ assert (syntax->comm.len2 == 1);
+ ecomm = to_uchar (syntax->comm.str2[0]);
+ }
+ syntax->is_macro_escaped = false;
+ /* Find candidates for each category. */
+ for (ch = UCHAR_MAX + 1; --ch >= 0; )
+ {
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
+ {
+ if (lquote == -1)
+ lquote = ch;
+ else if (lquote != ch)
+ syntax->is_single_quotes = false;
+ }
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE))
+ {
+ if (rquote == -1)
+ rquote = ch;
+ else if (rquote != ch)
+ syntax->is_single_quotes = false;
+ }
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
+ {
+ if (bcomm == -1)
+ bcomm = ch;
+ else if (bcomm != ch)
+ syntax->is_single_comments = false;
+ }
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM))
{
- syntax->is_single_quotes = false;
- break;
+ if (ecomm == -1)
+ ecomm = ch;
+ else if (ecomm != ch)
+ syntax->is_single_comments = false;
}
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE))
+ syntax->is_macro_escaped = true;
}
- if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE))
+ /* Disable multi-character delimiters if we discovered
+ delimiters. */
+ if ((1 < syntax->quote.len1 || 1 < syntax->quote.len2)
+ && (!syntax->is_single_quotes || lquote != -1 || rquote != -1))
{
- if (rquote == -1)
- rquote = ch;
- else
+ if (syntax->quote.len1)
+ {
+ syntax->quote.len1 = lquote == to_uchar (syntax->quote.str1[0]);
+ syntax->quote.str1[syntax->quote.len1] = '\0';
+ }
+ if (syntax->quote.len2)
{
- syntax->is_single_quotes = false;
- break;
+ syntax->quote.len2 = rquote == to_uchar (syntax->quote.str2[0]);
+ syntax->quote.str2[syntax->quote.len2] = '\0';
}
}
- }
- if (lquote == -1 || rquote == -1)
- syntax->is_single_quotes = false;
- else if (syntax->is_single_quotes)
- {
- *syntax->quote.str1 = lquote;
- *syntax->quote.str2 = rquote;
- }
- return syntax->is_single_quotes;
-}
-
-static bool
-check_is_single_comments (m4_syntax_table *syntax)
-{
- int ch;
- int bcomm = -1;
- int ecomm = -1;
-
- if (! syntax->is_single_comments)
- return false;
- assert (syntax->comm.len1 == 1 && syntax->comm.len2 == 1);
-
- if (m4_has_syntax (syntax, *syntax->comm.str1, M4_SYNTAX_BCOMM)
- && m4_has_syntax (syntax, *syntax->comm.str2, M4_SYNTAX_ECOMM))
- return true;
-
- /* The most recent action invalidated our current bcomm/ecomm. If
- we still have exactly one character performing those roles based
- on the syntax table, then update bcomm/ecomm accordingly.
- Otherwise, keep bcomm/ecomm, but we no longer have single
- comments. */
- for (ch = UCHAR_MAX + 1; --ch >= 0; )
- {
- if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
+ if ((1 < syntax->comm.len1 || 1 < syntax->comm.len2)
+ && (!syntax->is_single_comments || bcomm != -1 || ecomm != -1))
+ {
+ if (syntax->comm.len1)
+ {
+ syntax->comm.len1 = bcomm == to_uchar (syntax->comm.str1[0]);
+ syntax->comm.str1[syntax->comm.len1] = '\0';
+ }
+ if (syntax->comm.len2)
+ {
+ syntax->comm.len2 = ecomm == to_uchar (syntax->comm.str2[0]);
+ syntax->comm.str2[syntax->comm.len2] = '\0';
+ }
+ }
+ /* Update the strings. */
+ if (lquote != -1)
{
- if (bcomm == -1)
- bcomm = ch;
+ if (syntax->quote.len1)
+ assert (syntax->quote.len1 == 1);
else
{
- syntax->is_single_comments = false;
- break;
+ free (syntax->quote.str1);
+ syntax->quote.str1 = xcharalloc (2);
+ syntax->quote.str1[1] = '\0';
+ syntax->quote.len1 = 1;
}
+ syntax->quote.str1[0] = lquote;
+ if (rquote == -1)
+ {
+ rquote = '\'';
+ add_syntax_attribute (syntax, rquote, M4_SYNTAX_RQUOTE);
+ }
+ if (!syntax->quote.len2)
+ {
+ free (syntax->quote.str2);
+ syntax->quote.str2 = xcharalloc (2);
+ }
+ syntax->quote.str2[0] = rquote;
+ syntax->quote.str2[1] = '\0';
+ syntax->quote.len2 = 1;
}
- if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM))
+ if (bcomm != -1)
{
- if (ecomm == -1)
- ecomm = ch;
+ if (syntax->comm.len1)
+ assert (syntax->comm.len1 == 1);
else
{
- syntax->is_single_comments = false;
- break;
+ free (syntax->comm.str1);
+ syntax->comm.str1 = xcharalloc (2);
+ syntax->comm.str1[1] = '\0';
+ syntax->comm.len1 = 1;
}
+ syntax->comm.str1[0] = bcomm;
+ if (ecomm == -1)
+ {
+ ecomm = '\n';
+ add_syntax_attribute (syntax, ecomm, M4_SYNTAX_ECOMM);
+ }
+ if (!syntax->comm.len2)
+ {
+ free (syntax->comm.str2);
+ syntax->comm.str2 = xcharalloc (2);
+ }
+ syntax->comm.str2[0] = ecomm;
+ syntax->comm.str2[1] = '\0';
+ syntax->comm.len2 = 1;
}
}
- if (bcomm == -1 || ecomm == -1)
- syntax->is_single_comments = false;
- else if (syntax->is_single_comments)
- {
- *syntax->comm.str1 = bcomm;
- *syntax->comm.str2 = ecomm;
- }
- return syntax->is_single_comments;
-}
-
-static bool
-check_is_macro_escaped (m4_syntax_table *syntax)
-{
- int ch;
-
- syntax->is_macro_escaped = false;
- for (ch = UCHAR_MAX + 1; --ch >= 0; )
- if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE))
- {
- syntax->is_macro_escaped = true;
- break;
- }
-
- return syntax->is_macro_escaped;
+ set_quote_age (syntax, false, true);
+ m4__quote_uncache (syntax);
+ return code;
}
-
/* Functions for setting quotes and comment delimiters. Used by
m4_changecom () and m4_changequote (). Both functions override the
@@ -629,13 +622,11 @@ m4_set_quotes (m4_syntax_table *syntax, const char *lq, size_t lq_len,
/* changequote overrides syntax_table, but be careful when it is
used to select a start-quote sequence that is effectively
disabled. */
-
- syntax->is_single_quotes
- = (syntax->quote.len1 == 1 && syntax->quote.len2 == 1
- && !m4_has_syntax (syntax, *syntax->quote.str1,
- (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE
- | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM)));
-
+ syntax->is_single_quotes = !m4_has_syntax (syntax, *syntax->quote.str1,
+ (M4_SYNTAX_IGNORE
+ | M4_SYNTAX_ESCAPE
+ | M4_SYNTAX_ALPHA
+ | M4_SYNTAX_NUM));
for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE))
@@ -646,15 +637,12 @@ m4_set_quotes (m4_syntax_table *syntax, const char *lq, size_t lq_len,
remove_syntax_attribute (syntax, ch, M4_SYNTAX_RQUOTE);
}
- if (syntax->is_single_quotes)
+ if (syntax->is_single_quotes
+ && syntax->quote.len1 == 1 && syntax->quote.len2 == 1)
{
- add_syntax_attribute (syntax, to_uchar (syntax->quote.str1[0]),
- M4_SYNTAX_LQUOTE);
- add_syntax_attribute (syntax, to_uchar (syntax->quote.str2[0]),
- M4_SYNTAX_RQUOTE);
+ add_syntax_attribute (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE);
+ add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE);
}
- if (syntax->is_macro_escaped)
- check_is_macro_escaped (syntax);
set_quote_age (syntax, false, false);
}
@@ -703,14 +691,12 @@ m4_set_comment (m4_syntax_table *syntax, const char *bc, size_t bc_len,
/* changecom overrides syntax_table, but be careful when it is used
to select a start-comment sequence that is effectively
disabled. */
-
- syntax->is_single_comments
- = (syntax->comm.len1 == 1 && syntax->comm.len2 == 1
- && !m4_has_syntax (syntax, *syntax->comm.str1,
- (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE
- | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM
- | M4_SYNTAX_LQUOTE)));
-
+ syntax->is_single_comments = !m4_has_syntax (syntax, *syntax->comm.str1,
+ (M4_SYNTAX_IGNORE
+ | M4_SYNTAX_ESCAPE
+ | M4_SYNTAX_ALPHA
+ | M4_SYNTAX_NUM
+ | M4_SYNTAX_LQUOTE));
for (ch = UCHAR_MAX + 1; --ch >= 0; )
{
if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM))
@@ -720,20 +706,17 @@ m4_set_comment (m4_syntax_table *syntax, const char *bc, size_t bc_len,
if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM))
remove_syntax_attribute (syntax, ch, M4_SYNTAX_ECOMM);
}
- if (syntax->is_single_comments)
+ if (syntax->is_single_comments
+ && syntax->comm.len1 == 1 && syntax->comm.len2 == 1)
{
- add_syntax_attribute (syntax, to_uchar (syntax->comm.str1[0]),
- M4_SYNTAX_BCOMM);
- add_syntax_attribute (syntax, to_uchar (syntax->comm.str2[0]),
- M4_SYNTAX_ECOMM);
+ add_syntax_attribute (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM);
+ add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM);
}
- if (syntax->is_macro_escaped)
- check_is_macro_escaped (syntax);
set_quote_age (syntax, false, false);
}
/* Call this when changing anything that might impact the quote age,
- so that m4_quote_age and m4_safe_quotes will reflect the change.
+ so that m4__quote_age and m4__safe_quotes will reflect the change.
If RESET, changesyntax was reset to its default stage; if CHANGE,
arbitrary syntax has changed; otherwise, just quotes or comment
delimiters have changed. */
@@ -789,6 +772,7 @@ set_quote_age (m4_syntax_table *syntax, bool reset, bool change)
else
local_syntax_age = syntax->syntax_age;
if (local_syntax_age < 0xffff && syntax->is_single_quotes
+ && syntax->quote.len1 == 1 && syntax->quote.len2 == 1
&& !m4_has_syntax (syntax, *syntax->quote.str1,
(M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_OPEN
| M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE