summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Eric Blake <ebb9@byu.net> 2009-02-18 06:56:38 -0700
committer: Eric Blake <ebb9@byu.net> 2009-02-18 08:30:35 -0700
commit: cc55111948ac37d1dd492bbef85364bf08ed2d2d (patch)
tree: 86bb407680e0865352ed343c1b470004c778bc13
parent: 10fb79cb4f4a4cb6a174d628c3304fee61596ff8 (diff)
download: m4-cc55111948ac37d1dd492bbef85364bf08ed2d2d.tar.gz
Improve handling of $ in syntax table.
* m4/m4module.h (m4_is_syntax_single_dollar): New function.
(M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, M4_SYNTAX_RBRACE): Change to
be context rather than basic syntax categories.
(M4_SYNTAX_MASKS): Adjust macro.
* m4/m4private.h (struct m4_syntax_table): Add dollar and
is_single_dollar members.
(m4_is_syntax_single_dollar): Add fast alternative.
* m4/syntax.c (m4_syntax_create, reset_syntax_set): Adjust to
account for change to context categories.
(m4_set_syntax): Manage is_single_dollar.
(m4_is_syntax_single_dollar): New function.
* m4/macro.c (locate_dollar): New helper function.
(process_macro): Use it to speed up macro expansion.
* m4/input.c (m4__next_token): Adjust client.
* doc/m4.texinfo (Changesyntax): Document this.

Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r-- ChangeLog 19
-rw-r--r-- doc/m4.texinfo 26
-rw-r--r-- m4/input.c 8
-rw-r--r-- m4/m4module.h 23
-rw-r--r-- m4/m4private.h 6
-rw-r--r-- m4/macro.c 41
-rw-r--r-- m4/syntax.c 102
7 files changed, 142 insertions, 83 deletions
diff --git a/ChangeLog b/ChangeLog
index b5f01a29..66ac3dfa 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2009-02-18 Eric Blake <ebb9@byu.net>
+
+ Improve handling of $ in syntax table.
+ * m4/m4module.h (m4_is_syntax_single_dollar): New function.
+ (M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, M4_SYNTAX_RBRACE): Change to
+ be context rather than basic syntax categories.
+ (M4_SYNTAX_MASKS): Adjust macro.
+ * m4/m4private.h (struct m4_syntax_table): Add dollar and
+ is_single_dollar members.
+ (m4_is_syntax_single_dollar): Add fast alternative.
+ * m4/syntax.c (m4_syntax_create, reset_syntax_set): Adjust to
+ account for change to context categories.
+ (m4_set_syntax): Manage is_single_dollar.
+ (m4_is_syntax_single_dollar): New function.
+ * m4/macro.c (locate_dollar): New helper function.
+ (process_macro): Use it to speed up macro expansion.
+ * m4/input.c (m4__next_token): Adjust client.
+ * doc/m4.texinfo (Changesyntax): Document this.
+
2009-02-17 Eric Blake <ebb9@byu.net>
Sync changecom documentation with branch.
diff --git a/doc/m4.texinfo b/doc/m4.texinfo
index 815591d7..e3eee251 100644
--- a/doc/m4.texinfo
+++ b/doc/m4.texinfo
@@ -5410,8 +5410,8 @@ stream into tokens.
Each @var{syntax-spec} is a two-part string. The first part is a
command, consisting of a single character describing a syntax category,
and an optional one-character action. The action can be @samp{-} to
-remove the listed characters from that category and reassign them to the
-`Other' category, @samp{=} to set the category to the listed characters
+remove the listed characters from that category, @samp{=} to set the
+category to the listed characters
and reassign all other characters previously in that category to
`Other', or @samp{+} to add the listed characters to the category
without affecting other characters. If an action is not specified, but
@@ -5495,18 +5495,17 @@ comment delimiters, use @code{changecom} (@pxref{Changecom}). Note that
newline also defaults to the syntax category `White space', when it
appears in isolation.
-@comment FIXME - make ${} context, not basic
-@item @kbd{$} @tab @dfn{Dollar} @tab Basic
+@item @kbd{$} @tab @dfn{Dollar} @tab Context
@tab Characters that can introduce an argument reference in the body of a
macro. The default is the single character @samp{$}.
@comment FIXME - implement ${10} argument parsing.
-@item @kbd{@{} @tab @dfn{Left brace} @tab Basic
+@item @kbd{@{} @tab @dfn{Left brace} @tab Context
@tab Characters that introduce an extended argument reference in the body of
a macro immediately after a character in the Dollar category. The
default is the single character @samp{@{}.
-@item @kbd{@}} @tab @dfn{Right brace} @tab Basic
+@item @kbd{@}} @tab @dfn{Right brace} @tab Context
@tab Characters that conclude an extended argument reference in the body of a
macro. The default is the single character @samp{@}}.
@@ -5612,7 +5611,8 @@ c)
@end example
It is possible to redefine the @samp{$} used to indicate macro arguments
-in user defined macros.
+in user defined macros. Dollar class syntax elements are copied to the
+output if there is no valid expansion.
@example
define(`argref', `Dollar: $#, Question: ?#')
@@ -5620,22 +5620,12 @@ define(`argref', `Dollar: $#, Question: ?#')
argref(1, 2, 3)
@result{}Dollar: 3, Question: ?#
dnl Change argument identifier.
-changesyntax(`$?', `O$')
+changesyntax(`$?')
@result{}
argref(1,2,3)
@result{}Dollar: $#, Question: 3
-@end example
-
-@noindent
-Dollar class syntax elements are copied to the output if there is no
-valid expansion.
-
-@example
define(`escape', `$?`'1$?1?')
@result{}
-dnl Change argument identifier.
-changesyntax(`$?')
-@result{}
escape(foo)
@result{}$?1$foo?
@end example
diff --git a/m4/input.c b/m4/input.c
index 0fb41015..e061bcd7 100644
--- a/m4/input.c
+++ b/m4/input.c
@@ -2067,9 +2067,7 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
{ /* EVERYTHING ELSE */
assert (ch < CHAR_EOF);
obstack_1grow (&token_stack, ch);
- if (m4_has_syntax (M4SYNTAX, ch,
- (M4_SYNTAX_OTHER | M4_SYNTAX_NUM | M4_SYNTAX_DOLLAR
- | M4_SYNTAX_LBRACE | M4_SYNTAX_RBRACE)))
+ if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_OTHER | M4_SYNTAX_NUM))
{
if (obs)
{
@@ -2078,9 +2076,7 @@ m4__next_token (m4 *context, m4_symbol_value *token, int *line,
}
if (m4__safe_quotes (M4SYNTAX))
consume_syntax (context, obs_safe,
- (M4_SYNTAX_OTHER | M4_SYNTAX_NUM
- | M4_SYNTAX_DOLLAR | M4_SYNTAX_LBRACE
- | M4_SYNTAX_RBRACE));
+ M4_SYNTAX_OTHER | M4_SYNTAX_NUM);
type = M4_TOKEN_STRING;
}
else if (m4_has_syntax (M4SYNTAX, ch, M4_SYNTAX_SPACE))
diff --git a/m4/m4module.h b/m4/m4module.h
index c94f56ab..a16ca4f4 100644
--- a/m4/m4module.h
+++ b/m4/m4module.h
@@ -454,6 +454,7 @@ extern const m4_string_pair *m4_get_syntax_comments (m4_syntax_table *);
extern bool m4_is_syntax_single_quotes (m4_syntax_table *);
extern bool m4_is_syntax_single_comments (m4_syntax_table *);
+extern bool m4_is_syntax_single_dollar (m4_syntax_table *);
extern bool m4_is_syntax_macro_escaped (m4_syntax_table *);
/* These are values to be assigned to syntax table entries. Although
@@ -466,26 +467,28 @@ enum {
M4_SYNTAX_ALPHA = 1 << 1,
M4_SYNTAX_LQUOTE = 1 << 2,
M4_SYNTAX_BCOMM = 1 << 3,
- M4_SYNTAX_OTHER = 1 << 4,
+ M4_SYNTAX_ACTIVE = 1 << 4,
M4_SYNTAX_NUM = 1 << 5,
- M4_SYNTAX_DOLLAR = 1 << 6,
- M4_SYNTAX_LBRACE = 1 << 7,
- M4_SYNTAX_RBRACE = 1 << 8,
- M4_SYNTAX_SPACE = 1 << 9,
- M4_SYNTAX_ACTIVE = 1 << 10,
- M4_SYNTAX_OPEN = 1 << 11,
- M4_SYNTAX_CLOSE = 1 << 12,
- M4_SYNTAX_COMMA = 1 << 13,
+ M4_SYNTAX_SPACE = 1 << 6,
+ M4_SYNTAX_OPEN = 1 << 7,
+ M4_SYNTAX_CLOSE = 1 << 8,
+ M4_SYNTAX_COMMA = 1 << 9,
+ M4_SYNTAX_OTHER = 1 << 10,
/* These values are bit masks to OR with categories above, a syntax entry
may have any number of these in addition to a maximum of one of the
values above. */
+ M4_SYNTAX_DOLLAR = 1 << 11,
+ M4_SYNTAX_LBRACE = 1 << 12,
+ M4_SYNTAX_RBRACE = 1 << 13,
M4_SYNTAX_RQUOTE = 1 << 14,
M4_SYNTAX_ECOMM = 1 << 15
};
/* Mask of attribute syntax categories. */
-#define M4_SYNTAX_MASKS (M4_SYNTAX_RQUOTE | M4_SYNTAX_ECOMM)
+#define M4_SYNTAX_MASKS (M4_SYNTAX_RQUOTE | M4_SYNTAX_ECOMM \
+ | M4_SYNTAX_DOLLAR | M4_SYNTAX_LBRACE \
+ | M4_SYNTAX_RBRACE)
/* Mask of basic syntax categories where any change requires a
recomputation of the overall syntax characteristics. */
#define M4_SYNTAX_SUSPECT (M4_SYNTAX_LQUOTE | M4_SYNTAX_BCOMM \
diff --git a/m4/m4private.h b/m4/m4private.h
index 4f269796..b0cbe963 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -472,12 +472,17 @@ struct m4_syntax_table {
m4_string_pair quote; /* Quote delimiters. */
m4_string_pair comm; /* Comment delimiters. */
+ char dollar; /* Dollar character, if is_single_dollar. */
+
/* True iff only one start and end quote delimiter exist. */
bool_bitfield is_single_quotes : 1;
/* True iff only one start and end comment delimiter exist. */
bool_bitfield is_single_comments : 1;
+ /* True iff only one byte has M4_SYNTAX_DOLLAR. */
+ bool_bitfield is_single_dollar : 1;
+
/* True iff some character has M4_SYNTAX_ESCAPE. */
bool_bitfield is_macro_escaped : 1;
@@ -518,6 +523,7 @@ struct m4_syntax_table {
# define m4_is_syntax_single_quotes(S) ((S)->is_single_quotes)
# define m4_is_syntax_single_comments(S) ((S)->is_single_comments)
+# define m4_is_syntax_single_dollar(S) ((S)->is_single_dollar)
# define m4_is_syntax_macro_escaped(S) ((S)->is_macro_escaped)
#endif
diff --git a/m4/macro.c b/m4/macro.c
index 738c8cad..7295157f 100644
--- a/m4/macro.c
+++ b/m4/macro.c
@@ -688,6 +688,22 @@ m4_macro_call (m4 *context, m4_symbol_value *value, m4_obstack *expansion,
trace_post (context, trace_start, argv->info);
}
+/* Locate the next byte with dollar syntax in string STR of length
+ LEN, or return NULL. */
+static const char *
+locate_dollar (m4 *context, const char *str, size_t len)
+{
+ if (m4_is_syntax_single_dollar (M4SYNTAX))
+ return (char *) memchr (str, M4SYNTAX->dollar, len);
+ while (len--)
+ {
+ if (m4_has_syntax (M4SYNTAX, *str, M4_SYNTAX_DOLLAR))
+ return str;
+ str++;
+ }
+ return NULL;
+}
+
/* This function handles all expansion of user defined and predefined
macros. It is called with an obstack OBS, where the macros expansion
will be placed, as an unfinished object. SYMBOL points to the macro
@@ -700,19 +716,17 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
const char *text = m4_get_symbol_value_text (value);
size_t len = m4_get_symbol_value_len (value);
int i;
+ const char *dollar = locate_dollar (context, text, len);
- while (len--)
+ while (dollar)
{
- char ch;
-
- if (!m4_has_syntax (M4SYNTAX, *text, M4_SYNTAX_DOLLAR) || !len)
- {
- obstack_1grow (obs, *text);
- text++;
- continue;
- }
- ch = *text++;
- switch (*text)
+ obstack_grow (obs, text, dollar - text);
+ len -= dollar - text;
+ text = dollar;
+ if (len == 1)
+ break;
+ len--;
+ switch (*++text)
{
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
@@ -753,7 +767,7 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
if (m4_get_posixly_correct_opt (context)
|| !VALUE_ARG_SIGNATURE (value))
{
- obstack_1grow (obs, ch);
+ obstack_1grow (obs, *dollar);
}
else
{
@@ -796,11 +810,12 @@ process_macro (m4 *context, m4_symbol_value *value, m4_obstack *obs,
text = endp;
free (key);
- break;
}
break;
}
+ dollar = locate_dollar (context, text, len);
}
+ obstack_grow (obs, text, len);
}
diff --git a/m4/syntax.c b/m4/syntax.c
index 09490555..d26c0b89 100644
--- a/m4/syntax.c
+++ b/m4/syntax.c
@@ -29,7 +29,7 @@
The input is read character by character and grouped together
according to a syntax table. The character groups are (definitions
- are all in m4.h, those marked with a * are not yet in use):
+ are all in m4module.h, those marked with a * are not yet in use):
Basic (all characters fall in one of these mutually exclusive bins)
M4_SYNTAX_IGNORE *Character to be deleted from input as if not present
@@ -38,9 +38,6 @@
M4_SYNTAX_OPEN Open list of macro arguments
M4_SYNTAX_CLOSE Close list of macro arguments
M4_SYNTAX_COMMA Separates macro arguments
- M4_SYNTAX_DOLLAR Indicates macro argument in user macros
- M4_SYNTAX_LBRACE Indicates start of extended macro argument
- M4_SYNTAX_RBRACE Indicates end of extended macro argument
M4_SYNTAX_ACTIVE This character is a macro name by itself
M4_SYNTAX_ESCAPE Use this character to prefix all macro names
@@ -53,6 +50,9 @@
Attribute (these are context sensitive, and exist in addition to basic)
M4_SYNTAX_RQUOTE A single character right quote
M4_SYNTAX_ECOMM A single character end comment delimiter
+ M4_SYNTAX_DOLLAR Indicates macro argument in user macros
+ M4_SYNTAX_LBRACE *Indicates start of extended macro argument
+ M4_SYNTAX_RBRACE *Indicates end of extended macro argument
Besides adding new facilities, the use of a syntax table will reduce
the number of calls to next_token (). Now groups of OTHER, NUM and
@@ -80,10 +80,7 @@
M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM
M4_SYNTAX_OTHER } Reads all M4_SYNTAX_OTHER, M4_SYNTAX_NUM
- M4_SYNTAX_NUM } M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, M4_SYNTAX_RBRACE
- M4_SYNTAX_DOLLAR }
- M4_SYNTAX_LBRACE }
- M4_SYNTAX_RBRACE }
+ M4_SYNTAX_NUM }
M4_SYNTAX_SPACE Reads all M4_SYNTAX_SPACE, depending on buffering
M4_SYNTAX_ACTIVE Returns a single char as a macro name
@@ -92,12 +89,10 @@
M4_SYNTAX_CLOSE }
M4_SYNTAX_COMMA }
- The $, {, and } are not really a part of m4's input syntax, because a
- a string is parsed equally whether there is a $ or not. These characters
- are instead used during user macro expansion.
-
-
- M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM do not start tokens.
+ M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM are context-sensitive, and
+ close out M4_SYNTAX_LQUOTE and M4_SYNTAX_BCOMM, respectively.
+ Also, M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, and M4_SYNTAX_RBRACE are
+ context-sensitive, only mattering when expanding macro definitions.
There are several optimizations that can be performed depending on
known states of the syntax table. For example, when searching for
@@ -125,7 +120,8 @@ m4_syntax_create (void)
m4_syntax_table *syntax = (m4_syntax_table *) xzalloc (sizeof *syntax);
int ch;
- /* Set up default table. This table never changes during operation. */
+ /* Set up default table. This table never changes during operation,
+ and contains no context attributes. */
for (ch = UCHAR_MAX + 1; --ch >= 0; )
switch (ch)
{
@@ -138,25 +134,12 @@ m4_syntax_create (void)
case ',':
syntax->orig[ch] = M4_SYNTAX_COMMA;
break;
- case '$':
- syntax->orig[ch] = M4_SYNTAX_DOLLAR;
- break;
- case '{':
- syntax->orig[ch] = M4_SYNTAX_LBRACE;
- break;
- case '}':
- syntax->orig[ch] = M4_SYNTAX_RBRACE;
- break;
case '`':
syntax->orig[ch] = M4_SYNTAX_LQUOTE;
break;
case '#':
syntax->orig[ch] = M4_SYNTAX_BCOMM;
break;
- case '\0':
- /* FIXME - revisit the ignore syntax attribute. */
- /* syntax->orig[ch] = M4_SYNTAX_IGNORE; */
- /* break; */
default:
if (isspace (ch))
syntax->orig[ch] = M4_SYNTAX_SPACE;
@@ -196,24 +179,25 @@ m4_syntax_code (char ch)
switch (ch)
{
- /* Sorted according to the order of M4_SYNTAX_* in m4module.h. */
- /* FIXME - revisit the ignore syntax attribute. */
+ /* Sorted according to the order of M4_SYNTAX_* in m4module.h. */
+ /* FIXME - revisit the ignore syntax attribute. */
case 'I': case 'i': code = M4_SYNTAX_IGNORE; break;
+ /* Basic categories. */
case '@': code = M4_SYNTAX_ESCAPE; break;
case 'W': case 'w': code = M4_SYNTAX_ALPHA; break;
case 'L': case 'l': code = M4_SYNTAX_LQUOTE; break;
case 'B': case 'b': code = M4_SYNTAX_BCOMM; break;
- case 'O': case 'o': code = M4_SYNTAX_OTHER; break;
+ case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break;
case 'D': case 'd': code = M4_SYNTAX_NUM; break;
- case '$': code = M4_SYNTAX_DOLLAR; break;
- case '{': code = M4_SYNTAX_LBRACE; break;
- case '}': code = M4_SYNTAX_RBRACE; break;
case 'S': case 's': code = M4_SYNTAX_SPACE; break;
- case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break;
case '(': code = M4_SYNTAX_OPEN; break;
case ')': code = M4_SYNTAX_CLOSE; break;
case ',': code = M4_SYNTAX_COMMA; break;
-
+ case 'O': case 'o': code = M4_SYNTAX_OTHER; break;
+ /* Context categories. */
+ case '$': code = M4_SYNTAX_DOLLAR; break;
+ case '{': code = M4_SYNTAX_LBRACE; break;
+ case '}': code = M4_SYNTAX_RBRACE; break;
case 'R': case 'r': code = M4_SYNTAX_RQUOTE; break;
case 'E': case 'e': code = M4_SYNTAX_ECOMM; break;
@@ -344,6 +328,27 @@ reset_syntax_set (m4_syntax_table *syntax, int code)
else
remove_syntax_attribute (syntax, ch, code);
}
+ else if (code == M4_SYNTAX_DOLLAR)
+ {
+ if (ch == '$')
+ add_syntax_attribute (syntax, ch, code);
+ else
+ remove_syntax_attribute (syntax, ch, code);
+ }
+ else if (code == M4_SYNTAX_LBRACE)
+ {
+ if (ch == '{')
+ add_syntax_attribute (syntax, ch, code);
+ else
+ remove_syntax_attribute (syntax, ch, code);
+ }
+ else if (code == M4_SYNTAX_RBRACE)
+ {
+ if (ch == '}')
+ add_syntax_attribute (syntax, ch, code);
+ else
+ remove_syntax_attribute (syntax, ch, code);
+ }
else if (syntax->orig[ch] == code || m4_has_syntax (syntax, ch, code))
add_syntax_attribute (syntax, ch, syntax->orig[ch]);
}
@@ -371,12 +376,17 @@ m4_reset_syntax (m4_syntax_table *syntax)
syntax->comm.len1 = 1;
syntax->comm.str2 = xmemdup0 (DEF_ECOMM, 1);
syntax->comm.len2 = 1;
+ syntax->dollar = '$';
add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE);
add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM);
+ add_syntax_attribute (syntax, '$', M4_SYNTAX_DOLLAR);
+ add_syntax_attribute (syntax, '{', M4_SYNTAX_LBRACE);
+ add_syntax_attribute (syntax, '}', M4_SYNTAX_RBRACE);
syntax->is_single_quotes = true;
syntax->is_single_comments = true;
+ syntax->is_single_dollar = true;
syntax->is_macro_escaped = false;
set_quote_age (syntax, true, false);
}
@@ -428,6 +438,7 @@ m4_set_syntax (m4_syntax_table *syntax, char key, char action,
int ecomm = -1;
bool single_quote_possible = true;
bool single_comm_possible = true;
+ int dollar = -1;
if (m4_has_syntax (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE))
{
assert (syntax->quote.len1 == 1);
@@ -448,6 +459,7 @@ m4_set_syntax (m4_syntax_table *syntax, char key, char action,
assert (syntax->comm.len2 == 1);
ecomm = to_uchar (syntax->comm.str2[0]);
}
+ syntax->is_single_dollar = false;
syntax->is_macro_escaped = false;
/* Find candidates for each category. */
for (ch = UCHAR_MAX + 1; --ch >= 0; )
@@ -480,6 +492,16 @@ m4_set_syntax (m4_syntax_table *syntax, char key, char action,
else if (ecomm != ch)
single_comm_possible = false;
}
+ if (m4_has_syntax (syntax, ch, M4_SYNTAX_DOLLAR))
+ {
+ if (dollar == -1)
+ {
+ syntax->dollar = dollar = ch;
+ syntax->is_single_dollar = true;
+ }
+ else
+ syntax->is_single_dollar = false;
+ }
if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE))
syntax->is_macro_escaped = true;
}
@@ -915,6 +937,14 @@ m4_is_syntax_single_comments (m4_syntax_table *syntax)
return syntax->is_single_comments;
}
+#undef m4_is_syntax_single_dollar
+bool
+m4_is_syntax_single_dollar (m4_syntax_table *syntax)
+{
+ assert (syntax);
+ return syntax->is_single_dollar;
+}
+
#undef m4_is_syntax_macro_escaped
bool
m4_is_syntax_macro_escaped (m4_syntax_table *syntax)