summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGary V. Vaughan <gary@gnu.org>2002-05-29 18:06:42 +0000
committerEric Blake <ebb9@byu.net>2007-10-05 21:58:22 -0600
commitde11cb28a57e7d1196a587f24dc83f1b492570b3 (patch)
tree7b76cf261009fc6bbcb298ee21c67e7bf1dd2b78
parent86b1ec4e3585a53151d88f754a342819d3dc6b06 (diff)
downloadm4-de11cb28a57e7d1196a587f24dc83f1b492570b3.tar.gz
Preparations for refactoring syntax tables to allow reverse
lookups [fetch me a M4_SYNTAX_OPEN], without compromising the speed of normal lookups in an array of unsigned short.
-rw-r--r--ChangeLog22
-rw-r--r--m4/Makefile.am2
-rw-r--r--m4/input.c297
-rw-r--r--m4/m4module.h68
-rw-r--r--m4/m4private.h9
-rw-r--r--m4/syntax.c300
-rw-r--r--src/main.c1
7 files changed, 385 insertions, 314 deletions
diff --git a/ChangeLog b/ChangeLog
index 9b552735..0d2ea823 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,27 @@
2002-05-29 Gary V. Vaughan <gary@gnu.org>
+ Preparations for refactoring syntax tables to allow reverse
+ lookups [fetch me a M4_SYNTAX_OPEN], without compromising the
+ speed of normal lookups in an array of unsigned short.
+
+ * m4/input.c (single_quotes, single_comments, use_macro_escape):
+ Moved from here...
+ * m4/m4private.h (m4__single_quotes, m4__single_comments,
+ m4__use_macro_escape): ...to here, and renamed. The `m4__' prefix
+ is for internal symbols which unavoidably pollute the global
+ namespace, but are not published APIs.
+ Adjusted all callers.
+ * m4/input.c (m4_syntax_init, m4_syntax_code, m4_set_quotes,
+ m4_set_comment, m4_set_syntax, set_syntax_internal,
+ unset_syntax_attribute): Moved from this file...
+ * m4/syntax.c: New. ...to this file.
+ Also added an m4_syntax_exit stub for orthogonality.
+ * src/main.c (main): Use it.
+ * m4/Makefile.am (libm4_la_SOURCES): Add syntax.c.
+ * m4/m4module.h: Reformatting. New prototypes.
+
+2002-05-29 Gary V. Vaughan <gary@gnu.org>
+
* bootstrap (aclocal_apiversion): The aclocal apiversion is
distinct from the automake release number (in that the apiversion
apparently has no micro-version-component).
diff --git a/m4/Makefile.am b/m4/Makefile.am
index 85eb91eb..48656011 100644
--- a/m4/Makefile.am
+++ b/m4/Makefile.am
@@ -38,7 +38,7 @@ EXTRA_DIST = $(EXTRA_HEADERS) obstack.c
lib_LTLIBRARIES = libm4.la
libm4_la_SOURCES = builtin.c debug.c error.c hash.c \
input.c ltdl.c macro.c module.c output.c \
- path.c regex.c symtab.c utility.c
+ path.c regex.c symtab.c syntax.c utility.c
libm4_la_LIBADD = $(LTLIBOBJS) $(LIBADD_DL)
module.o module.lo: $(srcdir)/module.c pathconf.h
diff --git a/m4/input.c b/m4/input.c
index fd43d823..2f8425ac 100644
--- a/m4/input.c
+++ b/m4/input.c
@@ -26,9 +26,6 @@
#define DEBUG_INPUT
#undef DEBUG_INPUT
-#define DEBUG_SYNTAX
-#undef DEBUG_SYNTAX
-
/*
Unread input can be either files, that should be read (eg. included
files), strings, which should be rescanned (eg. macro expansion
@@ -67,77 +64,8 @@
for use by the error handling functions in m4.c. Whenever a file
input_block is pushed, the current file name and line number is saved
in the input_block, and the two variables are reset to match the new
- input file.
-
- THE SYNTAX TABLE
-
- The input is read character by character and grouped together
- according to a syntax table. The character groups are (definitions
- are all in m4.h, those marked with a * are not yet in use):
-
- M4_SYNTAX_IGNORE *Character to be deleted from input as if not present
- M4_SYNTAX_OTHER Any character with no special meaning to m4
- M4_SYNTAX_SPACE Whitespace (ignored when leading macro arguments)
- M4_SYNTAX_OPEN Open list of macro arguments
- M4_SYNTAX_CLOSE Close list of macro arguments
- M4_SYNTAX_COMMA Separates macro arguments
- M4_SYNTAX_DOLLAR *Indicates macro argument in user macros
- M4_SYNTAX_ACTIVE This caracter is a macro name by itself
-
- M4_SYNTAX_ESCAPE Use this character to prefix all macro names
- M4_SYNTAX_ALPHA Alphabetic characters (can start macro names)
- M4_SYNTAX_NUM Numeric characters
- M4_SYNTAX_ALNUM Alphanumeric characters (can form macro names)
-
- (These are bit masks)
- M4_SYNTAX_LQUOTE A single characters left quote
- M4_SYNTAX_RQUOTE A single characters right quote
- M4_SYNTAX_BCOMM A single characters begin comment delimiter
- M4_SYNTAX_ECOMM A single characters end comment delimiter
-
- Besides adding new facilities, the use of a syntax table will reduce
- the number of calls to next_token (). Now groups of OTHER, NUM and
- SPACE characters can be returned as a single token, since next_token
- () knows they have no special syntactical meaning to m4. This is,
- however, only possible if only single character quotes comments
- comments are used, because otherwise the quote and comment characters
- will not show up in the syntax-table.
-
- Having a syntax table allows new facilities. The new builtin
- "changesyntax" allows the the user to change the category of any
- character.
-
- Default '\n' is both ECOMM and SPACE, depending on the context. To
- solve the problem of quotes and comments that have diffent syntax
- code based on the context, the [LR]QUOTE and [BE]COMM codes are bit
- masks to add to an ordinary code. If a character is made a quote it
- will be recognised if the basis code does not have precedence.
-
- When changing quotes and comment delimiters only the bits are
- removed, and the characters are therefore reverted to its old
- category code.
-
- The precedence as implemented by next_token () is:
-
- M4_SYNTAX_IGNORE *Filtered out below next_token ()
- M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM
- M4_SYNTAX_ESCAPE Reads macro name iff set, else next
- M4_SYNTAX_ALPHA Reads macro name
- M4_SYNTAX_LQUOTE Reads all until balanced M4_SYNTAX_RQUOTE
-
- M4_SYNTAX_OTHER and M4_SYNTAX_NUM
- Reads all M4_SYNTAX_OTHER and M4_SYNTAX_NUM
- M4_SYNTAX_SPACE Reads all M4_SYNTAX_SPACE
- M4_SYNTAX_ACTIVE Returns a single char as a word
- the rest Returned as a single char
-
- M4_SYNTAX_DOLLAR is not currently used. The character $ is treated as a
- M4_SYNTAX_OTHER. It could be done, but it will slow next_token () down
- a bit. The $ is not really a part of m4's input syntax in the sense
- that a string is parsed equally whether there is a $ or not. The
- character $ is used by convention in user macros. */
-
-static void check_use_macro_escape (void);
+ input file. */
+
static int file_peek (void);
static int file_read (void);
static void file_unget (int ch);
@@ -148,14 +76,12 @@ static int macro_read (void);
static int match_input (const unsigned char *s);
static int next_char (void);
static void pop_input (void);
-static void set_syntax_internal (int code, int ch);
static int single_peek (void);
static int single_read (void);
static int string_peek (void);
static int string_read (void);
static void string_unget (int ch);
static void unget_input (int ch);
-static void unset_syntax_attribute (int code, int ch);
struct input_funcs
{
@@ -243,22 +169,10 @@ static input_block *next;
/* Flag for next_char () to increment m4_current_line. */
static boolean start_of_input_line;
-/* Input syntax table */
-/* unsigned short syntax_table[256]; moved to m4module.c. */
-
#define CHAR_EOF 256 /* character return on EOF */
#define CHAR_MACRO 257 /* character return for MACRO token */
#define CHAR_RETRY 258 /* character return for end of input block */
-/* TRUE iff strlen(rquote) == strlen(lquote) == 1 */
-static boolean single_quotes;
-
-/* TRUE iff strlen(bcomm) == strlen(ecomm) == 1 */
-static boolean single_comments;
-
-/* TRUE iff some character has M4_SYNTAX_ESCAPE */
-static boolean use_macro_escape;
-
/* push_file () pushes an input file on the input stack, saving the
@@ -760,9 +674,6 @@ match_input (const unsigned char *s)
/* Inititialise input stacks, and quote/comment characters. */
-static void set_syntax_internal (int code, int ch);
-static void unset_syntax_attribute (int code, int ch);
-
void
m4_input_init (void)
{
@@ -788,15 +699,15 @@ m4_input_init (void)
lquote.length = strlen (lquote.string);
rquote.string = xstrdup (DEF_RQUOTE);
rquote.length = strlen (rquote.string);
- single_quotes = TRUE;
+ m4__single_quotes = TRUE;
bcomm.string = xstrdup (DEF_BCOMM);
bcomm.length = strlen (bcomm.string);
ecomm.string = xstrdup (DEF_ECOMM);
ecomm.length = strlen (ecomm.string);
- single_comments = TRUE;
+ m4__single_comments = TRUE;
- use_macro_escape = FALSE;
+ m4__use_macro_escape = FALSE;
}
void
@@ -811,198 +722,8 @@ m4_input_exit (void)
obstack_free (&token_stack, NULL);
}
-void
-m4_syntax_init (void)
-{
- int ch;
-
- for (ch = 256; --ch > 0;)
- {
- if (ch == '(')
- set_syntax_internal (M4_SYNTAX_OPEN, ch);
- else if (ch == ')')
- set_syntax_internal (M4_SYNTAX_CLOSE, ch);
- else if (ch == ',')
- set_syntax_internal (M4_SYNTAX_COMMA, ch);
- else if (isspace (ch))
- set_syntax_internal (M4_SYNTAX_SPACE, ch);
- else if (isalpha (ch) || ch == '_')
- set_syntax_internal (M4_SYNTAX_ALPHA, ch);
- else if (isdigit (ch))
- set_syntax_internal (M4_SYNTAX_NUM, ch);
- else
- set_syntax_internal (M4_SYNTAX_OTHER, ch);
- }
- /* set_syntax_internal(M4_SYNTAX_IGNORE, 0); */
-
- /* Default quotes and comment delimiters are always one char */
- set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]);
- set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]);
- set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]);
- set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]);
-}
-
-int
-m4_syntax_code (char ch)
-{
- int code;
-
- switch (ch)
- {
- case 'I': case 'i': code = M4_SYNTAX_IGNORE; break;
- case 'O': case 'o': code = M4_SYNTAX_OTHER; break;
- case 'S': case 's': code = M4_SYNTAX_SPACE; break;
- case 'W': case 'w': code = M4_SYNTAX_ALPHA; break;
- case 'D': case 'd': code = M4_SYNTAX_NUM; break;
-
- case '(': code = M4_SYNTAX_OPEN; break;
- case ')': code = M4_SYNTAX_CLOSE; break;
- case ',': code = M4_SYNTAX_COMMA; break;
- case '@': code = M4_SYNTAX_ESCAPE; break;
-#if 0 /* not yet used */
- case '$': code = M4_SYNTAX_DOLLAR; break;
-#endif
-
- case 'L': case 'l': code = M4_SYNTAX_LQUOTE; break;
- case 'R': case 'r': code = M4_SYNTAX_RQUOTE; break;
- case 'B': case 'b': code = M4_SYNTAX_BCOMM; break;
- case 'E': case 'e': code = M4_SYNTAX_ECOMM; break;
- case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break;
-
- default: code = -1; break;
- }
-
- return code;
-}
-
-static void
-check_use_macro_escape (void)
-{
- int ch;
-
- use_macro_escape = FALSE;
- for (ch = 256; --ch >= 0; )
- if (M4_IS_ESCAPE (ch))
- use_macro_escape = TRUE;
-}
-
-/* Functions for setting quotes and comment delimiters. Used by
- m4_changecom () and m4_changequote (). Both functions overrides the
- syntax_table to maintain compatibility. */
-void
-m4_set_quotes (const char *lq, const char *rq)
-{
- int ch;
- for (ch = 256; --ch >= 0;) /* changequote overrides syntax_table */
- if (M4_IS_LQUOTE (ch) || M4_IS_RQUOTE (ch))
- unset_syntax_attribute (M4_SYNTAX_LQUOTE | M4_SYNTAX_RQUOTE, ch);
-
- xfree (lquote.string);
- xfree (rquote.string);
-
- lquote.string = xstrdup (lq ? lq : DEF_LQUOTE);
- lquote.length = strlen (lquote.string);
- rquote.string = xstrdup (rq ? rq : DEF_RQUOTE);
- rquote.length = strlen (rquote.string);
-
- single_quotes = (lquote.length == 1 && rquote.length == 1);
-
- if (single_quotes)
- {
- set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]);
- set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]);
- }
-
- if (use_macro_escape)
- check_use_macro_escape ();
-}
-
-void
-m4_set_comment (const char *bc, const char *ec)
-{
- int ch;
- for (ch = 256; --ch >= 0;) /* changecom overrides syntax_table */
- if (M4_IS_BCOMM (ch) || M4_IS_ECOMM (ch))
- unset_syntax_attribute (M4_SYNTAX_BCOMM | M4_SYNTAX_ECOMM, ch);
-
- xfree (bcomm.string);
- xfree (ecomm.string);
-
- bcomm.string = xstrdup (bc ? bc : DEF_BCOMM);
- bcomm.length = strlen (bcomm.string);
- ecomm.string = xstrdup (ec ? ec : DEF_ECOMM);
- ecomm.length = strlen (ecomm.string);
-
- single_comments = (bcomm.length == 1 && ecomm.length == 1);
-
- if (single_comments)
- {
- set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]);
- set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]);
- }
-
- if (use_macro_escape)
- check_use_macro_escape ();
-}
-
-/* Functions to manipulate the syntax table. */
-static void
-set_syntax_internal (int code, int ch)
-{
- if (code & M4_SYNTAX_MASKS)
- m4_syntax_table[ch] |= code;
- else
- m4_syntax_table[ch] = code;
-
-#ifdef DEBUG_SYNTAX
- fprintf(stderr, "Set syntax %o %c = %04X\n",
- ch, isprint(ch) ? ch : '-',
- m4_syntax_table[ch]);
-#endif
-}
-
-static void
-unset_syntax_attribute (int code, int ch)
-{
- if (code & M4_SYNTAX_MASKS)
- m4_syntax_table[ch] &= ~code;
-
-#ifdef DEBUG_SYNTAX
- fprintf(stderr, "Unset syntax %o %c = %04X\n",
- ch, isprint(ch) ? ch : '-',
- m4_syntax_table[ch]);
-#endif
-}
-
-void
-m4_set_syntax (char key, const unsigned char *chars)
-{
- int ch, code;
-
- code = m4_syntax_code (key);
-
- if ((code < 0) && (key != '\0'))
- {
- M4ERROR ((warning_status, 0,
- _("Undefined syntax code %c"), key));
- return;
- }
-
- if (*chars != '\0')
- while ((ch = *chars++))
- set_syntax_internal (code, ch);
- else
- for (ch = 256; --ch > 0; )
- set_syntax_internal (code, ch);
-
- if (use_macro_escape || code == M4_SYNTAX_ESCAPE)
- check_use_macro_escape();
-}
-
-
-
/* Parse and return a single token from the input stream. A token can
either be TOKEN_EOF, if the input_stack is empty; it can be TOKEN_STRING
for a quoted string; TOKEN_WORD for something that is a potential macro
@@ -1056,7 +777,7 @@ m4_next_token (m4_token *td)
type = discard_comments ? M4_TOKEN_NONE : M4_TOKEN_STRING;
}
/* COMMENT, LONGER DELIM */
- else if (!single_comments && MATCH (ch, bcomm.string))
+ else if (!m4__single_comments && MATCH (ch, bcomm.string))
{
obstack_grow (&token_stack, bcomm.string, bcomm.length);
while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm.string))
@@ -1103,7 +824,7 @@ m4_next_token (m4_token *td)
if (ch != CHAR_EOF)
unget_input(ch);
- type = use_macro_escape ? M4_TOKEN_STRING : M4_TOKEN_WORD;
+ type = m4__use_macro_escape ? M4_TOKEN_STRING : M4_TOKEN_WORD;
}
else if (M4_IS_LQUOTE(ch)) /* QUOTED STRING, SINGLE QUOTES */
{
@@ -1135,7 +856,7 @@ m4_next_token (m4_token *td)
type = M4_TOKEN_STRING;
}
/* QUOTED STRING, LONGER QUOTES */
- else if (!single_quotes && MATCH (ch, lquote.string))
+ else if (!m4__single_quotes && MATCH (ch, lquote.string))
{
const char *current_file = m4_current_file;
int current_line = m4_current_line;
@@ -1163,7 +884,7 @@ m4_next_token (m4_token *td)
}
type = M4_TOKEN_STRING;
}
- else if (single_quotes && single_comments) /* EVERYTHING ELSE */
+ else if (m4__single_quotes && m4__single_comments) /* EVERYTHING ELSE */
{
obstack_1grow (&token_stack, ch);
diff --git a/m4/m4module.h b/m4/m4module.h
index 741ea86c..cd9b9c58 100644
--- a/m4/m4module.h
+++ b/m4/m4module.h
@@ -57,6 +57,12 @@ typedef struct {
} m4_builtin;
+
+/* --- MODULE MANAGEMENT --- */
+
+typedef void m4_module_init_func (lt_dlhandle, struct obstack*);
+typedef void m4_module_finish_func (lt_dlhandle, struct obstack*);
+
extern void m4_module_init (void);
extern lt_dlhandle m4_module_load (const char*, struct obstack*);
extern void m4_module_unload (const char*, struct obstack*);
@@ -72,6 +78,9 @@ extern m4_macro *m4_module_macros (lt_dlhandle);
extern lt_dlhandle m4_module_find_by_builtin (const m4_builtin*);
+
+/* --- MACRO (and builtin) MANAGEMENT --- */
+
extern m4_symbol *m4_macro_pushdef (const char *name, lt_dlhandle handle,
const char *text, int flags,
int min_args, int max_args);
@@ -95,6 +104,10 @@ extern const m4_builtin *m4_builtin_find_by_name (
extern const m4_builtin *m4_builtin_find_by_func (
const m4_builtin *, m4_builtin_func *);
+
+
+/* --- SYMBOL TABLE MANAGEMENT --- */
+
extern m4_hash *m4_symtab;
extern void m4_symtab_init (void);
@@ -133,9 +146,6 @@ typedef enum {
M4_TOKEN_FUNC
} m4_data_t;
-typedef void m4_module_init_func (lt_dlhandle, struct obstack*);
-typedef void m4_module_finish_func (lt_dlhandle, struct obstack*);
-
extern m4_token_t m4_token_type (m4_token *);
extern char *m4_token_text (m4_token *);
extern m4_builtin_func *m4_token_func (m4_token *);
@@ -208,6 +218,9 @@ void m4_shipout_string (struct obstack*, const char*, int, boolean);
void m4_dump_args (struct obstack *obs, int argc, m4_token **argv, const char *sep, boolean quoted);
+
+/* --- RUNTIME DEBUGGING --- */
+
FILE *m4_debug;
/* The value of debug_level is a bitmask of the following. */
@@ -323,6 +336,13 @@ void m4_process_macro (struct obstack *obs, m4_symbol *symbol, int argc, m4_toke
/* --- SYNTAX TABLE DEFINITIONS --- */
+/* Please read the comment at the top of syntax.c for details */
+unsigned short m4_syntax_table[256];
+
+extern void m4_syntax_init (void);
+extern void m4_syntax_exit (void);
+extern int m4_syntax_code (char ch);
+
/* These are simple values, not bit masks. There is no overlap. */
#define M4_SYNTAX_OTHER (0x0000)
@@ -352,40 +372,38 @@ void m4_process_macro (struct obstack *obs, m4_symbol *symbol, int argc, m4_toke
#define M4_SYNTAX_VALUE (0x00FF|M4_SYNTAX_LQUOTE|M4_SYNTAX_BCOMM)
#define M4_SYNTAX_MASKS (0xFF00)
-
-#define m4_syntax(ch) m4_syntax_table[(int)(ch)]
+#define m4__syntax(ch) m4_syntax_table[(int)(ch)]
-#define M4_IS_OTHER(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OTHER)
-#define M4_IS_IGNORE(ch) ((m4_syntax(ch)) == M4_SYNTAX_IGNORE)
-#define M4_IS_SPACE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_SPACE)
+#define M4_IS_OTHER(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OTHER)
+#define M4_IS_IGNORE(ch) ((m4__syntax(ch)) == M4_SYNTAX_IGNORE)
+#define M4_IS_SPACE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_SPACE)
-#define M4_IS_OPEN(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OPEN)
-#define M4_IS_CLOSE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_CLOSE)
-#define M4_IS_COMMA(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_COMMA)
-#define M4_IS_DOLLAR(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_DOLLAR)
-#define M4_IS_ACTIVE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ACTIVE)
+#define M4_IS_OPEN(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OPEN)
+#define M4_IS_CLOSE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_CLOSE)
+#define M4_IS_COMMA(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_COMMA)
+#define M4_IS_DOLLAR(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_DOLLAR)
+#define M4_IS_ACTIVE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ACTIVE)
-#define M4_IS_ESCAPE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ESCAPE)
-#define M4_IS_ALPHA(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ALPHA)
-#define M4_IS_NUM(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_NUM)
-#define M4_IS_ALNUM(ch) (((m4_syntax(ch)) & M4_SYNTAX_ALNUM) != 0)
+#define M4_IS_ESCAPE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ESCAPE)
+#define M4_IS_ALPHA(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ALPHA)
+#define M4_IS_NUM(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_NUM)
+#define M4_IS_ALNUM(ch) (((m4__syntax(ch)) & M4_SYNTAX_ALNUM) != 0)
-#define M4_IS_LQUOTE(ch) (m4_syntax(ch) & M4_SYNTAX_LQUOTE)
-#define M4_IS_RQUOTE(ch) (m4_syntax(ch) & M4_SYNTAX_RQUOTE)
-#define M4_IS_BCOMM(ch) (m4_syntax(ch) & M4_SYNTAX_BCOMM)
-#define M4_IS_ECOMM(ch) (m4_syntax(ch) & M4_SYNTAX_ECOMM)
+#define M4_IS_LQUOTE(ch) (m4__syntax(ch) & M4_SYNTAX_LQUOTE)
+#define M4_IS_RQUOTE(ch) (m4__syntax(ch) & M4_SYNTAX_RQUOTE)
+#define M4_IS_BCOMM(ch) (m4__syntax(ch) & M4_SYNTAX_BCOMM)
+#define M4_IS_ECOMM(ch) (m4__syntax(ch) & M4_SYNTAX_ECOMM)
-/* Please read the comment at the top of input.c for details */
-unsigned short m4_syntax_table[256];
+
+
+/* --- TOKENISATION AND INPUT --- */
/* current input file, and line */
const char *m4_current_file;
int m4_current_line;
-extern int m4_syntax_code (char ch);
extern void m4_input_init (void);
extern void m4_input_exit (void);
-extern void m4_syntax_init (void);
extern int m4_peek_input (void);
extern m4_token_t m4_next_token (m4_token *);
extern void m4_token_copy (m4_token *dest, m4_token *src);
diff --git a/m4/m4private.h b/m4/m4private.h
index 4d6094e0..645e3da8 100644
--- a/m4/m4private.h
+++ b/m4/m4private.h
@@ -27,6 +27,15 @@
#include <assert.h>
#include <m4module.h>
+/* TRUE iff strlen(rquote) == strlen(lquote) == 1 */
+extern boolean m4__single_quotes;
+
+/* TRUE iff strlen(bcomm) == strlen(ecomm) == 1 */
+extern boolean m4__single_comments;
+
+/* TRUE iff some character has M4_SYNTAX_ESCAPE */
+extern boolean m4__use_macro_escape;
+
struct m4_module_data {
m4_builtin *bp; /* `m4_builtin_table' address */
m4_macro *mp; /* `m4_macro_table' address */
diff --git a/m4/syntax.c b/m4/syntax.c
new file mode 100644
index 00000000..be4c7a02
--- /dev/null
+++ b/m4/syntax.c
@@ -0,0 +1,300 @@
+/* GNU m4 -- A simple macro processor
+ Copyright 1989, 90, 91, 92, 93, 94, 2002 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA
+*/
+
+#include "m4private.h"
+
+#define DEBUG_SYNTAX
+#undef DEBUG_SYNTAX
+
+/* THE SYNTAX TABLE
+
+ The input is read character by character and grouped together
+ according to a syntax table. The character groups are (definitions
+ are all in m4module.h, those marked with a * are not yet in use):
+
+ M4_SYNTAX_IGNORE *Character to be deleted from input as if not present
+ M4_SYNTAX_OTHER Any character with no special meaning to m4
+ M4_SYNTAX_SPACE Whitespace (ignored when leading macro arguments)
+ M4_SYNTAX_OPEN Open list of macro arguments
+ M4_SYNTAX_CLOSE Close list of macro arguments
+ M4_SYNTAX_COMMA Separates macro arguments
+ M4_SYNTAX_DOLLAR *Indicates macro argument in user macros
+ M4_SYNTAX_ACTIVE This character is a macro name by itself
+
+ M4_SYNTAX_ESCAPE Use this character to prefix all macro names
+ M4_SYNTAX_ALPHA Alphabetic characters (can start macro names)
+ M4_SYNTAX_NUM Numeric characters
+ M4_SYNTAX_ALNUM Alphanumeric characters (can form macro names)
+
+ (These are bit masks)
+ M4_SYNTAX_LQUOTE A single-character left quote
+ M4_SYNTAX_RQUOTE A single-character right quote
+ M4_SYNTAX_BCOMM A single-character begin comment delimiter
+ M4_SYNTAX_ECOMM A single-character end comment delimiter
+
+ Besides adding new facilities, the use of a syntax table will reduce
+ the number of calls to next_token (). Now groups of OTHER, NUM and
+ SPACE characters can be returned as a single token, since next_token
+ () knows they have no special syntactical meaning to m4. This is,
+ however, only possible if only single character quotes and
+ comments are used, because otherwise the quote and comment characters
+ will not show up in the syntax-table.
+
+ Having a syntax table allows new facilities. The new builtin
+ "changesyntax" allows the user to change the category of any
+ character.
+
+ Default '\n' is both ECOMM and SPACE, depending on the context. To
+ solve the problem of quotes and comments that have different syntax
+ code based on the context, the [LR]QUOTE and [BE]COMM codes are bit
+ masks to add to an ordinary code. If a character is made a quote it
+ will be recognised if the basis code does not have precedence.
+
+ When changing quotes and comment delimiters only the bits are
+ removed, and the characters are therefore reverted to their old
+ category code.
+
+ The precedence as implemented by next_token () is:
+
+ M4_SYNTAX_IGNORE *Filtered out below next_token ()
+ M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM
+ M4_SYNTAX_ESCAPE Reads macro name iff set, else next
+ M4_SYNTAX_ALPHA Reads macro name
+ M4_SYNTAX_LQUOTE Reads all until balanced M4_SYNTAX_RQUOTE
+
+ M4_SYNTAX_OTHER and M4_SYNTAX_NUM
+ Reads all M4_SYNTAX_OTHER and M4_SYNTAX_NUM
+ M4_SYNTAX_SPACE Reads all M4_SYNTAX_SPACE
+ M4_SYNTAX_ACTIVE Returns a single char as a word
+ the rest Returned as a single char
+
+ M4_SYNTAX_DOLLAR is not currently used. The character $ is treated as a
+ M4_SYNTAX_OTHER. It could be done, but it will slow next_token () down
+ a bit. The $ is not really a part of m4's input syntax in the sense
+ that a string is parsed equally whether there is a $ or not. The
+ character $ is used by convention in user macros. */
+
+static void check_use_macro_escape (void);
+static void set_syntax_internal (int code, int ch);
+static void unset_syntax_attribute (int code, int ch);
+
+/* TRUE iff strlen(rquote) == strlen(lquote) == 1 */
+boolean m4__single_quotes;
+
+/* TRUE iff strlen(bcomm) == strlen(ecomm) == 1 */
+boolean m4__single_comments;
+
+/* TRUE iff some character has M4_SYNTAX_ESCAPE */
+boolean m4__use_macro_escape;
+
+void
+m4_syntax_init (void)
+{
+ int ch;
+
+ for (ch = 256; --ch > 0;)
+ {
+ if (ch == '(')
+ set_syntax_internal (M4_SYNTAX_OPEN, ch);
+ else if (ch == ')')
+ set_syntax_internal (M4_SYNTAX_CLOSE, ch);
+ else if (ch == ',')
+ set_syntax_internal (M4_SYNTAX_COMMA, ch);
+ else if (isspace (ch))
+ set_syntax_internal (M4_SYNTAX_SPACE, ch);
+ else if (isalpha (ch) || ch == '_')
+ set_syntax_internal (M4_SYNTAX_ALPHA, ch);
+ else if (isdigit (ch))
+ set_syntax_internal (M4_SYNTAX_NUM, ch);
+ else
+ set_syntax_internal (M4_SYNTAX_OTHER, ch);
+ }
+ /* set_syntax_internal(M4_SYNTAX_IGNORE, 0); */
+
+ /* Default quotes and comment delimiters are always one char */
+ set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]);
+ set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]);
+ set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]);
+ set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]);
+}
+
+void
+m4_syntax_exit (void)
+{
+ return;
+}
+
+int
+m4_syntax_code (char ch)
+{
+ int code;
+
+ switch (ch)
+ {
+ case 'I': case 'i': code = M4_SYNTAX_IGNORE; break;
+ case 'O': case 'o': code = M4_SYNTAX_OTHER; break;
+ case 'S': case 's': code = M4_SYNTAX_SPACE; break;
+ case 'W': case 'w': code = M4_SYNTAX_ALPHA; break;
+ case 'D': case 'd': code = M4_SYNTAX_NUM; break;
+
+ case '(': code = M4_SYNTAX_OPEN; break;
+ case ')': code = M4_SYNTAX_CLOSE; break;
+ case ',': code = M4_SYNTAX_COMMA; break;
+ case '@': code = M4_SYNTAX_ESCAPE; break;
+#if 0 /* not yet used */
+ case '$': code = M4_SYNTAX_DOLLAR; break;
+#endif
+
+ case 'L': case 'l': code = M4_SYNTAX_LQUOTE; break;
+ case 'R': case 'r': code = M4_SYNTAX_RQUOTE; break;
+ case 'B': case 'b': code = M4_SYNTAX_BCOMM; break;
+ case 'E': case 'e': code = M4_SYNTAX_ECOMM; break;
+ case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break;
+
+ default: code = -1; break;
+ }
+
+ return code;
+}
+
+
+
+/* Functions for setting quotes and comment delimiters. Used by
+ m4_changecom () and m4_changequote (). Both functions override the
+ syntax_table to maintain compatibility. */
+void
+m4_set_quotes (const char *lq, const char *rq)
+{
+ int ch;
+ for (ch = 256; --ch >= 0;) /* changequote overrides syntax_table */
+ if (M4_IS_LQUOTE (ch) || M4_IS_RQUOTE (ch))
+ unset_syntax_attribute (M4_SYNTAX_LQUOTE | M4_SYNTAX_RQUOTE, ch);
+
+ xfree (lquote.string);
+ xfree (rquote.string);
+
+ lquote.string = xstrdup (lq ? lq : DEF_LQUOTE);
+ lquote.length = strlen (lquote.string);
+ rquote.string = xstrdup (rq ? rq : DEF_RQUOTE);
+ rquote.length = strlen (rquote.string);
+
+ m4__single_quotes = (lquote.length == 1 && rquote.length == 1);
+
+ if (m4__single_quotes)
+ {
+ set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]);
+ set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]);
+ }
+
+ if (m4__use_macro_escape)
+ check_use_macro_escape ();
+}
+
+void
+m4_set_comment (const char *bc, const char *ec)
+{
+ int ch;
+ for (ch = 256; --ch >= 0;) /* changecom overrides syntax_table */
+ if (M4_IS_BCOMM (ch) || M4_IS_ECOMM (ch))
+ unset_syntax_attribute (M4_SYNTAX_BCOMM | M4_SYNTAX_ECOMM, ch);
+
+ xfree (bcomm.string);
+ xfree (ecomm.string);
+
+ bcomm.string = xstrdup (bc ? bc : DEF_BCOMM);
+ bcomm.length = strlen (bcomm.string);
+ ecomm.string = xstrdup (ec ? ec : DEF_ECOMM);
+ ecomm.length = strlen (ecomm.string);
+
+ m4__single_comments = (bcomm.length == 1 && ecomm.length == 1);
+
+ if (m4__single_comments)
+ {
+ set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]);
+ set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]);
+ }
+
+ if (m4__use_macro_escape)
+ check_use_macro_escape ();
+}
+
+/* Functions to manipulate the syntax table. */
+static void
+set_syntax_internal (int code, int ch)
+{
+ if (code & M4_SYNTAX_MASKS)
+ m4_syntax_table[ch] |= code;
+ else
+ m4_syntax_table[ch] = code;
+
+#ifdef DEBUG_SYNTAX
+ fprintf(stderr, "Set syntax %o %c = %04X\n",
+ ch, isprint(ch) ? ch : '-',
+ m4_syntax_table[ch]);
+#endif
+}
+
+static void
+unset_syntax_attribute (int code, int ch)
+{
+ if (code & M4_SYNTAX_MASKS)
+ m4_syntax_table[ch] &= ~code;
+
+#ifdef DEBUG_SYNTAX
+ fprintf(stderr, "Unset syntax %o %c = %04X\n",
+ ch, isprint(ch) ? ch : '-',
+ m4_syntax_table[ch]);
+#endif
+}
+
+void
+m4_set_syntax (char key, const unsigned char *chars)
+{
+ int ch, code;
+
+ code = m4_syntax_code (key);
+
+ if ((code < 0) && (key != '\0'))
+ {
+ M4ERROR ((warning_status, 0,
+ _("Undefined syntax code %c"), key));
+ return;
+ }
+
+ if (*chars != '\0')
+ while ((ch = *chars++))
+ set_syntax_internal (code, ch);
+ else
+ for (ch = 256; --ch > 0; )
+ set_syntax_internal (code, ch);
+
+ if (m4__use_macro_escape || code == M4_SYNTAX_ESCAPE)
+ check_use_macro_escape();
+}
+
+static void
+check_use_macro_escape (void)
+{
+ int ch;
+
+ m4__use_macro_escape = FALSE;
+ for (ch = 256; --ch >= 0; )
+ if (M4_IS_ESCAPE (ch))
+ m4__use_macro_escape = TRUE;
+}
diff --git a/src/main.c b/src/main.c
index 4c69059b..f19689b5 100644
--- a/src/main.c
+++ b/src/main.c
@@ -524,6 +524,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"),
Strictly, we don't need to do this, but it makes leak detection
a whole lot easier! */
m4_symtab_exit ();
+ m4_syntax_exit ();
m4_output_exit ();
m4_input_exit ();
m4_debug_exit ();