diff options
author | Gary V. Vaughan <gary@gnu.org> | 2002-05-29 18:06:42 +0000 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2007-10-05 21:58:22 -0600 |
commit | de11cb28a57e7d1196a587f24dc83f1b492570b3 (patch) | |
tree | 7b76cf261009fc6bbcb298ee21c67e7bf1dd2b78 | |
parent | 86b1ec4e3585a53151d88f754a342819d3dc6b06 (diff) | |
download | m4-de11cb28a57e7d1196a587f24dc83f1b492570b3.tar.gz |
Preparations for refactoring syntax tables to allow reverse
lookups [fetch me a M4_SYNTAX_OPEN], without compromising the
speed of normal lookups in an array of unsigned short.
-rw-r--r-- | ChangeLog | 22 | ||||
-rw-r--r-- | m4/Makefile.am | 2 | ||||
-rw-r--r-- | m4/input.c | 297 | ||||
-rw-r--r-- | m4/m4module.h | 68 | ||||
-rw-r--r-- | m4/m4private.h | 9 | ||||
-rw-r--r-- | m4/syntax.c | 300 | ||||
-rw-r--r-- | src/main.c | 1 |
7 files changed, 385 insertions, 314 deletions
@@ -1,5 +1,27 @@ 2002-05-29 Gary V. Vaughan <gary@gnu.org> + Preparations for refactoring syntax tables to allow reverse + lookups [fetch me a M4_SYNTAX_OPEN], without compromising the + speed of normal lookups in an array of unsigned short. + + * m4/input.c (single_quotes, single_comments, use_macro_escape): + Moved from here... + * m4/m4private.h (m4__single_quotes, m4__single_comments, + m4__use_macro_escape): ...to here, and renamed. The `m4__' prefix + is for internal symbols which unavoidably pollute the global + namespace, but are not published APIs. + Adjusted all callers. + * m4/input.c (m4_syntax_init, m4_syntax_code, m4_set_quotes, + m4_set_comment, m4_set_syntax, set_syntax_internal, + unset_syntax_attribute): Moved from this file... + * m4/syntax.c: New. ...to this file. + Also added an m4_syntax_exit stub for orthogonality. + * src/main.c (main): Use it. + * m4/Makefile.am (libm4_la_SOURCES): Add syntax.c. + * m4/m4module.h: Reformatting. New prototypes. + +2002-05-29 Gary V. Vaughan <gary@gnu.org> + * bootstrap (aclocal_apiversion): The aclocal apiversion is distinct from the automake release number (in that the apiversion apparently has no micro-version-component). diff --git a/m4/Makefile.am b/m4/Makefile.am index 85eb91eb..48656011 100644 --- a/m4/Makefile.am +++ b/m4/Makefile.am @@ -38,7 +38,7 @@ EXTRA_DIST = $(EXTRA_HEADERS) obstack.c lib_LTLIBRARIES = libm4.la libm4_la_SOURCES = builtin.c debug.c error.c hash.c \ input.c ltdl.c macro.c module.c output.c \ - path.c regex.c symtab.c utility.c + path.c regex.c symtab.c syntax.c utility.c libm4_la_LIBADD = $(LTLIBOBJS) $(LIBADD_DL) module.o module.lo: $(srcdir)/module.c pathconf.h @@ -26,9 +26,6 @@ #define DEBUG_INPUT #undef DEBUG_INPUT -#define DEBUG_SYNTAX -#undef DEBUG_SYNTAX - /* Unread input can be either files, that should be read (eg. included files), strings, which should be rescanned (eg. macro expansion @@ -67,77 +64,8 @@ for use by the error handling functions in m4.c. Whenever a file input_block is pushed, the current file name and line number is saved in the input_block, and the two variables are reset to match the new - input file. - - THE SYNTAX TABLE - - The input is read character by character and grouped together - according to a syntax table. The character groups are (definitions - are all in m4.h, those marked with a * are not yet in use): - - M4_SYNTAX_IGNORE *Character to be deleted from input as if not present - M4_SYNTAX_OTHER Any character with no special meaning to m4 - M4_SYNTAX_SPACE Whitespace (ignored when leading macro arguments) - M4_SYNTAX_OPEN Open list of macro arguments - M4_SYNTAX_CLOSE Close list of macro arguments - M4_SYNTAX_COMMA Separates macro arguments - M4_SYNTAX_DOLLAR *Indicates macro argument in user macros - M4_SYNTAX_ACTIVE This caracter is a macro name by itself - - M4_SYNTAX_ESCAPE Use this character to prefix all macro names - M4_SYNTAX_ALPHA Alphabetic characters (can start macro names) - M4_SYNTAX_NUM Numeric characters - M4_SYNTAX_ALNUM Alphanumeric characters (can form macro names) - - (These are bit masks) - M4_SYNTAX_LQUOTE A single characters left quote - M4_SYNTAX_RQUOTE A single characters right quote - M4_SYNTAX_BCOMM A single characters begin comment delimiter - M4_SYNTAX_ECOMM A single characters end comment delimiter - - Besides adding new facilities, the use of a syntax table will reduce - the number of calls to next_token (). Now groups of OTHER, NUM and - SPACE characters can be returned as a single token, since next_token - () knows they have no special syntactical meaning to m4. This is, - however, only possible if only single character quotes comments - comments are used, because otherwise the quote and comment characters - will not show up in the syntax-table. - - Having a syntax table allows new facilities. The new builtin - "changesyntax" allows the the user to change the category of any - character. - - Default '\n' is both ECOMM and SPACE, depending on the context. To - solve the problem of quotes and comments that have diffent syntax - code based on the context, the [LR]QUOTE and [BE]COMM codes are bit - masks to add to an ordinary code. If a character is made a quote it - will be recognised if the basis code does not have precedence. - - When changing quotes and comment delimiters only the bits are - removed, and the characters are therefore reverted to its old - category code. - - The precedence as implemented by next_token () is: - - M4_SYNTAX_IGNORE *Filtered out below next_token () - M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM - M4_SYNTAX_ESCAPE Reads macro name iff set, else next - M4_SYNTAX_ALPHA Reads macro name - M4_SYNTAX_LQUOTE Reads all until balanced M4_SYNTAX_RQUOTE - - M4_SYNTAX_OTHER and M4_SYNTAX_NUM - Reads all M4_SYNTAX_OTHER and M4_SYNTAX_NUM - M4_SYNTAX_SPACE Reads all M4_SYNTAX_SPACE - M4_SYNTAX_ACTIVE Returns a single char as a word - the rest Returned as a single char - - M4_SYNTAX_DOLLAR is not currently used. The character $ is treated as a - M4_SYNTAX_OTHER. It could be done, but it will slow next_token () down - a bit. The $ is not really a part of m4's input syntax in the sense - that a string is parsed equally whether there is a $ or not. The - character $ is used by convention in user macros. */ - -static void check_use_macro_escape (void); + input file. */ + static int file_peek (void); static int file_read (void); static void file_unget (int ch); @@ -148,14 +76,12 @@ static int macro_read (void); static int match_input (const unsigned char *s); static int next_char (void); static void pop_input (void); -static void set_syntax_internal (int code, int ch); static int single_peek (void); static int single_read (void); static int string_peek (void); static int string_read (void); static void string_unget (int ch); static void unget_input (int ch); -static void unset_syntax_attribute (int code, int ch); struct input_funcs { @@ -243,22 +169,10 @@ static input_block *next; /* Flag for next_char () to increment m4_current_line. */ static boolean start_of_input_line; -/* Input syntax table */ -/* unsigned short syntax_table[256]; moved to m4module.c. */ - #define CHAR_EOF 256 /* character return on EOF */ #define CHAR_MACRO 257 /* character return for MACRO token */ #define CHAR_RETRY 258 /* character return for end of input block */ -/* TRUE iff strlen(rquote) == strlen(lquote) == 1 */ -static boolean single_quotes; - -/* TRUE iff strlen(bcomm) == strlen(ecomm) == 1 */ -static boolean single_comments; - -/* TRUE iff some character has M4_SYNTAX_ESCAPE */ -static boolean use_macro_escape; - /* push_file () pushes an input file on the input stack, saving the @@ -760,9 +674,6 @@ match_input (const unsigned char *s) /* Inititialise input stacks, and quote/comment characters. */ -static void set_syntax_internal (int code, int ch); -static void unset_syntax_attribute (int code, int ch); - void m4_input_init (void) { @@ -788,15 +699,15 @@ m4_input_init (void) lquote.length = strlen (lquote.string); rquote.string = xstrdup (DEF_RQUOTE); rquote.length = strlen (rquote.string); - single_quotes = TRUE; + m4__single_quotes = TRUE; bcomm.string = xstrdup (DEF_BCOMM); bcomm.length = strlen (bcomm.string); ecomm.string = xstrdup (DEF_ECOMM); ecomm.length = strlen (ecomm.string); - single_comments = TRUE; + m4__single_comments = TRUE; - use_macro_escape = FALSE; + m4__use_macro_escape = FALSE; } void @@ -811,198 +722,8 @@ m4_input_exit (void) obstack_free (&token_stack, NULL); } -void -m4_syntax_init (void) -{ - int ch; - - for (ch = 256; --ch > 0;) - { - if (ch == '(') - set_syntax_internal (M4_SYNTAX_OPEN, ch); - else if (ch == ')') - set_syntax_internal (M4_SYNTAX_CLOSE, ch); - else if (ch == ',') - set_syntax_internal (M4_SYNTAX_COMMA, ch); - else if (isspace (ch)) - set_syntax_internal (M4_SYNTAX_SPACE, ch); - else if (isalpha (ch) || ch == '_') - set_syntax_internal (M4_SYNTAX_ALPHA, ch); - else if (isdigit (ch)) - set_syntax_internal (M4_SYNTAX_NUM, ch); - else - set_syntax_internal (M4_SYNTAX_OTHER, ch); - } - /* set_syntax_internal(M4_SYNTAX_IGNORE, 0); */ - - /* Default quotes and comment delimiters are always one char */ - set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]); - set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]); - set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]); - set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]); -} - -int -m4_syntax_code (char ch) -{ - int code; - - switch (ch) - { - case 'I': case 'i': code = M4_SYNTAX_IGNORE; break; - case 'O': case 'o': code = M4_SYNTAX_OTHER; break; - case 'S': case 's': code = M4_SYNTAX_SPACE; break; - case 'W': case 'w': code = M4_SYNTAX_ALPHA; break; - case 'D': case 'd': code = M4_SYNTAX_NUM; break; - - case '(': code = M4_SYNTAX_OPEN; break; - case ')': code = M4_SYNTAX_CLOSE; break; - case ',': code = M4_SYNTAX_COMMA; break; - case '@': code = M4_SYNTAX_ESCAPE; break; -#if 0 /* not yet used */ - case '$': code = M4_SYNTAX_DOLLAR; break; -#endif - - case 'L': case 'l': code = M4_SYNTAX_LQUOTE; break; - case 'R': case 'r': code = M4_SYNTAX_RQUOTE; break; - case 'B': case 'b': code = M4_SYNTAX_BCOMM; break; - case 'E': case 'e': code = M4_SYNTAX_ECOMM; break; - case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break; - - default: code = -1; break; - } - - return code; -} - -static void -check_use_macro_escape (void) -{ - int ch; - - use_macro_escape = FALSE; - for (ch = 256; --ch >= 0; ) - if (M4_IS_ESCAPE (ch)) - use_macro_escape = TRUE; -} - -/* Functions for setting quotes and comment delimiters. Used by - m4_changecom () and m4_changequote (). Both functions overrides the - syntax_table to maintain compatibility. */ -void -m4_set_quotes (const char *lq, const char *rq) -{ - int ch; - for (ch = 256; --ch >= 0;) /* changequote overrides syntax_table */ - if (M4_IS_LQUOTE (ch) || M4_IS_RQUOTE (ch)) - unset_syntax_attribute (M4_SYNTAX_LQUOTE | M4_SYNTAX_RQUOTE, ch); - - xfree (lquote.string); - xfree (rquote.string); - - lquote.string = xstrdup (lq ? lq : DEF_LQUOTE); - lquote.length = strlen (lquote.string); - rquote.string = xstrdup (rq ? rq : DEF_RQUOTE); - rquote.length = strlen (rquote.string); - - single_quotes = (lquote.length == 1 && rquote.length == 1); - - if (single_quotes) - { - set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]); - set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]); - } - - if (use_macro_escape) - check_use_macro_escape (); -} - -void -m4_set_comment (const char *bc, const char *ec) -{ - int ch; - for (ch = 256; --ch >= 0;) /* changecom overrides syntax_table */ - if (M4_IS_BCOMM (ch) || M4_IS_ECOMM (ch)) - unset_syntax_attribute (M4_SYNTAX_BCOMM | M4_SYNTAX_ECOMM, ch); - - xfree (bcomm.string); - xfree (ecomm.string); - - bcomm.string = xstrdup (bc ? bc : DEF_BCOMM); - bcomm.length = strlen (bcomm.string); - ecomm.string = xstrdup (ec ? ec : DEF_ECOMM); - ecomm.length = strlen (ecomm.string); - - single_comments = (bcomm.length == 1 && ecomm.length == 1); - - if (single_comments) - { - set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]); - set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]); - } - - if (use_macro_escape) - check_use_macro_escape (); -} - -/* Functions to manipulate the syntax table. */ -static void -set_syntax_internal (int code, int ch) -{ - if (code & M4_SYNTAX_MASKS) - m4_syntax_table[ch] |= code; - else - m4_syntax_table[ch] = code; - -#ifdef DEBUG_SYNTAX - fprintf(stderr, "Set syntax %o %c = %04X\n", - ch, isprint(ch) ? ch : '-', - m4_syntax_table[ch]); -#endif -} - -static void -unset_syntax_attribute (int code, int ch) -{ - if (code & M4_SYNTAX_MASKS) - m4_syntax_table[ch] &= ~code; - -#ifdef DEBUG_SYNTAX - fprintf(stderr, "Unset syntax %o %c = %04X\n", - ch, isprint(ch) ? ch : '-', - m4_syntax_table[ch]); -#endif -} - -void -m4_set_syntax (char key, const unsigned char *chars) -{ - int ch, code; - - code = m4_syntax_code (key); - - if ((code < 0) && (key != '\0')) - { - M4ERROR ((warning_status, 0, - _("Undefined syntax code %c"), key)); - return; - } - - if (*chars != '\0') - while ((ch = *chars++)) - set_syntax_internal (code, ch); - else - for (ch = 256; --ch > 0; ) - set_syntax_internal (code, ch); - - if (use_macro_escape || code == M4_SYNTAX_ESCAPE) - check_use_macro_escape(); -} - - - /* Parse and return a single token from the input stream. A token can either be TOKEN_EOF, if the input_stack is empty; it can be TOKEN_STRING for a quoted string; TOKEN_WORD for something that is a potential macro @@ -1056,7 +777,7 @@ m4_next_token (m4_token *td) type = discard_comments ? M4_TOKEN_NONE : M4_TOKEN_STRING; } /* COMMENT, LONGER DELIM */ - else if (!single_comments && MATCH (ch, bcomm.string)) + else if (!m4__single_comments && MATCH (ch, bcomm.string)) { obstack_grow (&token_stack, bcomm.string, bcomm.length); while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm.string)) @@ -1103,7 +824,7 @@ m4_next_token (m4_token *td) if (ch != CHAR_EOF) unget_input(ch); - type = use_macro_escape ? M4_TOKEN_STRING : M4_TOKEN_WORD; + type = m4__use_macro_escape ? M4_TOKEN_STRING : M4_TOKEN_WORD; } else if (M4_IS_LQUOTE(ch)) /* QUOTED STRING, SINGLE QUOTES */ { @@ -1135,7 +856,7 @@ m4_next_token (m4_token *td) type = M4_TOKEN_STRING; } /* QUOTED STRING, LONGER QUOTES */ - else if (!single_quotes && MATCH (ch, lquote.string)) + else if (!m4__single_quotes && MATCH (ch, lquote.string)) { const char *current_file = m4_current_file; int current_line = m4_current_line; @@ -1163,7 +884,7 @@ m4_next_token (m4_token *td) } type = M4_TOKEN_STRING; } - else if (single_quotes && single_comments) /* EVERYTHING ELSE */ + else if (m4__single_quotes && m4__single_comments) /* EVERYTHING ELSE */ { obstack_1grow (&token_stack, ch); diff --git a/m4/m4module.h b/m4/m4module.h index 741ea86c..cd9b9c58 100644 --- a/m4/m4module.h +++ b/m4/m4module.h @@ -57,6 +57,12 @@ typedef struct { } m4_builtin; + +/* --- MODULE MANAGEMENT --- */ + +typedef void m4_module_init_func (lt_dlhandle, struct obstack*); +typedef void m4_module_finish_func (lt_dlhandle, struct obstack*); + extern void m4_module_init (void); extern lt_dlhandle m4_module_load (const char*, struct obstack*); extern void m4_module_unload (const char*, struct obstack*); @@ -72,6 +78,9 @@ extern m4_macro *m4_module_macros (lt_dlhandle); extern lt_dlhandle m4_module_find_by_builtin (const m4_builtin*); + +/* --- MACRO (and builtin) MANAGEMENT --- */ + extern m4_symbol *m4_macro_pushdef (const char *name, lt_dlhandle handle, const char *text, int flags, int min_args, int max_args); @@ -95,6 +104,10 @@ extern const m4_builtin *m4_builtin_find_by_name ( extern const m4_builtin *m4_builtin_find_by_func ( const m4_builtin *, m4_builtin_func *); + + +/* --- SYMBOL TABLE MANAGEMENT --- */ + extern m4_hash *m4_symtab; extern void m4_symtab_init (void); @@ -133,9 +146,6 @@ typedef enum { M4_TOKEN_FUNC } m4_data_t; -typedef void m4_module_init_func (lt_dlhandle, struct obstack*); -typedef void m4_module_finish_func (lt_dlhandle, struct obstack*); - extern m4_token_t m4_token_type (m4_token *); extern char *m4_token_text (m4_token *); extern m4_builtin_func *m4_token_func (m4_token *); @@ -208,6 +218,9 @@ void m4_shipout_string (struct obstack*, const char*, int, boolean); void m4_dump_args (struct obstack *obs, int argc, m4_token **argv, const char *sep, boolean quoted); + +/* --- RUNTIME DEBUGGING --- */ + FILE *m4_debug; /* The value of debug_level is a bitmask of the following. */ @@ -323,6 +336,13 @@ void m4_process_macro (struct obstack *obs, m4_symbol *symbol, int argc, m4_toke /* --- SYNTAX TABLE DEFINITIONS --- */ +/* Please read the comment at the top of input.c for details */ +unsigned short m4_syntax_table[256]; + +extern void m4_syntax_init (void); +extern void m4_syntax_exit (void); +extern int m4_syntax_code (char ch); + /* These are simple values, not bit masks. There is no overlap. */ #define M4_SYNTAX_OTHER (0x0000) @@ -352,40 +372,38 @@ void m4_process_macro (struct obstack *obs, m4_symbol *symbol, int argc, m4_toke #define M4_SYNTAX_VALUE (0x00FF|M4_SYNTAX_LQUOTE|M4_SYNTAX_BCOMM) #define M4_SYNTAX_MASKS (0xFF00) - -#define m4_syntax(ch) m4_syntax_table[(int)(ch)] +#define m4__syntax(ch) m4_syntax_table[(int)(ch)] -#define M4_IS_OTHER(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OTHER) -#define M4_IS_IGNORE(ch) ((m4_syntax(ch)) == M4_SYNTAX_IGNORE) -#define M4_IS_SPACE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_SPACE) +#define M4_IS_OTHER(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OTHER) +#define M4_IS_IGNORE(ch) ((m4__syntax(ch)) == M4_SYNTAX_IGNORE) +#define M4_IS_SPACE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_SPACE) -#define M4_IS_OPEN(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OPEN) -#define M4_IS_CLOSE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_CLOSE) -#define M4_IS_COMMA(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_COMMA) -#define M4_IS_DOLLAR(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_DOLLAR) -#define M4_IS_ACTIVE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ACTIVE) +#define M4_IS_OPEN(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_OPEN) +#define M4_IS_CLOSE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_CLOSE) +#define M4_IS_COMMA(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_COMMA) +#define M4_IS_DOLLAR(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_DOLLAR) +#define M4_IS_ACTIVE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ACTIVE) -#define M4_IS_ESCAPE(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ESCAPE) -#define M4_IS_ALPHA(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ALPHA) -#define M4_IS_NUM(ch) ((m4_syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_NUM) -#define M4_IS_ALNUM(ch) (((m4_syntax(ch)) & M4_SYNTAX_ALNUM) != 0) +#define M4_IS_ESCAPE(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ESCAPE) +#define M4_IS_ALPHA(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_ALPHA) +#define M4_IS_NUM(ch) ((m4__syntax(ch)&M4_SYNTAX_VALUE) == M4_SYNTAX_NUM) +#define M4_IS_ALNUM(ch) (((m4__syntax(ch)) & M4_SYNTAX_ALNUM) != 0) -#define M4_IS_LQUOTE(ch) (m4_syntax(ch) & M4_SYNTAX_LQUOTE) -#define M4_IS_RQUOTE(ch) (m4_syntax(ch) & M4_SYNTAX_RQUOTE) -#define M4_IS_BCOMM(ch) (m4_syntax(ch) & M4_SYNTAX_BCOMM) -#define M4_IS_ECOMM(ch) (m4_syntax(ch) & M4_SYNTAX_ECOMM) +#define M4_IS_LQUOTE(ch) (m4__syntax(ch) & M4_SYNTAX_LQUOTE) +#define M4_IS_RQUOTE(ch) (m4__syntax(ch) & M4_SYNTAX_RQUOTE) +#define M4_IS_BCOMM(ch) (m4__syntax(ch) & M4_SYNTAX_BCOMM) +#define M4_IS_ECOMM(ch) (m4__syntax(ch) & M4_SYNTAX_ECOMM) -/* Please read the comment at the top of input.c for details */ -unsigned short m4_syntax_table[256]; + + +/* --- TOKENISATION AND INPUT --- */ /* current input file, and line */ const char *m4_current_file; int m4_current_line; -extern int m4_syntax_code (char ch); extern void m4_input_init (void); extern void m4_input_exit (void); -extern void m4_syntax_init (void); extern int m4_peek_input (void); extern m4_token_t m4_next_token (m4_token *); extern void m4_token_copy (m4_token *dest, m4_token *src); diff --git a/m4/m4private.h b/m4/m4private.h index 4d6094e0..645e3da8 100644 --- a/m4/m4private.h +++ b/m4/m4private.h @@ -27,6 +27,15 @@ #include <assert.h> #include <m4module.h> +/* TRUE iff strlen(rquote) == strlen(lquote) == 1 */ +extern boolean m4__single_quotes; + +/* TRUE iff strlen(bcomm) == strlen(ecomm) == 1 */ +extern boolean m4__single_comments; + +/* TRUE iff some character has M4_SYNTAX_ESCAPE */ +extern boolean m4__use_macro_escape; + struct m4_module_data { m4_builtin *bp; /* `m4_builtin_table' address */ m4_macro *mp; /* `m4_macro_table' address */ diff --git a/m4/syntax.c b/m4/syntax.c new file mode 100644 index 00000000..be4c7a02 --- /dev/null +++ b/m4/syntax.c @@ -0,0 +1,300 @@ +/* GNU m4 -- A simple macro processor + Copyright 1989, 90, 91, 92, 93, 94, 2002 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA +*/ + +#include "m4private.h" + +#define DEBUG_SYNTAX +#undef DEBUG_SYNTAX + +/* THE SYNTAX TABLE + + The input is read character by character and grouped together + according to a syntax table. The character groups are (definitions + are all in m4.h, those marked with a * are not yet in use): + + M4_SYNTAX_IGNORE *Character to be deleted from input as if not present + M4_SYNTAX_OTHER Any character with no special meaning to m4 + M4_SYNTAX_SPACE Whitespace (ignored when leading macro arguments) + M4_SYNTAX_OPEN Open list of macro arguments + M4_SYNTAX_CLOSE Close list of macro arguments + M4_SYNTAX_COMMA Separates macro arguments + M4_SYNTAX_DOLLAR *Indicates macro argument in user macros + M4_SYNTAX_ACTIVE This caracter is a macro name by itself + + M4_SYNTAX_ESCAPE Use this character to prefix all macro names + M4_SYNTAX_ALPHA Alphabetic characters (can start macro names) + M4_SYNTAX_NUM Numeric characters + M4_SYNTAX_ALNUM Alphanumeric characters (can form macro names) + + (These are bit masks) + M4_SYNTAX_LQUOTE A single characters left quote + M4_SYNTAX_RQUOTE A single characters right quote + M4_SYNTAX_BCOMM A single characters begin comment delimiter + M4_SYNTAX_ECOMM A single characters end comment delimiter + + Besides adding new facilities, the use of a syntax table will reduce + the number of calls to next_token (). Now groups of OTHER, NUM and + SPACE characters can be returned as a single token, since next_token + () knows they have no special syntactical meaning to m4. This is, + however, only possible if only single character quotes comments + comments are used, because otherwise the quote and comment characters + will not show up in the syntax-table. + + Having a syntax table allows new facilities. The new builtin + "changesyntax" allows the the user to change the category of any + character. + + Default '\n' is both ECOMM and SPACE, depending on the context. To + solve the problem of quotes and comments that have diffent syntax + code based on the context, the [LR]QUOTE and [BE]COMM codes are bit + masks to add to an ordinary code. If a character is made a quote it + will be recognised if the basis code does not have precedence. + + When changing quotes and comment delimiters only the bits are + removed, and the characters are therefore reverted to its old + category code. + + The precedence as implemented by next_token () is: + + M4_SYNTAX_IGNORE *Filtered out below next_token () + M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM + M4_SYNTAX_ESCAPE Reads macro name iff set, else next + M4_SYNTAX_ALPHA Reads macro name + M4_SYNTAX_LQUOTE Reads all until balanced M4_SYNTAX_RQUOTE + + M4_SYNTAX_OTHER and M4_SYNTAX_NUM + Reads all M4_SYNTAX_OTHER and M4_SYNTAX_NUM + M4_SYNTAX_SPACE Reads all M4_SYNTAX_SPACE + M4_SYNTAX_ACTIVE Returns a single char as a word + the rest Returned as a single char + + M4_SYNTAX_DOLLAR is not currently used. The character $ is treated as a + M4_SYNTAX_OTHER. It could be done, but it will slow next_token () down + a bit. The $ is not really a part of m4's input syntax in the sense + that a string is parsed equally whether there is a $ or not. The + character $ is used by convention in user macros. */ + +static void check_use_macro_escape (void); +static void set_syntax_internal (int code, int ch); +static void unset_syntax_attribute (int code, int ch); + +/* TRUE iff strlen(rquote) == strlen(lquote) == 1 */ +boolean m4__single_quotes; + +/* TRUE iff strlen(bcomm) == strlen(ecomm) == 1 */ +boolean m4__single_comments; + +/* TRUE iff some character has M4_SYNTAX_ESCAPE */ +boolean m4__use_macro_escape; + +void +m4_syntax_init (void) +{ + int ch; + + for (ch = 256; --ch > 0;) + { + if (ch == '(') + set_syntax_internal (M4_SYNTAX_OPEN, ch); + else if (ch == ')') + set_syntax_internal (M4_SYNTAX_CLOSE, ch); + else if (ch == ',') + set_syntax_internal (M4_SYNTAX_COMMA, ch); + else if (isspace (ch)) + set_syntax_internal (M4_SYNTAX_SPACE, ch); + else if (isalpha (ch) || ch == '_') + set_syntax_internal (M4_SYNTAX_ALPHA, ch); + else if (isdigit (ch)) + set_syntax_internal (M4_SYNTAX_NUM, ch); + else + set_syntax_internal (M4_SYNTAX_OTHER, ch); + } + /* set_syntax_internal(M4_SYNTAX_IGNORE, 0); */ + + /* Default quotes and comment delimiters are always one char */ + set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]); + set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]); + set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]); + set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]); +} + +void +m4_syntax_exit (void) +{ + return; +} + +int +m4_syntax_code (char ch) +{ + int code; + + switch (ch) + { + case 'I': case 'i': code = M4_SYNTAX_IGNORE; break; + case 'O': case 'o': code = M4_SYNTAX_OTHER; break; + case 'S': case 's': code = M4_SYNTAX_SPACE; break; + case 'W': case 'w': code = M4_SYNTAX_ALPHA; break; + case 'D': case 'd': code = M4_SYNTAX_NUM; break; + + case '(': code = M4_SYNTAX_OPEN; break; + case ')': code = M4_SYNTAX_CLOSE; break; + case ',': code = M4_SYNTAX_COMMA; break; + case '@': code = M4_SYNTAX_ESCAPE; break; +#if 0 /* not yet used */ + case '$': code = M4_SYNTAX_DOLLAR; break; +#endif + + case 'L': case 'l': code = M4_SYNTAX_LQUOTE; break; + case 'R': case 'r': code = M4_SYNTAX_RQUOTE; break; + case 'B': case 'b': code = M4_SYNTAX_BCOMM; break; + case 'E': case 'e': code = M4_SYNTAX_ECOMM; break; + case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break; + + default: code = -1; break; + } + + return code; +} + + + +/* Functions for setting quotes and comment delimiters. Used by + m4_changecom () and m4_changequote (). Both functions overrides the + syntax_table to maintain compatibility. */ +void +m4_set_quotes (const char *lq, const char *rq) +{ + int ch; + for (ch = 256; --ch >= 0;) /* changequote overrides syntax_table */ + if (M4_IS_LQUOTE (ch) || M4_IS_RQUOTE (ch)) + unset_syntax_attribute (M4_SYNTAX_LQUOTE | M4_SYNTAX_RQUOTE, ch); + + xfree (lquote.string); + xfree (rquote.string); + + lquote.string = xstrdup (lq ? lq : DEF_LQUOTE); + lquote.length = strlen (lquote.string); + rquote.string = xstrdup (rq ? rq : DEF_RQUOTE); + rquote.length = strlen (rquote.string); + + m4__single_quotes = (lquote.length == 1 && rquote.length == 1); + + if (m4__single_quotes) + { + set_syntax_internal (M4_SYNTAX_LQUOTE, lquote.string[0]); + set_syntax_internal (M4_SYNTAX_RQUOTE, rquote.string[0]); + } + + if (m4__use_macro_escape) + check_use_macro_escape (); +} + +void +m4_set_comment (const char *bc, const char *ec) +{ + int ch; + for (ch = 256; --ch >= 0;) /* changecom overrides syntax_table */ + if (M4_IS_BCOMM (ch) || M4_IS_ECOMM (ch)) + unset_syntax_attribute (M4_SYNTAX_BCOMM | M4_SYNTAX_ECOMM, ch); + + xfree (bcomm.string); + xfree (ecomm.string); + + bcomm.string = xstrdup (bc ? bc : DEF_BCOMM); + bcomm.length = strlen (bcomm.string); + ecomm.string = xstrdup (ec ? ec : DEF_ECOMM); + ecomm.length = strlen (ecomm.string); + + m4__single_comments = (bcomm.length == 1 && ecomm.length == 1); + + if (m4__single_comments) + { + set_syntax_internal (M4_SYNTAX_BCOMM, bcomm.string[0]); + set_syntax_internal (M4_SYNTAX_ECOMM, ecomm.string[0]); + } + + if (m4__use_macro_escape) + check_use_macro_escape (); +} + +/* Functions to manipulate the syntax table. */ +static void +set_syntax_internal (int code, int ch) +{ + if (code & M4_SYNTAX_MASKS) + m4_syntax_table[ch] |= code; + else + m4_syntax_table[ch] = code; + +#ifdef DEBUG_SYNTAX + fprintf(stderr, "Set syntax %o %c = %04X\n", + ch, isprint(ch) ? ch : '-', + m4_syntax_table[ch]); +#endif +} + +static void +unset_syntax_attribute (int code, int ch) +{ + if (code & M4_SYNTAX_MASKS) + m4_syntax_table[ch] &= ~code; + +#ifdef DEBUG_SYNTAX + fprintf(stderr, "Unset syntax %o %c = %04X\n", + ch, isprint(ch) ? ch : '-', + m4_syntax_table[ch]); +#endif +} + +void +m4_set_syntax (char key, const unsigned char *chars) +{ + int ch, code; + + code = m4_syntax_code (key); + + if ((code < 0) && (key != '\0')) + { + M4ERROR ((warning_status, 0, + _("Undefined syntax code %c"), key)); + return; + } + + if (*chars != '\0') + while ((ch = *chars++)) + set_syntax_internal (code, ch); + else + for (ch = 256; --ch > 0; ) + set_syntax_internal (code, ch); + + if (m4__use_macro_escape || code == M4_SYNTAX_ESCAPE) + check_use_macro_escape(); +} + +static void +check_use_macro_escape (void) +{ + int ch; + + m4__use_macro_escape = FALSE; + for (ch = 256; --ch >= 0; ) + if (M4_IS_ESCAPE (ch)) + m4__use_macro_escape = TRUE; +} @@ -524,6 +524,7 @@ warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"), Strictly, we don't need to do this, but it makes leak detection a whole lot easier! */ m4_symtab_exit (); + m4_syntax_exit (); m4_output_exit (); m4_input_exit (); m4_debug_exit (); |