diff options
Diffstat (limited to 'm4/syntax.c')
-rw-r--r-- | m4/syntax.c | 954 |
1 files changed, 0 insertions, 954 deletions
diff --git a/m4/syntax.c b/m4/syntax.c deleted file mode 100644 index 4bde1234..00000000 --- a/m4/syntax.c +++ /dev/null @@ -1,954 +0,0 @@ -/* GNU m4 -- A simple macro processor - Copyright (C) 1989-1994, 2002, 2004, 2006-2010, 2013-2014, 2017 Free - Software Foundation, Inc. - - This file is part of GNU M4. - - GNU M4 is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - GNU M4 is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#include <config.h> - -#include "m4private.h" - -/* Define this to see runtime debug info. Implied by DEBUG. */ -/*#define DEBUG_SYNTAX */ - -/* THE SYNTAX TABLE - - The input is read character by character and grouped together - according to a syntax table. The character groups are (definitions - are all in m4module.h, those marked with a * are not yet in use): - - Basic (all characters fall in one of these mutually exclusive bins) - M4_SYNTAX_IGNORE *Character to be deleted from input as if not present - M4_SYNTAX_OTHER Any character with no special meaning to m4 - M4_SYNTAX_SPACE Whitespace (ignored when leading macro arguments) - M4_SYNTAX_OPEN Open list of macro arguments - M4_SYNTAX_CLOSE Close list of macro arguments - M4_SYNTAX_COMMA Separates macro arguments - M4_SYNTAX_ACTIVE This character is a macro name by itself - M4_SYNTAX_ESCAPE Use this character to prefix all macro names - - M4_SYNTAX_ALPHA Alphabetic characters (can start macro names) - M4_SYNTAX_NUM Numeric characters (can form macro names) - - M4_SYNTAX_LQUOTE A single character left quote - M4_SYNTAX_BCOMM A single character begin comment delimiter - - Attribute (these are context sensitive, and exist in addition to basic) - M4_SYNTAX_RQUOTE A single character right quote - M4_SYNTAX_ECOMM A single character end comment delimiter - M4_SYNTAX_DOLLAR Indicates macro argument in user macros - M4_SYNTAX_LBRACE *Indicates start of extended macro argument - M4_SYNTAX_RBRACE *Indicates end of extended macro argument - - Besides adding new facilities, the use of a syntax table will reduce - the number of calls to next_token (). Now groups of OTHER, NUM and - SPACE characters can be returned as a single token, since next_token - () knows they have no special syntactical meaning to m4. This is, - however, only possible if only single character quotes comments - comments are used, because otherwise the quote and comment characters - will not show up in the syntax-table. - - Having a syntax table allows new facilities. The new builtin - "changesyntax" allows the user to change the category of any - character. - - By default, '\n' is both ECOMM and SPACE, depending on the context. - Hence we have basic categories (mutually exclusive, can introduce a - context, and can be empty sets), and attribute categories - (additive, only recognized in context, and will never be empty). - - The precedence as implemented by next_token () is: - - M4_SYNTAX_IGNORE *Filtered out below next_token () - M4_SYNTAX_ESCAPE Reads macro name iff set, else next character - M4_SYNTAX_ALPHA Reads M4_SYNTAX_ALPHA and M4_SYNTAX_NUM as macro name - M4_SYNTAX_LQUOTE Reads all until balanced M4_SYNTAX_RQUOTE - M4_SYNTAX_BCOMM Reads all until M4_SYNTAX_ECOMM - - M4_SYNTAX_OTHER } Reads all M4_SYNTAX_OTHER, M4_SYNTAX_NUM - M4_SYNTAX_NUM } - - M4_SYNTAX_SPACE Reads all M4_SYNTAX_SPACE, depending on buffering - M4_SYNTAX_ACTIVE Returns a single char as a macro name - - M4_SYNTAX_OPEN } Returned as a single char - M4_SYNTAX_CLOSE } - M4_SYNTAX_COMMA } - - M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM are context-sensitive, and - close out M4_SYNTAX_LQUOTE and M4_SYNTAX_BCOMM, respectively. - Also, M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, and M4_SYNTAX_RBRACE are - context-sensitive, only mattering when expanding macro definitions. - - There are several optimizations that can be performed depending on - known states of the syntax table. For example, when searching for - quotes, if there is only a single start quote and end quote - delimiter, we can use memchr2 and search a word at a time, instead - of performing a table lookup a byte at a time. The is_single_* - flags track whether quotes and comments have a single delimiter - (always the case if changequote/changecom were used, and - potentially the case after changesyntax). Since we frequently need - to access quotes, we store the oldest valid quote outside the - lookup table; the suspect flag tracks whether a cleanup pass is - needed to restore our invariants. On the other hand, coalescing - multiple M4_SYNTAX_OTHER bytes could form a delimiter, so many - optimizations must be disabled if a multi-byte delimiter exists; - this is handled by m4__safe_quotes. Meanwhile, quotes and comments - can be disabled if the leading delimiter is length 0. */ - -static int add_syntax_attribute (m4_syntax_table *, char, int); -static int remove_syntax_attribute (m4_syntax_table *, char, int); -static void set_quote_age (m4_syntax_table *, bool, bool); - -m4_syntax_table * -m4_syntax_create (void) -{ - m4_syntax_table *syntax = (m4_syntax_table *) xzalloc (sizeof *syntax); - int ch; - - /* Set up default table. This table never changes during operation, - and contains no context attributes. */ - for (ch = UCHAR_MAX + 1; --ch >= 0; ) - switch (ch) - { - case '(': - syntax->orig[ch] = M4_SYNTAX_OPEN; - break; - case ')': - syntax->orig[ch] = M4_SYNTAX_CLOSE; - break; - case ',': - syntax->orig[ch] = M4_SYNTAX_COMMA; - break; - case '`': - syntax->orig[ch] = M4_SYNTAX_LQUOTE; - break; - case '#': - syntax->orig[ch] = M4_SYNTAX_BCOMM; - break; - default: - if (isspace (ch)) - syntax->orig[ch] = M4_SYNTAX_SPACE; - else if (isalpha (ch) || ch == '_') - syntax->orig[ch] = M4_SYNTAX_ALPHA; - else if (isdigit (ch)) - syntax->orig[ch] = M4_SYNTAX_NUM; - else - syntax->orig[ch] = M4_SYNTAX_OTHER; - } - - /* Set up current table to match default. */ - m4_reset_syntax (syntax); - syntax->cached_simple.str1 = syntax->cached_lquote; - syntax->cached_simple.len1 = 1; - syntax->cached_simple.str2 = syntax->cached_rquote; - syntax->cached_simple.len2 = 1; - return syntax; -} - -void -m4_syntax_delete (m4_syntax_table *syntax) -{ - assert (syntax); - - free (syntax->quote.str1); - free (syntax->quote.str2); - free (syntax->comm.str1); - free (syntax->comm.str2); - free (syntax); -} - -int -m4_syntax_code (char ch) -{ - int code; - - switch (ch) - { - /* Sorted according to the order of M4_SYNTAX_* in m4module.h. */ - /* FIXME - revisit the ignore syntax attribute. */ - case 'I': case 'i': code = M4_SYNTAX_IGNORE; break; - /* Basic categories. */ - case '@': code = M4_SYNTAX_ESCAPE; break; - case 'W': case 'w': code = M4_SYNTAX_ALPHA; break; - case 'L': case 'l': code = M4_SYNTAX_LQUOTE; break; - case 'B': case 'b': code = M4_SYNTAX_BCOMM; break; - case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break; - case 'D': case 'd': code = M4_SYNTAX_NUM; break; - case 'S': case 's': code = M4_SYNTAX_SPACE; break; - case '(': code = M4_SYNTAX_OPEN; break; - case ')': code = M4_SYNTAX_CLOSE; break; - case ',': code = M4_SYNTAX_COMMA; break; - case 'O': case 'o': code = M4_SYNTAX_OTHER; break; - /* Context categories. */ - case '$': code = M4_SYNTAX_DOLLAR; break; - case '{': code = M4_SYNTAX_LBRACE; break; - case '}': code = M4_SYNTAX_RBRACE; break; - case 'R': case 'r': code = M4_SYNTAX_RQUOTE; break; - case 'E': case 'e': code = M4_SYNTAX_ECOMM; break; - - default: code = -1; break; - } - - return code; -} - - - -/* Functions to manipulate the syntax table. */ -static int -add_syntax_attribute (m4_syntax_table *syntax, char ch, int code) -{ - int c = to_uchar (ch); - if (code & M4_SYNTAX_MASKS) - { - syntax->table[c] |= code; - syntax->suspect = true; - } - else - { - if ((code & (M4_SYNTAX_SUSPECT)) != 0 - || m4_has_syntax (syntax, c, M4_SYNTAX_SUSPECT)) - syntax->suspect = true; - syntax->table[c] = ((syntax->table[c] & M4_SYNTAX_MASKS) | code); - } - -#ifdef DEBUG_SYNTAX - xfprintf(stderr, "Set syntax %o %c = %04X\n", c, isprint(c) ? c : '-', - syntax->table[c]); -#endif - - return syntax->table[c]; -} - -static int -remove_syntax_attribute (m4_syntax_table *syntax, char ch, int code) -{ - int c = to_uchar (ch); - assert (code & M4_SYNTAX_MASKS); - syntax->table[c] &= ~code; - syntax->suspect = true; - -#ifdef DEBUG_SYNTAX - xfprintf(stderr, "Unset syntax %o %c = %04X\n", c, isprint(c) ? c : '-', - syntax->table[c]); -#endif - - return syntax->table[c]; -} - -/* Add the set CHARS of length LEN to syntax category CODE, removing - them from whatever category they used to be in. */ -static void -add_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len, - int code) -{ - while (len--) - add_syntax_attribute (syntax, *chars++, code); -} - -/* Remove the set CHARS of length LEN from syntax category CODE, - adding them to category M4_SYNTAX_OTHER instead. */ -static void -subtract_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len, - int code) -{ - while (len--) - { - char ch = *chars++; - if ((code & M4_SYNTAX_MASKS) != 0) - remove_syntax_attribute (syntax, ch, code); - else if (m4_has_syntax (syntax, ch, code)) - add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER); - } -} - -/* Make the set CHARS of length LEN become syntax category CODE, - removing CHARS from any other categories, and sending all bytes in - the category but not in CHARS to category M4_SYNTAX_OTHER - instead. */ -static void -set_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len, - int code) -{ - int ch; - /* Explicit set of characters to install with this category; all - other characters that used to have the category get reset to - OTHER. */ - for (ch = UCHAR_MAX + 1; --ch >= 0; ) - { - if ((code & M4_SYNTAX_MASKS) != 0) - remove_syntax_attribute (syntax, ch, code); - else if (m4_has_syntax (syntax, ch, code)) - add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER); - } - while (len--) - { - ch = *chars++; - add_syntax_attribute (syntax, ch, code); - } -} - -/* Reset syntax category CODE to its default state, sending all other - characters in the category back to their default state. */ -static void -reset_syntax_set (m4_syntax_table *syntax, int code) -{ - int ch; - for (ch = UCHAR_MAX + 1; --ch >= 0; ) - { - /* Reset the category back to its default state. All other - characters that used to have this category get reset to - their default state as well. */ - if (code == M4_SYNTAX_RQUOTE) - { - if (ch == '\'') - add_syntax_attribute (syntax, ch, code); - else - remove_syntax_attribute (syntax, ch, code); - } - else if (code == M4_SYNTAX_ECOMM) - { - if (ch == '\n') - add_syntax_attribute (syntax, ch, code); - else - remove_syntax_attribute (syntax, ch, code); - } - else if (code == M4_SYNTAX_DOLLAR) - { - if (ch == '$') - add_syntax_attribute (syntax, ch, code); - else - remove_syntax_attribute (syntax, ch, code); - } - else if (code == M4_SYNTAX_LBRACE) - { - if (ch == '{') - add_syntax_attribute (syntax, ch, code); - else - remove_syntax_attribute (syntax, ch, code); - } - else if (code == M4_SYNTAX_RBRACE) - { - if (ch == '}') - add_syntax_attribute (syntax, ch, code); - else - remove_syntax_attribute (syntax, ch, code); - } - else if (syntax->orig[ch] == code || m4_has_syntax (syntax, ch, code)) - add_syntax_attribute (syntax, ch, syntax->orig[ch]); - } -} - -/* Reset the syntax table to its default state. */ -void -m4_reset_syntax (m4_syntax_table *syntax) -{ - /* Restore the default syntax, which has known quote and comment - properties. */ - memcpy (syntax->table, syntax->orig, sizeof syntax->orig); - - free (syntax->quote.str1); - free (syntax->quote.str2); - free (syntax->comm.str1); - free (syntax->comm.str2); - - /* The use of xmemdup0 is exploited by input.c. */ - syntax->quote.str1 = xmemdup0 (DEF_LQUOTE, 1); - syntax->quote.len1 = 1; - syntax->quote.str2 = xmemdup0 (DEF_RQUOTE, 1); - syntax->quote.len2 = 1; - syntax->comm.str1 = xmemdup0 (DEF_BCOMM, 1); - syntax->comm.len1 = 1; - syntax->comm.str2 = xmemdup0 (DEF_ECOMM, 1); - syntax->comm.len2 = 1; - syntax->dollar = '$'; - - add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE); - add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM); - add_syntax_attribute (syntax, '$', M4_SYNTAX_DOLLAR); - add_syntax_attribute (syntax, '{', M4_SYNTAX_LBRACE); - add_syntax_attribute (syntax, '}', M4_SYNTAX_RBRACE); - - syntax->is_single_quotes = true; - syntax->is_single_comments = true; - syntax->is_single_dollar = true; - syntax->is_macro_escaped = false; - set_quote_age (syntax, true, false); -} - -/* Alter the syntax for category KEY, according to ACTION: '+' to add, - '-' to subtract, '=' to set, or '\0' to reset. The array CHARS of - length LEN describes the characters to modify; it is ignored if - ACTION is '\0'. Return -1 if KEY is invalid, otherwise return the - syntax category matching KEY. */ -int -m4_set_syntax (m4_syntax_table *syntax, char key, char action, - const char *chars, size_t len) -{ - int code; - - assert (syntax && chars); - code = m4_syntax_code (key); - if (code < 0) - { - return -1; - } - syntax->suspect = false; - switch (action) - { - case '+': - add_syntax_set (syntax, chars, len, code); - break; - case '-': - subtract_syntax_set (syntax, chars, len, code); - break; - case '=': - set_syntax_set (syntax, chars, len, code); - break; - case '\0': - assert (!len); - reset_syntax_set (syntax, code); - break; - default: - assert (false); - } - - /* Check for any cleanup needed. */ - if (syntax->suspect) - { - int ch; - int lquote = -1; - int rquote = -1; - int bcomm = -1; - int ecomm = -1; - bool single_quote_possible = true; - bool single_comm_possible = true; - int dollar = -1; - if (m4_has_syntax (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE)) - { - assert (syntax->quote.len1 == 1); - lquote = to_uchar (syntax->quote.str1[0]); - } - if (m4_has_syntax (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE)) - { - assert (syntax->quote.len2 == 1); - rquote = to_uchar (syntax->quote.str2[0]); - } - if (m4_has_syntax (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM)) - { - assert (syntax->comm.len1 == 1); - bcomm = to_uchar (syntax->comm.str1[0]); - } - if (m4_has_syntax (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM)) - { - assert (syntax->comm.len2 == 1); - ecomm = to_uchar (syntax->comm.str2[0]); - } - syntax->is_single_dollar = false; - syntax->is_macro_escaped = false; - /* Find candidates for each category. */ - for (ch = UCHAR_MAX + 1; --ch >= 0; ) - { - if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE)) - { - if (lquote == -1) - lquote = ch; - else if (lquote != ch) - single_quote_possible = false; - } - if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE)) - { - if (rquote == -1) - rquote = ch; - else if (rquote != ch) - single_quote_possible = false; - } - if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM)) - { - if (bcomm == -1) - bcomm = ch; - else if (bcomm != ch) - single_comm_possible = false; - } - if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM)) - { - if (ecomm == -1) - ecomm = ch; - else if (ecomm != ch) - single_comm_possible = false; - } - if (m4_has_syntax (syntax, ch, M4_SYNTAX_DOLLAR)) - { - if (dollar == -1) - { - syntax->dollar = dollar = ch; - syntax->is_single_dollar = true; - } - else - syntax->is_single_dollar = false; - } - if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE)) - syntax->is_macro_escaped = true; - } - /* Disable multi-character delimiters if we discovered - delimiters. */ - if (!single_quote_possible) - syntax->is_single_quotes = false; - if (!single_comm_possible) - syntax->is_single_comments = false; - if ((1 < syntax->quote.len1 || 1 < syntax->quote.len2) - && (!syntax->is_single_quotes || lquote != -1 || rquote != -1)) - { - if (syntax->quote.len1) - { - syntax->quote.len1 = lquote == to_uchar (syntax->quote.str1[0]); - syntax->quote.str1[syntax->quote.len1] = '\0'; - } - if (syntax->quote.len2) - { - syntax->quote.len2 = rquote == to_uchar (syntax->quote.str2[0]); - syntax->quote.str2[syntax->quote.len2] = '\0'; - } - } - if ((1 < syntax->comm.len1 || 1 < syntax->comm.len2) - && (!syntax->is_single_comments || bcomm != -1 || ecomm != -1)) - { - if (syntax->comm.len1) - { - syntax->comm.len1 = bcomm == to_uchar (syntax->comm.str1[0]); - syntax->comm.str1[syntax->comm.len1] = '\0'; - } - if (syntax->comm.len2) - { - syntax->comm.len2 = ecomm == to_uchar (syntax->comm.str2[0]); - syntax->comm.str2[syntax->comm.len2] = '\0'; - } - } - /* Update the strings. */ - if (lquote != -1) - { - if (single_quote_possible) - syntax->is_single_quotes = true; - if (syntax->quote.len1) - assert (syntax->quote.len1 == 1); - else - { - free (syntax->quote.str1); - syntax->quote.str1 = xcharalloc (2); - syntax->quote.str1[1] = '\0'; - syntax->quote.len1 = 1; - } - syntax->quote.str1[0] = lquote; - if (rquote == -1) - { - rquote = '\''; - add_syntax_attribute (syntax, rquote, M4_SYNTAX_RQUOTE); - } - if (!syntax->quote.len2) - { - free (syntax->quote.str2); - syntax->quote.str2 = xcharalloc (2); - } - syntax->quote.str2[0] = rquote; - syntax->quote.str2[1] = '\0'; - syntax->quote.len2 = 1; - } - if (bcomm != -1) - { - if (single_comm_possible) - syntax->is_single_comments = true; - if (syntax->comm.len1) - assert (syntax->comm.len1 == 1); - else - { - free (syntax->comm.str1); - syntax->comm.str1 = xcharalloc (2); - syntax->comm.str1[1] = '\0'; - syntax->comm.len1 = 1; - } - syntax->comm.str1[0] = bcomm; - if (ecomm == -1) - { - ecomm = '\n'; - add_syntax_attribute (syntax, ecomm, M4_SYNTAX_ECOMM); - } - if (!syntax->comm.len2) - { - free (syntax->comm.str2); - syntax->comm.str2 = xcharalloc (2); - } - syntax->comm.str2[0] = ecomm; - syntax->comm.str2[1] = '\0'; - syntax->comm.len2 = 1; - } - } - set_quote_age (syntax, false, true); - m4__quote_uncache (syntax); - return code; -} - - -/* Functions for setting quotes and comment delimiters. Used by - m4_changecom () and m4_changequote (). Both functions override the - syntax table to maintain compatibility. */ - -/* Set the quote delimiters to LQ and RQ, with respective lengths - LQ_LEN and RQ_LEN. Pass NULL if the argument was not present, to - distinguish from an explicit empty string. */ -void -m4_set_quotes (m4_syntax_table *syntax, const char *lq, size_t lq_len, - const char *rq, size_t rq_len) -{ - int ch; - - assert (syntax); - - /* POSIX states that with 0 arguments, the default quotes are used. - POSIX XCU ERN 112 states that behavior is implementation-defined - if there was only one argument, or if there is an empty string in - either position when there are two arguments. We allow an empty - left quote to disable quoting, but a non-empty left quote will - always create a non-empty right quote. See the texinfo for what - some other implementations do. */ - if (!lq) - { - lq = DEF_LQUOTE; - lq_len = 1; - rq = DEF_RQUOTE; - rq_len = 1; - } - else if (!rq || (lq_len && !rq_len)) - { - rq = DEF_RQUOTE; - rq_len = 1; - } - - if (syntax->quote.len1 == lq_len && syntax->quote.len2 == rq_len - && memcmp (syntax->quote.str1, lq, lq_len) == 0 - && memcmp (syntax->quote.str2, rq, rq_len) == 0) - return; - - free (syntax->quote.str1); - free (syntax->quote.str2); - /* The use of xmemdup0 is exploited by input.c. */ - syntax->quote.str1 = xmemdup0 (lq, lq_len); - syntax->quote.len1 = lq_len; - syntax->quote.str2 = xmemdup0 (rq, rq_len); - syntax->quote.len2 = rq_len; - - /* changequote overrides syntax_table, but be careful when it is - used to select a start-quote sequence that is effectively - disabled. */ - syntax->is_single_quotes = true; - for (ch = UCHAR_MAX + 1; --ch >= 0; ) - { - if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE)) - add_syntax_attribute (syntax, ch, - (syntax->orig[ch] == M4_SYNTAX_LQUOTE - ? M4_SYNTAX_OTHER : syntax->orig[ch])); - if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE)) - remove_syntax_attribute (syntax, ch, M4_SYNTAX_RQUOTE); - } - - if (!m4_has_syntax (syntax, *syntax->quote.str1, - (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE | M4_SYNTAX_ALPHA - | M4_SYNTAX_NUM))) - { - if (syntax->quote.len1 == 1) - add_syntax_attribute (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE); - if (syntax->quote.len2 == 1) - add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE); - } - set_quote_age (syntax, false, false); -} - -/* Set the comment delimiters to BC and EC, with respective lengths - BC_LEN and EC_LEN. Pass NULL if the argument was not present, to - distinguish from an explicit empty string. */ -void -m4_set_comment (m4_syntax_table *syntax, const char *bc, size_t bc_len, - const char *ec, size_t ec_len) -{ - int ch; - - assert (syntax); - - /* POSIX requires no arguments to disable comments, and that one - argument use newline as the close-comment. POSIX XCU ERN 131 - states that empty arguments invoke implementation-defined - behavior. We allow an empty begin comment to disable comments, - and a non-empty begin comment will always create a non-empty end - comment. See the texinfo for what some other implementations - do. */ - if (!bc) - { - bc = ec = ""; - bc_len = ec_len = 0; - } - else if (!ec || (bc_len && !ec_len)) - { - ec = DEF_ECOMM; - ec_len = 1; - } - - if (syntax->comm.len1 == bc_len && syntax->comm.len2 == ec_len - && memcmp (syntax->comm.str1, bc, bc_len) == 0 - && memcmp (syntax->comm.str2, ec, ec_len) == 0) - return; - - free (syntax->comm.str1); - free (syntax->comm.str2); - /* The use of xmemdup0 is exploited by input.c. */ - syntax->comm.str1 = xmemdup0 (bc, bc_len); - syntax->comm.len1 = bc_len; - syntax->comm.str2 = xmemdup0 (ec, ec_len); - syntax->comm.len2 = ec_len; - - /* changecom overrides syntax_table, but be careful when it is used - to select a start-comment sequence that is effectively - disabled. */ - syntax->is_single_comments = true; - for (ch = UCHAR_MAX + 1; --ch >= 0; ) - { - if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM)) - add_syntax_attribute (syntax, ch, - (syntax->orig[ch] == M4_SYNTAX_BCOMM - ? M4_SYNTAX_OTHER : syntax->orig[ch])); - if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM)) - remove_syntax_attribute (syntax, ch, M4_SYNTAX_ECOMM); - } - if (!m4_has_syntax (syntax, *syntax->comm.str1, - (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE | M4_SYNTAX_ALPHA - | M4_SYNTAX_NUM | M4_SYNTAX_LQUOTE))) - { - if (syntax->comm.len1 == 1) - add_syntax_attribute (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM); - if (syntax->comm.len2 == 1) - add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM); - } - set_quote_age (syntax, false, false); -} - -/* Call this when changing anything that might impact the quote age, - so that m4__quote_age and m4__safe_quotes will reflect the change. - If RESET, changesyntax was reset to its default stage; if CHANGE, - arbitrary syntax has changed; otherwise, just quotes or comment - delimiters have changed. */ -static void -set_quote_age (m4_syntax_table *syntax, bool reset, bool change) -{ - /* Multi-character quotes are inherently unsafe, since concatenation - of individual characters can result in a quote delimiter, - consider: - - define(echo,``$1'')define(a,A)changequote(<[,]>)echo(<[]]><[>a]>) - => A]> (not ]>a) - - Also, unquoted close delimiters are unsafe, consider: - - define(echo,``$1'')define(a,A)echo(`a''`a') - => aA' (not a'a) - - Duplicated start and end quote delimiters, as well as comment - delimiters that overlap with quote delimiters or active characters, - also present a problem, consider: - - define(echo,$*)echo(a,a,a`'define(a,A)changecom(`,',`,')) - => A,a,A (not A,A,A) - - The impact of arbitrary changesyntax is difficult to characterize. - So if things are in their default state, we use 0 for the upper 16 - bits of quote_age; otherwise we increment syntax_age for each - changesyntax, but saturate it at 0xffff rather than wrapping - around. Perhaps a cache of other frequently used states is - warranted, if changesyntax becomes more popular. - - Perhaps someday we will fix $@ expansion to use the current - settings of the comma category, or even allow multi-character - argument separators via changesyntax. Until then, we use a literal - `,' in $@ expansion, therefore we must insist that `,' be an - argument separator for quote_age to be non-zero. - - Rather than check every token for an unquoted delimiter, we merely - encode current_quote_age to 0 when things are unsafe, and non-zero - when safe (namely, the syntax_age in the upper 16 bits, coupled - with the 16-bit value composed of the single-character start and - end quote delimiters). There may be other situations which are - safe even when this algorithm sets the quote_age to zero, but at - least a quote_age of zero always produces correct results (although - it may take more time in doing so). */ - - unsigned short local_syntax_age; - if (reset) - local_syntax_age = 0; - else if (change && syntax->syntax_age < 0xffff) - local_syntax_age = ++syntax->syntax_age; - else - local_syntax_age = syntax->syntax_age; - if (local_syntax_age < 0xffff && syntax->is_single_quotes - && syntax->quote.len1 == 1 && syntax->quote.len2 == 1 - && !m4_has_syntax (syntax, *syntax->quote.str1, - (M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_OPEN - | M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE - | M4_SYNTAX_SPACE)) - && !m4_has_syntax (syntax, *syntax->quote.str2, - (M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_OPEN - | M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE - | M4_SYNTAX_SPACE)) - && *syntax->quote.str1 != *syntax->quote.str2 - && (!syntax->comm.len1 - || (*syntax->comm.str1 != *syntax->quote.str2 - && !m4_has_syntax (syntax, *syntax->comm.str1, - (M4_SYNTAX_OPEN | M4_SYNTAX_COMMA - | M4_SYNTAX_CLOSE)))) - && m4_has_syntax (syntax, ',', M4_SYNTAX_COMMA)) - { - syntax->quote_age = ((local_syntax_age << 16) - | ((*syntax->quote.str1 & 0xff) << 8) - | (*syntax->quote.str2 & 0xff)); - } - else - syntax->quote_age = 0; -} - -/* Interface for caching frequently used quote pairs, independently of - the current quote delimiters (for example, consider a text macro - expansion that includes several copies of $@), and using AGE for - optimization. If QUOTES is NULL, don't use quoting. If OBS is - non-NULL, AGE should be the current quote age, and QUOTES should be - m4_get_syntax_quotes; the return value will be a cached quote pair, - where the pointer is valid at least as long as OBS is not reset, - but whose contents are only guaranteed until the next changequote - or quote_cache. Otherwise, OBS is NULL, AGE should be the same as - before, and QUOTES should be a previously returned cache value; - used to refresh the contents of the result. */ -const m4_string_pair * -m4__quote_cache (m4_syntax_table *syntax, m4_obstack *obs, unsigned int age, - const m4_string_pair *quotes) -{ - /* Implementation - if AGE is non-zero, then the implementation of - set_quote_age guarantees that we can recreate the return value on - the fly; so we use static storage, and the contents must be used - immediately. If AGE is zero, then we must copy QUOTES onto OBS, - but we might as well cache that copy. */ - if (!quotes) - return NULL; - if (age) - { - *syntax->cached_lquote = (age >> 8) & 0xff; - *syntax->cached_rquote = age & 0xff; - return &syntax->cached_simple; - } - if (!obs) - return quotes; - assert (quotes == &syntax->quote); - if (!syntax->cached_quote) - { - assert (obstack_object_size (obs) == 0); - syntax->cached_quote = (m4_string_pair *) obstack_copy (obs, quotes, - sizeof *quotes); - syntax->cached_quote->str1 = (char *) obstack_copy0 (obs, quotes->str1, - quotes->len1); - syntax->cached_quote->str2 = (char *) obstack_copy0 (obs, quotes->str2, - quotes->len2); - } - return syntax->cached_quote; -} - - -/* Define these functions at the end, so that calls in the file use the - faster macro version from m4module.h. */ -#undef m4_get_syntax_lquote -const char * -m4_get_syntax_lquote (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->quote.str1; -} - -#undef m4_get_syntax_rquote -const char * -m4_get_syntax_rquote (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->quote.str2; -} - -#undef m4_get_syntax_quotes -const m4_string_pair * -m4_get_syntax_quotes (m4_syntax_table *syntax) -{ - assert (syntax); - return &syntax->quote; -} - -#undef m4_is_syntax_single_quotes -bool -m4_is_syntax_single_quotes (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->is_single_quotes; -} - -#undef m4_get_syntax_bcomm -const char * -m4_get_syntax_bcomm (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->comm.str1; -} - -#undef m4_get_syntax_ecomm -const char * -m4_get_syntax_ecomm (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->comm.str2; -} - -#undef m4_get_syntax_comments -const m4_string_pair * -m4_get_syntax_comments (m4_syntax_table *syntax) -{ - assert (syntax); - return &syntax->comm; -} - -#undef m4_is_syntax_single_comments -bool -m4_is_syntax_single_comments (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->is_single_comments; -} - -#undef m4_is_syntax_single_dollar -bool -m4_is_syntax_single_dollar (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->is_single_dollar; -} - -#undef m4_is_syntax_macro_escaped -bool -m4_is_syntax_macro_escaped (m4_syntax_table *syntax) -{ - assert (syntax); - return syntax->is_macro_escaped; -} |