/* GNU m4 -- A simple macro processor
   Copyright (C) 1989-1994, 2002, 2004, 2006-2010, 2013-2014 Free
   Software Foundation, Inc.

   This file is part of GNU M4.

   GNU M4 is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   GNU M4 is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <config.h>

#include "m4private.h"

/* Define this to see runtime debug info.  Implied by DEBUG.  */
/*#define DEBUG_SYNTAX */

/* THE SYNTAX TABLE

   The input is read character by character and grouped together
   according to a syntax table.  The character groups are (definitions
   are all in m4module.h, those marked with a * are not yet in use):

   Basic (all characters fall in one of these mutually exclusive bins)
   M4_SYNTAX_IGNORE     *Character to be deleted from input as if not present
   M4_SYNTAX_OTHER      Any character with no special meaning to m4
   M4_SYNTAX_SPACE      Whitespace (ignored when leading macro arguments)
   M4_SYNTAX_OPEN       Open list of macro arguments
   M4_SYNTAX_CLOSE      Close list of macro arguments
   M4_SYNTAX_COMMA      Separates macro arguments
   M4_SYNTAX_ACTIVE     This character is a macro name by itself
   M4_SYNTAX_ESCAPE     Use this character to prefix all macro names

   M4_SYNTAX_ALPHA      Alphabetic characters (can start macro names)
   M4_SYNTAX_NUM        Numeric characters (can form macro names)

   M4_SYNTAX_LQUOTE     A single character left quote
   M4_SYNTAX_BCOMM      A single character begin comment delimiter

   Attribute (these are context sensitive, and exist in addition to basic)
   M4_SYNTAX_RQUOTE     A single character right quote
   M4_SYNTAX_ECOMM      A single character end comment delimiter
   M4_SYNTAX_DOLLAR     Indicates macro argument in user macros
   M4_SYNTAX_LBRACE     *Indicates start of extended macro argument
   M4_SYNTAX_RBRACE     *Indicates end of extended macro argument

   Besides adding new facilities, the use of a syntax table will reduce
   the number of calls to next_token ().  Now groups of OTHER, NUM and
   SPACE characters can be returned as a single token, since next_token
   () knows they have no special syntactical meaning to m4.  This is,
   however, only possible if only single character quotes and comments
   are used, because otherwise the quote and comment characters will
   not show up in the syntax-table.

   Having a syntax table allows new facilities.  The new builtin
   "changesyntax" allows the user to change the category of any
   character.

   By default, '\n' is both ECOMM and SPACE, depending on the context.
   Hence we have basic categories (mutually exclusive, can introduce a
   context, and can be empty sets), and attribute categories
   (additive, only recognized in context, and will never be empty).

   The precedence as implemented by next_token () is:

   M4_SYNTAX_IGNORE     *Filtered out below next_token ()
   M4_SYNTAX_ESCAPE     Reads macro name iff set, else next character
   M4_SYNTAX_ALPHA      Reads M4_SYNTAX_ALPHA and M4_SYNTAX_NUM as macro name
   M4_SYNTAX_LQUOTE     Reads all until balanced M4_SYNTAX_RQUOTE
   M4_SYNTAX_BCOMM      Reads all until M4_SYNTAX_ECOMM

   M4_SYNTAX_OTHER  }   Reads all M4_SYNTAX_OTHER, M4_SYNTAX_NUM
   M4_SYNTAX_NUM    }
   M4_SYNTAX_SPACE      Reads all M4_SYNTAX_SPACE, depending on buffering
   M4_SYNTAX_ACTIVE     Returns a single char as a macro name

   M4_SYNTAX_OPEN   }   Returned as a single char
   M4_SYNTAX_CLOSE  }
   M4_SYNTAX_COMMA  }

   M4_SYNTAX_RQUOTE and M4_SYNTAX_ECOMM are context-sensitive, and
   close out M4_SYNTAX_LQUOTE and M4_SYNTAX_BCOMM, respectively.
   Also, M4_SYNTAX_DOLLAR, M4_SYNTAX_LBRACE, and M4_SYNTAX_RBRACE are
   context-sensitive, only mattering when expanding macro definitions.

   There are several optimizations that can be performed depending on
   known states of the syntax table.
For example, when searching for quotes, if there is only a single start quote and end quote delimiter, we can use memchr2 and search a word at a time, instead of performing a table lookup a byte at a time. The is_single_* flags track whether quotes and comments have a single delimiter (always the case if changequote/changecom were used, and potentially the case after changesyntax). Since we frequently need to access quotes, we store the oldest valid quote outside the lookup table; the suspect flag tracks whether a cleanup pass is needed to restore our invariants. On the other hand, coalescing multiple M4_SYNTAX_OTHER bytes could form a delimiter, so many optimizations must be disabled if a multi-byte delimiter exists; this is handled by m4__safe_quotes. Meanwhile, quotes and comments can be disabled if the leading delimiter is length 0. */ static int add_syntax_attribute (m4_syntax_table *, char, int); static int remove_syntax_attribute (m4_syntax_table *, char, int); static void set_quote_age (m4_syntax_table *, bool, bool); m4_syntax_table * m4_syntax_create (void) { m4_syntax_table *syntax = (m4_syntax_table *) xzalloc (sizeof *syntax); int ch; /* Set up default table. This table never changes during operation, and contains no context attributes. */ for (ch = UCHAR_MAX + 1; --ch >= 0; ) switch (ch) { case '(': syntax->orig[ch] = M4_SYNTAX_OPEN; break; case ')': syntax->orig[ch] = M4_SYNTAX_CLOSE; break; case ',': syntax->orig[ch] = M4_SYNTAX_COMMA; break; case '`': syntax->orig[ch] = M4_SYNTAX_LQUOTE; break; case '#': syntax->orig[ch] = M4_SYNTAX_BCOMM; break; default: if (isspace (ch)) syntax->orig[ch] = M4_SYNTAX_SPACE; else if (isalpha (ch) || ch == '_') syntax->orig[ch] = M4_SYNTAX_ALPHA; else if (isdigit (ch)) syntax->orig[ch] = M4_SYNTAX_NUM; else syntax->orig[ch] = M4_SYNTAX_OTHER; } /* Set up current table to match default. 
*/ m4_reset_syntax (syntax); syntax->cached_simple.str1 = syntax->cached_lquote; syntax->cached_simple.len1 = 1; syntax->cached_simple.str2 = syntax->cached_rquote; syntax->cached_simple.len2 = 1; return syntax; } void m4_syntax_delete (m4_syntax_table *syntax) { assert (syntax); free (syntax->quote.str1); free (syntax->quote.str2); free (syntax->comm.str1); free (syntax->comm.str2); free (syntax); } int m4_syntax_code (char ch) { int code; switch (ch) { /* Sorted according to the order of M4_SYNTAX_* in m4module.h. */ /* FIXME - revisit the ignore syntax attribute. */ case 'I': case 'i': code = M4_SYNTAX_IGNORE; break; /* Basic categories. */ case '@': code = M4_SYNTAX_ESCAPE; break; case 'W': case 'w': code = M4_SYNTAX_ALPHA; break; case 'L': case 'l': code = M4_SYNTAX_LQUOTE; break; case 'B': case 'b': code = M4_SYNTAX_BCOMM; break; case 'A': case 'a': code = M4_SYNTAX_ACTIVE; break; case 'D': case 'd': code = M4_SYNTAX_NUM; break; case 'S': case 's': code = M4_SYNTAX_SPACE; break; case '(': code = M4_SYNTAX_OPEN; break; case ')': code = M4_SYNTAX_CLOSE; break; case ',': code = M4_SYNTAX_COMMA; break; case 'O': case 'o': code = M4_SYNTAX_OTHER; break; /* Context categories. */ case '$': code = M4_SYNTAX_DOLLAR; break; case '{': code = M4_SYNTAX_LBRACE; break; case '}': code = M4_SYNTAX_RBRACE; break; case 'R': case 'r': code = M4_SYNTAX_RQUOTE; break; case 'E': case 'e': code = M4_SYNTAX_ECOMM; break; default: code = -1; break; } return code; } /* Functions to manipulate the syntax table. */ static int add_syntax_attribute (m4_syntax_table *syntax, char ch, int code) { int c = to_uchar (ch); if (code & M4_SYNTAX_MASKS) { syntax->table[c] |= code; syntax->suspect = true; } else { if ((code & (M4_SYNTAX_SUSPECT)) != 0 || m4_has_syntax (syntax, c, M4_SYNTAX_SUSPECT)) syntax->suspect = true; syntax->table[c] = ((syntax->table[c] & M4_SYNTAX_MASKS) | code); } #ifdef DEBUG_SYNTAX xfprintf(stderr, "Set syntax %o %c = %04X\n", c, isprint(c) ? 
c : '-', syntax->table[c]); #endif return syntax->table[c]; } static int remove_syntax_attribute (m4_syntax_table *syntax, char ch, int code) { int c = to_uchar (ch); assert (code & M4_SYNTAX_MASKS); syntax->table[c] &= ~code; syntax->suspect = true; #ifdef DEBUG_SYNTAX xfprintf(stderr, "Unset syntax %o %c = %04X\n", c, isprint(c) ? c : '-', syntax->table[c]); #endif return syntax->table[c]; } /* Add the set CHARS of length LEN to syntax category CODE, removing them from whatever category they used to be in. */ static void add_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len, int code) { while (len--) add_syntax_attribute (syntax, *chars++, code); } /* Remove the set CHARS of length LEN from syntax category CODE, adding them to category M4_SYNTAX_OTHER instead. */ static void subtract_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len, int code) { while (len--) { char ch = *chars++; if ((code & M4_SYNTAX_MASKS) != 0) remove_syntax_attribute (syntax, ch, code); else if (m4_has_syntax (syntax, ch, code)) add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER); } } /* Make the set CHARS of length LEN become syntax category CODE, removing CHARS from any other categories, and sending all bytes in the category but not in CHARS to category M4_SYNTAX_OTHER instead. */ static void set_syntax_set (m4_syntax_table *syntax, const char *chars, size_t len, int code) { int ch; /* Explicit set of characters to install with this category; all other characters that used to have the category get reset to OTHER. */ for (ch = UCHAR_MAX + 1; --ch >= 0; ) { if ((code & M4_SYNTAX_MASKS) != 0) remove_syntax_attribute (syntax, ch, code); else if (m4_has_syntax (syntax, ch, code)) add_syntax_attribute (syntax, ch, M4_SYNTAX_OTHER); } while (len--) { ch = *chars++; add_syntax_attribute (syntax, ch, code); } } /* Reset syntax category CODE to its default state, sending all other characters in the category back to their default state. 
*/ static void reset_syntax_set (m4_syntax_table *syntax, int code) { int ch; for (ch = UCHAR_MAX + 1; --ch >= 0; ) { /* Reset the category back to its default state. All other characters that used to have this category get reset to their default state as well. */ if (code == M4_SYNTAX_RQUOTE) { if (ch == '\'') add_syntax_attribute (syntax, ch, code); else remove_syntax_attribute (syntax, ch, code); } else if (code == M4_SYNTAX_ECOMM) { if (ch == '\n') add_syntax_attribute (syntax, ch, code); else remove_syntax_attribute (syntax, ch, code); } else if (code == M4_SYNTAX_DOLLAR) { if (ch == '$') add_syntax_attribute (syntax, ch, code); else remove_syntax_attribute (syntax, ch, code); } else if (code == M4_SYNTAX_LBRACE) { if (ch == '{') add_syntax_attribute (syntax, ch, code); else remove_syntax_attribute (syntax, ch, code); } else if (code == M4_SYNTAX_RBRACE) { if (ch == '}') add_syntax_attribute (syntax, ch, code); else remove_syntax_attribute (syntax, ch, code); } else if (syntax->orig[ch] == code || m4_has_syntax (syntax, ch, code)) add_syntax_attribute (syntax, ch, syntax->orig[ch]); } } /* Reset the syntax table to its default state. */ void m4_reset_syntax (m4_syntax_table *syntax) { /* Restore the default syntax, which has known quote and comment properties. */ memcpy (syntax->table, syntax->orig, sizeof syntax->orig); free (syntax->quote.str1); free (syntax->quote.str2); free (syntax->comm.str1); free (syntax->comm.str2); /* The use of xmemdup0 is exploited by input.c. 
*/ syntax->quote.str1 = xmemdup0 (DEF_LQUOTE, 1); syntax->quote.len1 = 1; syntax->quote.str2 = xmemdup0 (DEF_RQUOTE, 1); syntax->quote.len2 = 1; syntax->comm.str1 = xmemdup0 (DEF_BCOMM, 1); syntax->comm.len1 = 1; syntax->comm.str2 = xmemdup0 (DEF_ECOMM, 1); syntax->comm.len2 = 1; syntax->dollar = '$'; add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE); add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM); add_syntax_attribute (syntax, '$', M4_SYNTAX_DOLLAR); add_syntax_attribute (syntax, '{', M4_SYNTAX_LBRACE); add_syntax_attribute (syntax, '}', M4_SYNTAX_RBRACE); syntax->is_single_quotes = true; syntax->is_single_comments = true; syntax->is_single_dollar = true; syntax->is_macro_escaped = false; set_quote_age (syntax, true, false); } /* Alter the syntax for category KEY, according to ACTION: '+' to add, '-' to subtract, '=' to set, or '\0' to reset. The array CHARS of length LEN describes the characters to modify; it is ignored if ACTION is '\0'. Return -1 if KEY is invalid, otherwise return the syntax category matching KEY. */ int m4_set_syntax (m4_syntax_table *syntax, char key, char action, const char *chars, size_t len) { int code; assert (syntax && chars); code = m4_syntax_code (key); if (code < 0) { return -1; } syntax->suspect = false; switch (action) { case '+': add_syntax_set (syntax, chars, len, code); break; case '-': subtract_syntax_set (syntax, chars, len, code); break; case '=': set_syntax_set (syntax, chars, len, code); break; case '\0': assert (!len); reset_syntax_set (syntax, code); break; default: assert (false); } /* Check for any cleanup needed. 
*/ if (syntax->suspect) { int ch; int lquote = -1; int rquote = -1; int bcomm = -1; int ecomm = -1; bool single_quote_possible = true; bool single_comm_possible = true; int dollar = -1; if (m4_has_syntax (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE)) { assert (syntax->quote.len1 == 1); lquote = to_uchar (syntax->quote.str1[0]); } if (m4_has_syntax (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE)) { assert (syntax->quote.len2 == 1); rquote = to_uchar (syntax->quote.str2[0]); } if (m4_has_syntax (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM)) { assert (syntax->comm.len1 == 1); bcomm = to_uchar (syntax->comm.str1[0]); } if (m4_has_syntax (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM)) { assert (syntax->comm.len2 == 1); ecomm = to_uchar (syntax->comm.str2[0]); } syntax->is_single_dollar = false; syntax->is_macro_escaped = false; /* Find candidates for each category. */ for (ch = UCHAR_MAX + 1; --ch >= 0; ) { if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE)) { if (lquote == -1) lquote = ch; else if (lquote != ch) single_quote_possible = false; } if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE)) { if (rquote == -1) rquote = ch; else if (rquote != ch) single_quote_possible = false; } if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM)) { if (bcomm == -1) bcomm = ch; else if (bcomm != ch) single_comm_possible = false; } if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM)) { if (ecomm == -1) ecomm = ch; else if (ecomm != ch) single_comm_possible = false; } if (m4_has_syntax (syntax, ch, M4_SYNTAX_DOLLAR)) { if (dollar == -1) { syntax->dollar = dollar = ch; syntax->is_single_dollar = true; } else syntax->is_single_dollar = false; } if (m4_has_syntax (syntax, ch, M4_SYNTAX_ESCAPE)) syntax->is_macro_escaped = true; } /* Disable multi-character delimiters if we discovered delimiters. 
*/ if (!single_quote_possible) syntax->is_single_quotes = false; if (!single_comm_possible) syntax->is_single_comments = false; if ((1 < syntax->quote.len1 || 1 < syntax->quote.len2) && (!syntax->is_single_quotes || lquote != -1 || rquote != -1)) { if (syntax->quote.len1) { syntax->quote.len1 = lquote == to_uchar (syntax->quote.str1[0]); syntax->quote.str1[syntax->quote.len1] = '\0'; } if (syntax->quote.len2) { syntax->quote.len2 = rquote == to_uchar (syntax->quote.str2[0]); syntax->quote.str2[syntax->quote.len2] = '\0'; } } if ((1 < syntax->comm.len1 || 1 < syntax->comm.len2) && (!syntax->is_single_comments || bcomm != -1 || ecomm != -1)) { if (syntax->comm.len1) { syntax->comm.len1 = bcomm == to_uchar (syntax->comm.str1[0]); syntax->comm.str1[syntax->comm.len1] = '\0'; } if (syntax->comm.len2) { syntax->comm.len2 = ecomm == to_uchar (syntax->comm.str2[0]); syntax->comm.str2[syntax->comm.len2] = '\0'; } } /* Update the strings. */ if (lquote != -1) { if (single_quote_possible) syntax->is_single_quotes = true; if (syntax->quote.len1) assert (syntax->quote.len1 == 1); else { free (syntax->quote.str1); syntax->quote.str1 = xcharalloc (2); syntax->quote.str1[1] = '\0'; syntax->quote.len1 = 1; } syntax->quote.str1[0] = lquote; if (rquote == -1) { rquote = '\''; add_syntax_attribute (syntax, rquote, M4_SYNTAX_RQUOTE); } if (!syntax->quote.len2) { free (syntax->quote.str2); syntax->quote.str2 = xcharalloc (2); } syntax->quote.str2[0] = rquote; syntax->quote.str2[1] = '\0'; syntax->quote.len2 = 1; } if (bcomm != -1) { if (single_comm_possible) syntax->is_single_comments = true; if (syntax->comm.len1) assert (syntax->comm.len1 == 1); else { free (syntax->comm.str1); syntax->comm.str1 = xcharalloc (2); syntax->comm.str1[1] = '\0'; syntax->comm.len1 = 1; } syntax->comm.str1[0] = bcomm; if (ecomm == -1) { ecomm = '\n'; add_syntax_attribute (syntax, ecomm, M4_SYNTAX_ECOMM); } if (!syntax->comm.len2) { free (syntax->comm.str2); syntax->comm.str2 = xcharalloc (2); } 
syntax->comm.str2[0] = ecomm; syntax->comm.str2[1] = '\0'; syntax->comm.len2 = 1; } } set_quote_age (syntax, false, true); m4__quote_uncache (syntax); return code; } /* Functions for setting quotes and comment delimiters. Used by m4_changecom () and m4_changequote (). Both functions override the syntax table to maintain compatibility. */ /* Set the quote delimiters to LQ and RQ, with respective lengths LQ_LEN and RQ_LEN. Pass NULL if the argument was not present, to distinguish from an explicit empty string. */ void m4_set_quotes (m4_syntax_table *syntax, const char *lq, size_t lq_len, const char *rq, size_t rq_len) { int ch; assert (syntax); /* POSIX states that with 0 arguments, the default quotes are used. POSIX XCU ERN 112 states that behavior is implementation-defined if there was only one argument, or if there is an empty string in either position when there are two arguments. We allow an empty left quote to disable quoting, but a non-empty left quote will always create a non-empty right quote. See the texinfo for what some other implementations do. */ if (!lq) { lq = DEF_LQUOTE; lq_len = 1; rq = DEF_RQUOTE; rq_len = 1; } else if (!rq || (lq_len && !rq_len)) { rq = DEF_RQUOTE; rq_len = 1; } if (syntax->quote.len1 == lq_len && syntax->quote.len2 == rq_len && memcmp (syntax->quote.str1, lq, lq_len) == 0 && memcmp (syntax->quote.str2, rq, rq_len) == 0) return; free (syntax->quote.str1); free (syntax->quote.str2); /* The use of xmemdup0 is exploited by input.c. */ syntax->quote.str1 = xmemdup0 (lq, lq_len); syntax->quote.len1 = lq_len; syntax->quote.str2 = xmemdup0 (rq, rq_len); syntax->quote.len2 = rq_len; /* changequote overrides syntax_table, but be careful when it is used to select a start-quote sequence that is effectively disabled. */ syntax->is_single_quotes = true; for (ch = UCHAR_MAX + 1; --ch >= 0; ) { if (m4_has_syntax (syntax, ch, M4_SYNTAX_LQUOTE)) add_syntax_attribute (syntax, ch, (syntax->orig[ch] == M4_SYNTAX_LQUOTE ? 
M4_SYNTAX_OTHER : syntax->orig[ch])); if (m4_has_syntax (syntax, ch, M4_SYNTAX_RQUOTE)) remove_syntax_attribute (syntax, ch, M4_SYNTAX_RQUOTE); } if (!m4_has_syntax (syntax, *syntax->quote.str1, (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM))) { if (syntax->quote.len1 == 1) add_syntax_attribute (syntax, syntax->quote.str1[0], M4_SYNTAX_LQUOTE); if (syntax->quote.len2 == 1) add_syntax_attribute (syntax, syntax->quote.str2[0], M4_SYNTAX_RQUOTE); } set_quote_age (syntax, false, false); } /* Set the comment delimiters to BC and EC, with respective lengths BC_LEN and EC_LEN. Pass NULL if the argument was not present, to distinguish from an explicit empty string. */ void m4_set_comment (m4_syntax_table *syntax, const char *bc, size_t bc_len, const char *ec, size_t ec_len) { int ch; assert (syntax); /* POSIX requires no arguments to disable comments, and that one argument use newline as the close-comment. POSIX XCU ERN 131 states that empty arguments invoke implementation-defined behavior. We allow an empty begin comment to disable comments, and a non-empty begin comment will always create a non-empty end comment. See the texinfo for what some other implementations do. */ if (!bc) { bc = ec = ""; bc_len = ec_len = 0; } else if (!ec || (bc_len && !ec_len)) { ec = DEF_ECOMM; ec_len = 1; } if (syntax->comm.len1 == bc_len && syntax->comm.len2 == ec_len && memcmp (syntax->comm.str1, bc, bc_len) == 0 && memcmp (syntax->comm.str2, ec, ec_len) == 0) return; free (syntax->comm.str1); free (syntax->comm.str2); /* The use of xmemdup0 is exploited by input.c. */ syntax->comm.str1 = xmemdup0 (bc, bc_len); syntax->comm.len1 = bc_len; syntax->comm.str2 = xmemdup0 (ec, ec_len); syntax->comm.len2 = ec_len; /* changecom overrides syntax_table, but be careful when it is used to select a start-comment sequence that is effectively disabled. 
*/ syntax->is_single_comments = true; for (ch = UCHAR_MAX + 1; --ch >= 0; ) { if (m4_has_syntax (syntax, ch, M4_SYNTAX_BCOMM)) add_syntax_attribute (syntax, ch, (syntax->orig[ch] == M4_SYNTAX_BCOMM ? M4_SYNTAX_OTHER : syntax->orig[ch])); if (m4_has_syntax (syntax, ch, M4_SYNTAX_ECOMM)) remove_syntax_attribute (syntax, ch, M4_SYNTAX_ECOMM); } if (!m4_has_syntax (syntax, *syntax->comm.str1, (M4_SYNTAX_IGNORE | M4_SYNTAX_ESCAPE | M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_LQUOTE))) { if (syntax->comm.len1 == 1) add_syntax_attribute (syntax, syntax->comm.str1[0], M4_SYNTAX_BCOMM); if (syntax->comm.len2 == 1) add_syntax_attribute (syntax, syntax->comm.str2[0], M4_SYNTAX_ECOMM); } set_quote_age (syntax, false, false); } /* Call this when changing anything that might impact the quote age, so that m4__quote_age and m4__safe_quotes will reflect the change. If RESET, changesyntax was reset to its default stage; if CHANGE, arbitrary syntax has changed; otherwise, just quotes or comment delimiters have changed. */ static void set_quote_age (m4_syntax_table *syntax, bool reset, bool change) { /* Multi-character quotes are inherently unsafe, since concatenation of individual characters can result in a quote delimiter, consider: define(echo,``$1'')define(a,A)changequote(<[,]>)echo(<[]]><[>a]>) => A]> (not ]>a) Also, unquoted close delimiters are unsafe, consider: define(echo,``$1'')define(a,A)echo(`a''`a') => aA' (not a'a) Duplicated start and end quote delimiters, as well as comment delimiters that overlap with quote delimiters or active characters, also present a problem, consider: define(echo,$*)echo(a,a,a`'define(a,A)changecom(`,',`,')) => A,a,A (not A,A,A) The impact of arbitrary changesyntax is difficult to characterize. So if things are in their default state, we use 0 for the upper 16 bits of quote_age; otherwise we increment syntax_age for each changesyntax, but saturate it at 0xffff rather than wrapping around. 
Perhaps a cache of other frequently used states is warranted, if changesyntax becomes more popular. Perhaps someday we will fix $@ expansion to use the current settings of the comma category, or even allow multi-character argument separators via changesyntax. Until then, we use a literal `,' in $@ expansion, therefore we must insist that `,' be an argument separator for quote_age to be non-zero. Rather than check every token for an unquoted delimiter, we merely encode current_quote_age to 0 when things are unsafe, and non-zero when safe (namely, the syntax_age in the upper 16 bits, coupled with the 16-bit value composed of the single-character start and end quote delimiters). There may be other situations which are safe even when this algorithm sets the quote_age to zero, but at least a quote_age of zero always produces correct results (although it may take more time in doing so). */ unsigned short local_syntax_age; if (reset) local_syntax_age = 0; else if (change && syntax->syntax_age < 0xffff) local_syntax_age = ++syntax->syntax_age; else local_syntax_age = syntax->syntax_age; if (local_syntax_age < 0xffff && syntax->is_single_quotes && syntax->quote.len1 == 1 && syntax->quote.len2 == 1 && !m4_has_syntax (syntax, *syntax->quote.str1, (M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_OPEN | M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE | M4_SYNTAX_SPACE)) && !m4_has_syntax (syntax, *syntax->quote.str2, (M4_SYNTAX_ALPHA | M4_SYNTAX_NUM | M4_SYNTAX_OPEN | M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE | M4_SYNTAX_SPACE)) && *syntax->quote.str1 != *syntax->quote.str2 && (!syntax->comm.len1 || (*syntax->comm.str1 != *syntax->quote.str2 && !m4_has_syntax (syntax, *syntax->comm.str1, (M4_SYNTAX_OPEN | M4_SYNTAX_COMMA | M4_SYNTAX_CLOSE)))) && m4_has_syntax (syntax, ',', M4_SYNTAX_COMMA)) { syntax->quote_age = ((local_syntax_age << 16) | ((*syntax->quote.str1 & 0xff) << 8) | (*syntax->quote.str2 & 0xff)); } else syntax->quote_age = 0; } /* Interface for caching frequently used quote pairs, 
independently of the current quote delimiters (for example, consider a text macro expansion that includes several copies of $@), and using AGE for optimization. If QUOTES is NULL, don't use quoting. If OBS is non-NULL, AGE should be the current quote age, and QUOTES should be m4_get_syntax_quotes; the return value will be a cached quote pair, where the pointer is valid at least as long as OBS is not reset, but whose contents are only guaranteed until the next changequote or quote_cache. Otherwise, OBS is NULL, AGE should be the same as before, and QUOTES should be a previously returned cache value; used to refresh the contents of the result. */ const m4_string_pair * m4__quote_cache (m4_syntax_table *syntax, m4_obstack *obs, unsigned int age, const m4_string_pair *quotes) { /* Implementation - if AGE is non-zero, then the implementation of set_quote_age guarantees that we can recreate the return value on the fly; so we use static storage, and the contents must be used immediately. If AGE is zero, then we must copy QUOTES onto OBS, but we might as well cache that copy. */ if (!quotes) return NULL; if (age) { *syntax->cached_lquote = (age >> 8) & 0xff; *syntax->cached_rquote = age & 0xff; return &syntax->cached_simple; } if (!obs) return quotes; assert (quotes == &syntax->quote); if (!syntax->cached_quote) { assert (obstack_object_size (obs) == 0); syntax->cached_quote = (m4_string_pair *) obstack_copy (obs, quotes, sizeof *quotes); syntax->cached_quote->str1 = (char *) obstack_copy0 (obs, quotes->str1, quotes->len1); syntax->cached_quote->str2 = (char *) obstack_copy0 (obs, quotes->str2, quotes->len2); } return syntax->cached_quote; } /* Define these functions at the end, so that calls in the file use the faster macro version from m4module.h. 
*/ #undef m4_get_syntax_lquote const char * m4_get_syntax_lquote (m4_syntax_table *syntax) { assert (syntax); return syntax->quote.str1; } #undef m4_get_syntax_rquote const char * m4_get_syntax_rquote (m4_syntax_table *syntax) { assert (syntax); return syntax->quote.str2; } #undef m4_get_syntax_quotes const m4_string_pair * m4_get_syntax_quotes (m4_syntax_table *syntax) { assert (syntax); return &syntax->quote; } #undef m4_is_syntax_single_quotes bool m4_is_syntax_single_quotes (m4_syntax_table *syntax) { assert (syntax); return syntax->is_single_quotes; } #undef m4_get_syntax_bcomm const char * m4_get_syntax_bcomm (m4_syntax_table *syntax) { assert (syntax); return syntax->comm.str1; } #undef m4_get_syntax_ecomm const char * m4_get_syntax_ecomm (m4_syntax_table *syntax) { assert (syntax); return syntax->comm.str2; } #undef m4_get_syntax_comments const m4_string_pair * m4_get_syntax_comments (m4_syntax_table *syntax) { assert (syntax); return &syntax->comm; } #undef m4_is_syntax_single_comments bool m4_is_syntax_single_comments (m4_syntax_table *syntax) { assert (syntax); return syntax->is_single_comments; } #undef m4_is_syntax_single_dollar bool m4_is_syntax_single_dollar (m4_syntax_table *syntax) { assert (syntax); return syntax->is_single_dollar; } #undef m4_is_syntax_macro_escaped bool m4_is_syntax_macro_escaped (m4_syntax_table *syntax) { assert (syntax); return syntax->is_macro_escaped; }