Stage11: full circle for single argument references

author: Eric Blake <ebb9@byu.net> 2007-10-27 05:44:09 -0600
committer: Eric Blake <ebb9@byu.net> 2008-01-19 15:24:14 -0700
commit: b1fef201f5d121e25e5dd61ec8ca3eac41a899ba (patch)
tree: a76568b9a8d798657905f9090405a87b3c933628
parent: 622fc8cb2cb6ce0fc7391a6414bb0aaffeec6fc0 (diff)
download: m4-b1fef201f5d121e25e5dd61ec8ca3eac41a899ba.tar.gz
3 files changed, 348 insertions, 118 deletions
diff --git a/src/input.c b/src/input.c
index 09cf7088..bf92bc44 100644
--- a/src/input.c
+++ b/src/input.c
@@ -153,6 +153,7 @@ static bool input_change;
 
 #define CHAR_EOF	256	/* Character return on EOF.  */
 #define CHAR_MACRO	257	/* Character return for MACRO token.  */
+#define CHAR_QUOTE	258	/* Character return for quoted string.  */
 
 /* Quote chars.  */
 STRING rquote;
@@ -167,7 +168,7 @@ STRING ecomm;
 # define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
 
 /* Table of characters that can start a word.  */
-static char *word_start;
+static char word_start[256];
 
 /* Current regular expression for detecting words.  */
 static struct re_pattern_buffer word_regexp;
@@ -201,7 +202,7 @@ static const char *token_type_string (token_type);
 | chain that starts at *START and ends at *END.  START may be NULL   |
 | if *END is non-NULL.                                               |
 `-------------------------------------------------------------------*/
-static void
+void
 make_text_link (struct obstack *obs, token_chain **start, token_chain **end)
 {
   token_chain *chain;
@@ -218,6 +219,7 @@ make_text_link (struct obstack *obs, token_chain **start, token_chain **end)
 	*start = chain;
       *end = chain;
       chain->next = NULL;
+      chain->quote_age = 0;
       chain->str = str;
       chain->len = len;
       chain->level = -1;
@@ -361,6 +363,7 @@ push_token (token_data *token, int level)
     next->u.u_c.chain = chain;
   next->u.u_c.end = chain;
   chain->next = NULL;
+  chain->quote_age = TOKEN_DATA_QUOTE_AGE (token);
   chain->str = TOKEN_DATA_TEXT (token);
   chain->len = TOKEN_DATA_LEN (token);
   chain->level = level;
@@ -563,19 +566,6 @@ pop_wrapup (void)
   return true;
 }
 
-/*-------------------------------------------------------------------.
-| When a MACRO token is seen, next_token () uses init_macro_token () |
-| to retrieve the value of the function pointer and store it in TD.  |
-`-------------------------------------------------------------------*/
-
-static void
-init_macro_token (token_data *td)
-{
-  assert (isp->type == INPUT_MACRO);
-  TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
-  TOKEN_DATA_FUNC (td) = isp->u.func;
-}
-
 /*--------------------------------------------------------------.
 | Dump a representation of INPUT to the obstack OBS, for use in |
 | tracing.                                                      |
@@ -699,16 +689,19 @@ peek_input (void)
 | consisting of a newline alone is taken as belonging to the line it |
 | ends, and the current line number is not incremented until the     |
 | next character is read.  99.9% of all calls will read from a       |
-| string, so factor that out into a macro for speed.                 |
+| string, so factor that out into a macro for speed.  If             |
+| ALLOW_QUOTE, and the current input matches the current quote age,  |
+| return CHAR_QUOTE and leave consumption of data for                |
+| append_quote_token.                                                |
 `-------------------------------------------------------------------*/
 
-#define next_char()							\
+#define next_char(AQ)							\
   (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change	\
    ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++))			\
-   : next_char_1 ())
+   : next_char_1 (AQ))
 
 static int
-next_char_1 (void)
+next_char_1 (bool allow_quote)
 {
   int ch;
   token_chain *chain;
@@ -765,10 +758,14 @@ next_char_1 (void)
 	  chain = isp->u.u_c.chain;
 	  while (chain)
 	    {
+	      if (allow_quote && chain->quote_age == current_quote_age)
+		return CHAR_QUOTE;
 	      if (chain->str)
 		{
 		  if (chain->len)
 		    {
+		      /* Partial consumption invalidates quote age.  */
+		      chain->quote_age = 0;
 		      chain->len--;
 		      return to_uchar (*chain->str++);
 		    }
@@ -808,7 +805,7 @@ skip_line (const char *name)
   const char *file = current_file;
   int line = current_line;
 
-  while ((ch = next_char ()) != CHAR_EOF && ch != '\n')
+  while ((ch = next_char (false)) != CHAR_EOF && ch != '\n')
     ;
   if (ch == CHAR_EOF)
     /* current_file changed to "" if we see CHAR_EOF, use the
@@ -825,6 +822,49 @@ skip_line (const char *name)
 }
 
 
+/*-------------------------------------------------------------------.
+| When a MACRO token is seen, next_token () uses init_macro_token () |
+| to retrieve the value of the function pointer and store it in TD.  |
+`-------------------------------------------------------------------*/
+
+static void
+init_macro_token (token_data *td)
+{
+  assert (isp->type == INPUT_MACRO);
+  TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
+  TOKEN_DATA_FUNC (td) = isp->u.func;
+}
+
+/*-------------------------------------------------------------------.
+| When a QUOTE token is seen, convert TD to a composite (if it is    |
+| not one already), consisting of any unfinished text on OBS, as     |
+| well as the quoted token from the top of the input stack.  Use OBS |
+| for any additional allocations needed to store the token chain.    |
+`-------------------------------------------------------------------*/
+static void
+append_quote_token (struct obstack *obs, token_data *td)
+{
+  token_chain *src_chain = isp->u.u_c.chain;
+  token_chain *chain;
+  assert (isp->type == INPUT_CHAIN && obs && current_quote_age);
+
+  if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
+    {
+      TOKEN_DATA_TYPE (td) = TOKEN_COMP;
+      td->u.u_c.chain = td->u.u_c.end = NULL;
+    }
+  assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP);
+  make_text_link (obs, &td->u.u_c.chain, &td->u.u_c.end);
+  chain = (token_chain *) obstack_copy (obs, src_chain, sizeof *chain);
+  if (td->u.u_c.end)
+    td->u.u_c.end->next = chain;
+  else
+    td->u.u_c.chain = chain;
+  td->u.u_c.end = chain;
+  td->u.u_c.end->next = NULL;
+  isp->u.u_c.chain = src_chain->next;
+}
+
 /*------------------------------------------------------------------.
 | This function is for matching a string against a prefix of the    |
 | input stream.  If the string S matches the input and CONSUME is   |
@@ -848,14 +888,14 @@ match_input (const char *s, bool consume)
   if (s[1] == '\0')
     {
       if (consume)
-	(void) next_char ();
+	next_char (false);
       return true;			/* short match */
     }
 
-  (void) next_char ();
+  next_char (false);
   for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
     {
-      (void) next_char ();
+      next_char (false);
       n++;
       if (*s == '\0')		/* long match */
 	{
@@ -1016,7 +1056,6 @@ void
 set_word_regexp (const char *caller, const char *regexp)
 {
   int i;
-  char test[2];
   const char *msg;
   struct re_pattern_buffer new_word_regexp;
 
@@ -1048,15 +1087,10 @@ set_word_regexp (const char *caller, const char *regexp)
   default_word_regexp = false;
   set_quote_age ();
 
-  if (word_start == NULL)
-    word_start = (char *) xmalloc (256);
-
-  word_start[0] = '\0';
-  test[1] = '\0';
   for (i = 1; i < 256; i++)
     {
-      test[0] = i;
-      word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
+      char test = i;
+      word_start[i] = re_match (&word_regexp, &test, 1, 0, NULL) > 0;
     }
 }
 
@@ -1140,16 +1174,17 @@ safe_quotes (void)
 
 
 /*--------------------------------------------------------------------.
-| Parse and return a single token from the input stream.  A token     |
-| can either be TOKEN_EOF, if the input_stack is empty; it can be     |
-| TOKEN_STRING for a quoted string or comment; TOKEN_WORD for         |
-| something that is a potential macro name; and TOKEN_SIMPLE for any  |
-| single character that is not a part of any of the previous types.   |
-| If LINE is not NULL, set *LINE to the line where the token starts.  |
-| If OBS is not NULL, expand TOKEN_STRING directly into OBS rather    |
-| than in token_stack temporary storage area.  Report errors          |
-| (unterminated comments or strings) on behalf of CALLER, if          |
-| non-NULL.                                                           |
+| Parse a single token from the input stream, set TD to its           |
+| contents, and return its type.  A token is TOKEN_EOF if the         |
+| input_stack is empty; TOKEN_STRING for a quoted string or comment;  |
+| TOKEN_WORD for something that is a potential macro name; and        |
+| TOKEN_SIMPLE for any single character that is not a part of any of  |
+| the previous types.  If LINE is not NULL, set *LINE to the line     |
+| where the token starts.  If OBS is not NULL, expand TOKEN_STRING    |
+| directly into OBS rather than in token_stack temporary storage      |
+| area, and TD could be a TOKEN_COMP instead of the usual             |
+| TOKEN_TEXT.  Report errors (unterminated comments or strings) on    |
+| behalf of CALLER, if non-NULL.                                      |
 |                                                                     |
 | Next_token () returns the token type, and passes back a pointer to  |
 | the token data through TD.  Non-string token text is collected on   |
@@ -1165,7 +1200,6 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
   int quote_level;
   token_type type;
 #ifdef ENABLE_CHANGEWORD
-  int startpos;
   char *orig_text = NULL;
 #endif /* ENABLE_CHANGEWORD */
   const char *file;
@@ -1181,19 +1215,20 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
     line = &dummy;
 
   /* Can't consume character until after CHAR_MACRO is handled.  */
+  TOKEN_DATA_TYPE (td) = TOKEN_VOID;
   ch = peek_input ();
   if (ch == CHAR_EOF)
     {
 #ifdef DEBUG_INPUT
       xfprintf (stderr, "next_token -> EOF\n");
 #endif /* DEBUG_INPUT */
-      next_char ();
+      next_char (false);
       return TOKEN_EOF;
     }
   if (ch == CHAR_MACRO)
     {
       init_macro_token (td);
-      next_char ();
+      next_char (false);
 #ifdef DEBUG_INPUT
       xfprintf (stderr, "next_token -> MACDEF (%s)\n",
 		find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
@@ -1201,7 +1236,7 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       return TOKEN_MACDEF;
     }
 
-  next_char (); /* Consume character we already peeked at.  */
+  next_char (false); /* Consume character we already peeked at.  */
   file = current_file;
   *line = current_line;
   if (MATCH (ch, bcomm.string, true))
@@ -1209,11 +1244,14 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       if (obs)
 	obs_td = obs;
       obstack_grow (obs_td, bcomm.string, bcomm.length);
-      while ((ch = next_char ()) != CHAR_EOF
+      while ((ch = next_char (false)) < CHAR_EOF
 	     && !MATCH (ch, ecomm.string, true))
 	obstack_1grow (obs_td, ch);
       if (ch != CHAR_EOF)
-	obstack_grow (obs_td, ecomm.string, ecomm.length);
+	{
+	  assert (ch < CHAR_EOF);
+	  obstack_grow (obs_td, ecomm.string, ecomm.length);
+	}
       else
 	/* Current_file changed to "" if we see CHAR_EOF, use the
 	   previous value we stored earlier.  */
@@ -1225,10 +1263,10 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
   else if (default_word_regexp && (isalpha (ch) || ch == '_'))
     {
       obstack_1grow (&token_stack, ch);
-      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
+      while ((ch = peek_input ()) < CHAR_EOF && (isalnum (ch) || ch == '_'))
 	{
 	  obstack_1grow (&token_stack, ch);
-	  (void) next_char ();
+	  next_char (false);
 	}
       type = TOKEN_WORD;
     }
@@ -1241,20 +1279,17 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       while (1)
 	{
 	  ch = peek_input ();
-	  if (ch == CHAR_EOF)
+	  if (ch >= CHAR_EOF)
 	    break;
 	  obstack_1grow (&token_stack, ch);
-	  startpos = re_search (&word_regexp,
-				(char *) obstack_base (&token_stack),
-				obstack_object_size (&token_stack), 0, 0,
-				&regs);
-	  if (startpos != 0 ||
-	      regs.end [0] != obstack_object_size (&token_stack))
+	  if (re_match (&word_regexp, (char *) obstack_base (&token_stack),
+			obstack_object_size (&token_stack), 0, &regs)
+	      != obstack_object_size (&token_stack))
 	    {
 	      obstack_blank (&token_stack, -1);
 	      break;
 	    }
-	  next_char ();
+	  next_char (false);
 	}
 
       obstack_1grow (&token_stack, '\0');
@@ -1297,14 +1332,16 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
       quote_level = 1;
       while (1)
 	{
-	  ch = next_char ();
+	  ch = next_char (obs != NULL && current_quote_age);
 	  if (ch == CHAR_EOF)
 	    /* Current_file changed to "" if we see CHAR_EOF, use
 	       the previous value we stored earlier.  */
 	    m4_error_at_line (EXIT_FAILURE, 0, file, *line, caller,
 			      _("end of file in string"));
 
-	  if (MATCH (ch, rquote.string, true))
+	  if (ch == CHAR_QUOTE)
+	    append_quote_token (obs, td);
+	  else if (MATCH (ch, rquote.string, true))
 	    {
 	      if (--quote_level == 0)
 		break;
@@ -1316,35 +1353,49 @@ next_token (token_data *td, int *line, struct obstack *obs, const char *caller)
 	      obstack_grow (obs_td, lquote.string, lquote.length);
 	    }
 	  else
-	    obstack_1grow (obs_td, ch);
+	    {
+	      assert (ch < CHAR_EOF);
+	      obstack_1grow (obs_td, ch);
+	    }
 	}
       type = TOKEN_STRING;
     }
 
-  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
-  TOKEN_DATA_LEN (td) = obstack_object_size (obs_td);
-  if (obs_td != obs)
+  if (TOKEN_DATA_TYPE (td) == TOKEN_VOID)
     {
-      obstack_1grow (obs_td, '\0');
-      TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td);
-    }
-  else
-    TOKEN_DATA_TEXT (td) = NULL;
-  TOKEN_DATA_QUOTE_AGE (td) = current_quote_age;
+      TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
+      TOKEN_DATA_LEN (td) = obstack_object_size (obs_td);
+      if (obs_td != obs)
+	{
+	  obstack_1grow (obs_td, '\0');
+	  TOKEN_DATA_TEXT (td) = (char *) obstack_finish (obs_td);
+	}
+      else
+	TOKEN_DATA_TEXT (td) = NULL;
+      TOKEN_DATA_QUOTE_AGE (td) = current_quote_age;
 #ifdef ENABLE_CHANGEWORD
-  if (orig_text == NULL)
-    TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td);
+      if (orig_text == NULL)
+	TOKEN_DATA_ORIG_TEXT (td) = TOKEN_DATA_TEXT (td);
+      else
+	{
+	  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
+	  TOKEN_DATA_LEN (td) = strlen (orig_text);
+	}
+#endif /* ENABLE_CHANGEWORD */
+#ifdef DEBUG_INPUT
+      xfprintf (stderr, "next_token -> %s (%s), len %zu\n",
+		token_type_string (type), TOKEN_DATA_TEXT (td),
+		TOKEN_DATA_LEN (td));
+#endif /* DEBUG_INPUT */
+    }
   else
     {
-      TOKEN_DATA_ORIG_TEXT (td) = orig_text;
-      TOKEN_DATA_LEN (td) = strlen (orig_text);
-    }
-#endif /* ENABLE_CHANGEWORD */
+      assert (TOKEN_DATA_TYPE (td) == TOKEN_COMP && type == TOKEN_STRING);
 #ifdef DEBUG_INPUT
-  xfprintf (stderr, "next_token -> %s (%s), len %zu\n",
-	    token_type_string (type), TOKEN_DATA_TEXT (td),
-	    TOKEN_DATA_LEN (td));
+      xfprintf (stderr, "next_token -> %s <chain>\n",
+		token_type_string (type));
 #endif /* DEBUG_INPUT */
+    }
   return type;
 }
 
diff --git a/src/m4.h b/src/m4.h
index ea3947ff..474338b4 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -271,19 +271,20 @@ enum token_data_type
   TOKEN_VOID,	/* Token still being constructed, u is invalid.  */
   TOKEN_TEXT,	/* Straight text, u.u_t is valid.  */
   TOKEN_FUNC,	/* Builtin function definition, u.func is valid.  */
-  TOKEN_COMP	/* Composite argument, u.chain is valid.  */
+  TOKEN_COMP	/* Composite argument, u.u_c is valid.  */
 };
 
 /* Composite tokens are built of a linked list of chains.  */
 struct token_chain
 {
-  token_chain *next;	/* Pointer to next link of chain.  */
-  const char *str;	/* NUL-terminated string if text, else NULL.  */
-  size_t len;		/* Length of str, else 0.  */
-  int level;		/* Expansion level of link content, or -1.  */
-  macro_arguments *argv;/* Reference to earlier $@.  */
-  unsigned int index;	/* Argument index within argv.  */
-  bool flatten;		/* True to treat builtins as text.  */
+  token_chain *next;		/* Pointer to next link of chain.  */
+  unsigned int quote_age;	/* Quote_age of this link of chain, or 0.  */
+  const char *str;		/* NUL-terminated string if text, or NULL.  */
+  size_t len;			/* Length of str, else 0.  */
+  int level;			/* Expansion level of link content, or -1.  */
+  macro_arguments *argv;	/* Reference to earlier $@.  */
+  unsigned int index;		/* Argument index within argv.  */
+  bool flatten;			/* True to treat builtins as text.  */
 };
 
 /* The content of a token or macro argument.  */
@@ -319,7 +320,12 @@ struct token_data
 
       /* Composite text: a linked list of straight text and $@
 	 placeholders.  */
-      token_chain *chain;
+      struct
+	{
+	  token_chain *chain;	/* First link of the chain.  */
+	  token_chain *end;	/* Last link of the chain.  */
+	}
+      u_c;
     }
   u;
 };
@@ -342,6 +348,7 @@ token_type next_token (token_data *, int *, struct obstack *, const char *);
 void skip_line (const char *);
 
 /* push back input */
+void make_text_link (struct obstack *, token_chain **, token_chain **);
 void push_file (FILE *, const char *, bool);
 void push_macro (builtin_func *);
 struct obstack *push_string_init (void);
diff --git a/src/macro.c b/src/macro.c
index 228f82d5..bb8f4fa9 100644
--- a/src/macro.c
+++ b/src/macro.c
@@ -45,6 +45,9 @@ struct macro_arguments
   bool_bitfield inuse : 1;
   /* False if all arguments are just text or func, true if this argv
      refers to another one.  */
+  bool_bitfield wrapper : 1;
+  /* False if all arguments belong to this argv, true if some of them
+     include references to another.  */
   bool_bitfield has_ref : 1;
   const char *argv0; /* The macro name being expanded.  */
   size_t argv0_len; /* Length of argv0.  */
@@ -382,11 +385,16 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
 		    return t == TOKEN_COMMA;
 		  warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp));
 		}
-	      obstack_1grow (obs, '\0');
-	      TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
-	      TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs);
-	      TOKEN_DATA_LEN (argp) = len;
-	      TOKEN_DATA_QUOTE_AGE (argp) = age;
+	      if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP)
+		{
+		  obstack_1grow (obs, '\0');
+		  TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
+		  TOKEN_DATA_TEXT (argp) = (char *) obstack_finish (obs);
+		  TOKEN_DATA_LEN (argp) = len;
+		  TOKEN_DATA_QUOTE_AGE (argp) = age;
+		}
+	      else
+		make_text_link (obs, NULL, &argp->u.u_c.end);
 	      return t == TOKEN_COMMA;
 	    }
 	  /* fallthru */
@@ -411,6 +419,23 @@ expand_argument (struct obstack *obs, token_data *argp, const char *caller)
 	case TOKEN_STRING:
 	  if (!expand_token (obs, t, &td, line, first))
 	    age = 0;
+	  if (TOKEN_DATA_TYPE (&td) == TOKEN_COMP)
+	    {
+	      if (TOKEN_DATA_TYPE (argp) != TOKEN_COMP)
+		{
+		  if (TOKEN_DATA_TYPE (argp) == TOKEN_FUNC)
+		    warn_builtin_concat (caller, TOKEN_DATA_FUNC (argp));
+		  TOKEN_DATA_TYPE (argp) = TOKEN_COMP;
+		  argp->u.u_c.chain = td.u.u_c.chain;
+		  argp->u.u_c.end = td.u.u_c.end;
+		}
+	      else
+		{
+		  assert (argp->u.u_c.end);
+		  argp->u.u_c.end->next = td.u.u_c.chain;
+		  argp->u.u_c.end = td.u.u_c.end;
+		}
+	    }
 	  break;
 
 	case TOKEN_MACDEF:
@@ -459,6 +484,7 @@ collect_arguments (symbol *sym, struct obstack *arguments,
 
   args.argc = 1;
   args.inuse = false;
+  args.wrapper = false;
   args.has_ref = false;
   args.argv0 = SYMBOL_NAME (sym);
   args.argv0_len = strlen (args.argv0);
@@ -490,11 +516,14 @@ collect_arguments (symbol *sym, struct obstack *arguments,
 	      && TOKEN_DATA_LEN (tdp) > 0
 	      && TOKEN_DATA_QUOTE_AGE (tdp) != args.quote_age)
 	    args.quote_age = 0;
+	  else if (TOKEN_DATA_TYPE (tdp) == TOKEN_COMP)
+	    args.has_ref = true;
 	}
       while (more_args);
     }
   argv = (macro_arguments *) obstack_finish (argv_stack);
   argv->argc = args.argc;
+  argv->has_ref = args.has_ref;
   if (args.quote_age != quote_age ())
     argv->quote_age = 0;
   argv->arraylen = args.arraylen;
@@ -633,8 +662,23 @@ expand_macro (symbol *sym)
   if (SYMBOL_DELETED (sym))
     free_symbol (sym);
 
-  /* If argv contains references, those refcounts can be reduced now.  */
-  // TODO - support references in argv
+  /* If argv contains references, those refcounts must be reduced now.  */
+  if (argv->has_ref)
+    {
+      token_chain *chain;
+      size_t i;
+      for (i = 0; i < argv->arraylen; i++)
+	if (TOKEN_DATA_TYPE (argv->array[i]) == TOKEN_COMP)
+	  {
+	    chain = argv->array[i]->u.u_c.chain;
+	    while (chain)
+	      {
+		if (chain->level >= 0)
+		  adjust_refcount (chain->level, false);
+		chain = chain->next;
+	      }
+	  }
+    }
 
   /* We no longer need argv, so reduce the refcount.  Additionally, if
      no other references to argv were created, we can free our portion
@@ -698,7 +742,7 @@ arg_token (macro_arguments *argv, unsigned int index)
   token_data *token;
 
   assert (index && index < argv->argc);
-  if (!argv->has_ref)
+  if (!argv->wrapper)
     return argv->array[index - 1];
   /* Must cycle through all tokens, until we find index, since a ref
      may occupy multiple indices.  */
@@ -707,7 +751,7 @@ arg_token (macro_arguments *argv, unsigned int index)
       token = argv->array[i];
       if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
 	{
-	  token_chain *chain = token->u.chain;
+	  token_chain *chain = token->u.u_c.chain;
 	  // TODO for now we support only a single-length $@ chain...
 	  assert (!chain->next && !chain->str);
 	  if (index < chain->argv->argc - (chain->index - 1))
@@ -731,14 +775,14 @@ static void
 arg_mark (macro_arguments *argv)
 {
   argv->inuse = true;
-  if (argv->has_ref)
+  if (argv->wrapper)
     {
       // TODO for now we support only a single-length $@ chain...
       assert (argv->arraylen == 1
 	      && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP
-	      && !argv->array[0]->u.chain->next
-	      && !argv->array[0]->u.chain->str);
-      argv->array[0]->u.chain->argv->inuse = true;
+	      && !argv->array[0]->u.u_c.chain->next
+	      && !argv->array[0]->u.u_c.chain->str);
+      argv->array[0]->u.u_c.chain->argv->inuse = true;
     }
 }
 
@@ -761,17 +805,22 @@ arg_type (macro_arguments *argv, unsigned int index)
     return TOKEN_TEXT;
   token = arg_token (argv, index);
   type = TOKEN_DATA_TYPE (token);
-  assert (type != TOKEN_COMP);
+  /* Composite tokens are currently sequences of text only.  */
+  if (type == TOKEN_COMP)
+    type = TOKEN_TEXT;
   return type;
 }
 
 /* Given ARGV, return the text at argument INDEX.  Abort if the
    argument is not text.  Index 0 is always text, and indices beyond
-   argc return the empty string.  */
+   argc return the empty string.  The result is always NUL-terminated,
+   even if it includes embedded NUL characters.  */
 const char *
 arg_text (macro_arguments *argv, unsigned int index)
 {
   token_data *token;
+  token_chain *chain;
+  struct obstack *obs;
 
   if (index == 0)
     return argv->argv0;
@@ -783,8 +832,19 @@ arg_text (macro_arguments *argv, unsigned int index)
     case TOKEN_TEXT:
       return TOKEN_DATA_TEXT (token);
     case TOKEN_COMP:
-      // TODO - how to concatenate multiple arguments?  For now, we expect
-      // only one element in the chain, and arg_token dereferences it...
+      // TODO - concatenate argv refs, or even functions?  For now, we assume
+      // all chain elements are text.
+      chain = token->u.u_c.chain;
+      obs = arg_scratch ();
+      while (chain)
+	{
+	  // TODO - cache compiled chains?
+	  assert (chain->str);
+	  obstack_grow (obs, chain->str, chain->len);
+	  chain = chain->next;
+	}
+      obstack_1grow (obs, '\0');
+      return (char *) obstack_finish (obs);
     default:
       break;
     }
@@ -801,14 +861,84 @@ arg_equal (macro_arguments *argv, unsigned int indexa, unsigned int indexb)
 {
   token_data *ta = arg_token (argv, indexa);
   token_data *tb = arg_token (argv, indexb);
+  token_chain tmpa;
+  token_chain tmpb;
+  token_chain *ca = &tmpa;
+  token_chain *cb = &tmpb;
 
+  /* Quick tests.  */
   if (ta == &empty_token || tb == &empty_token)
     return ta == tb;
+  if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT
+      && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT)
+    return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb)
+	    && memcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb),
+		       TOKEN_DATA_LEN (ta)) == 0);
+
+  /* Convert both arguments to chains, if not one already.  */
   // TODO - allow builtin tokens in the comparison?
-  assert (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT
-	  && TOKEN_DATA_TYPE (tb) == TOKEN_TEXT);
-  return (TOKEN_DATA_LEN (ta) == TOKEN_DATA_LEN (tb)
-	  && strcmp (TOKEN_DATA_TEXT (ta), TOKEN_DATA_TEXT (tb)) == 0);
+  if (TOKEN_DATA_TYPE (ta) == TOKEN_TEXT)
+    {
+      tmpa.next = NULL;
+      tmpa.str = TOKEN_DATA_TEXT (ta);
+      tmpa.len = TOKEN_DATA_LEN (ta);
+    }
+  else
+    {
+      assert (TOKEN_DATA_TYPE (ta) == TOKEN_COMP);
+      ca = ta->u.u_c.chain;
+    }
+  if (TOKEN_DATA_TYPE (tb) == TOKEN_TEXT)
+    {
+      tmpb.next = NULL;
+      tmpb.str = TOKEN_DATA_TEXT (tb);
+      tmpb.len = TOKEN_DATA_LEN (tb);
+    }
+  else
+    {
+      assert (TOKEN_DATA_TYPE (tb) == TOKEN_COMP);
+      cb = tb->u.u_c.chain;
+    }
+
+  /* Compare each link of the chain.  */
+  while (ca && cb)
+    {
+      // TODO support comparison against $@ refs.
+      assert (ca->str && cb->str);
+      if (ca->len == cb->len)
+	{
+	  if (memcmp (ca->str, cb->str, ca->len) != 0)
+	    return false;
+	  ca = ca->next;
+	  cb = cb->next;
+	}
+      else if (ca->len < cb->len)
+	{
+	  if (memcmp (ca->str, cb->str, ca->len) != 0)
+	    return false;
+	  tmpb.next = cb->next;
+	  tmpb.str = cb->str + ca->len;
+	  tmpb.len = cb->len - ca->len;
+	  ca = ca->next;
+	  cb = &tmpb;
+	}
+      else
+	{
+	  assert (ca->len > cb->len);
+	  if (memcmp (ca->str, cb->str, cb->len) != 0)
+	    return false;
+	  tmpa.next = ca->next;
+	  tmpa.str = ca->str + cb->len;
+	  tmpa.len = ca->len - cb->len;
+	  ca = &tmpa;
+	  cb = cb->next;
+	}
+    }
+
+  /* If we get this far, the two tokens are equal only if both chains
+     are exhausted.  */
+  assert (ca != cb || ca == NULL);
+  return ca == cb;
 }
 
 /* Given ARGV, return true if argument INDEX is the empty string.
@@ -830,6 +960,8 @@ size_t
 arg_len (macro_arguments *argv, unsigned int index)
 {
   token_data *token;
+  token_chain *chain;
+  size_t len;
 
   if (index == 0)
     return argv->argv0_len;
@@ -842,8 +974,18 @@ arg_len (macro_arguments *argv, unsigned int index)
       assert ((token == &empty_token) == (TOKEN_DATA_LEN (token) == 0));
       return TOKEN_DATA_LEN (token);
     case TOKEN_COMP:
-      // TODO - how to concatenate multiple arguments?  For now, we expect
-      // only one element in the chain, and arg_token dereferences it...
+      // TODO - concatenate argv refs, or even functions?  For now, we assume
+      // all chain elements are text.
+      chain = token->u.u_c.chain;
+      len = 0;
+      while (chain)
+	{
+	  assert (chain->str);
+	  len += chain->len;
+	  chain = chain->next;
+	}
+      assert (len);
+      return len;
     default:
       break;
     }
@@ -892,12 +1034,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
 
   /* When making a reference through a reference, point to the
      original if possible.  */
-  if (argv->has_ref)
+  if (argv->wrapper)
     {
       // TODO for now we support only a single-length $@ chain...
       assert (argv->arraylen == 1
 	      && TOKEN_DATA_TYPE (argv->array[0]) == TOKEN_COMP);
-      chain = argv->array[0]->u.chain;
+      chain = argv->array[0]->u.u_c.chain;
       assert (!chain->next && !chain->str);
       argv = chain->argv;
       index += chain->index - 1;
@@ -907,6 +1049,7 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
       new_argv = (macro_arguments *)
 	obstack_alloc (obs, offsetof (macro_arguments, array));
       new_argv->arraylen = 0;
+      new_argv->wrapper = false;
       new_argv->has_ref = false;
     }
   else
@@ -918,10 +1061,12 @@ make_argv_ref (macro_arguments *argv, const char *argv0, size_t argv0_len,
       chain = (token_chain *) obstack_alloc (obs, sizeof *chain);
       new_argv->arraylen = 1;
       new_argv->array[0] = token;
+      new_argv->wrapper = true;
       new_argv->has_ref = true;
       TOKEN_DATA_TYPE (token) = TOKEN_COMP;
-      token->u.chain = chain;
+      token->u.u_c.chain = token->u.u_c.end = chain;
       chain->next = NULL;
+      chain->quote_age = argv->quote_age;
       chain->str = NULL;
       chain->len = 0;
       chain->level = expansion_level - 1;
@@ -955,9 +1100,23 @@ push_arg (struct obstack *obs, macro_arguments *argv, unsigned int index)
     return;
   token = arg_token (argv, index);
   // TODO handle func tokens?
-  assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
-  if (push_token (token, expansion_level - 1))
-    arg_mark (argv);
+  if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
+    {
+      if (push_token (token, expansion_level - 1))
+	arg_mark (argv);
+    }
+  else if (TOKEN_DATA_TYPE (token) == TOKEN_COMP)
+    {
+      // TODO - really handle composites; for now, just flatten the
+      // composite and push its text
+      token_chain *chain = token->u.u_c.chain;
+      while (chain)
+	{
+	  assert (chain->str);
+	  obstack_grow (obs, chain->str, chain->len);
+	  chain = chain->next;
+	}
+    }
 }
 
 /* Push series of comma-separated arguments from ARGV, which should
@@ -968,6 +1127,7 @@ void
 push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
 {
   token_data *token;
+  token_chain *chain;
   unsigned int i = skip ? 2 : 1;
   const char *sep = ",";
   size_t sep_len = 1;
@@ -1007,8 +1167,20 @@ push_args (struct obstack *obs, macro_arguments *argv, bool skip, bool quote)
       else
 	use_sep = true;
       // TODO handle func tokens?
-      assert (TOKEN_DATA_TYPE (token) == TOKEN_TEXT);
-      inuse |= push_token (token, expansion_level - 1);
+      if (TOKEN_DATA_TYPE (token) == TOKEN_TEXT)
+	inuse |= push_token (token, expansion_level - 1);
+      else
+	{
+	  // TODO - handle composite text in push_token
+	  assert (TOKEN_DATA_TYPE (token) == TOKEN_COMP);
+	  chain = token->u.u_c.chain;
+	  while (chain)
+	    {
+	      assert (chain->str);
+	      obstack_grow (obs, chain->str, chain->len);
+	      chain = chain->next;
+	    }
+	}
     }
   if (quote)
     obstack_grow (obs, rquote.string, rquote.length);
author	Eric Blake <ebb9@byu.net>	2007-10-27 05:44:09 -0600
committer	Eric Blake <ebb9@byu.net>	2008-01-19 15:24:14 -0700
commit	b1fef201f5d121e25e5dd61ec8ca3eac41a899ba (patch)
tree	a76568b9a8d798657905f9090405a87b3c933628
parent	622fc8cb2cb6ce0fc7391a6414bb0aaffeec6fc0 (diff)
download	m4-b1fef201f5d121e25e5dd61ec8ca3eac41a899ba.tar.gz