1 files changed, 3592 insertions, 0 deletions
diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c
new file mode 100644
index 0000000..571f6de
--- /dev/null
+++ b/gettext-tools/src/x-perl.c
@@ -0,0 +1,3592 @@
+/* xgettext Perl backend.
+   Copyright (C) 2002-2010 Free Software Foundation, Inc.
+
+   This file was written by Guido Flohr <guido@imperia.net>, 2002-2010.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification.  */
+#include "x-perl.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "xgettext.h"
+#include "error.h"
+#include "error-progname.h"
+#include "xalloc.h"
+#include "po-charset.h"
+#include "unistr.h"
+#include "uniname.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+/* The Perl syntax is defined in perlsyn.pod.  Try the command
+   "man perlsyn" or "perldoc perlsyn".
+   Also, the syntax after the 'sub' keyword is specified in perlsub.pod.
+   Try the command "man perlsub" or "perldoc perlsub".
+   Perl 5.10 has new operators '//' and '//=', see
+   <http://perldoc.perl.org/perldelta.html#Defined-or-operator>.  */
+
+#define DEBUG_PERL 0
+
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_perl_extract_all ()
+{
+  extract_all = true;
+}
+
+
+void
+x_perl_keyword (const char *name)
+{
+  if (name == NULL)
+    default_keywords = false;
+  else
+    {
+      const char *end;
+      struct callshape shape;
+      const char *colon;
+
+      if (keywords.table == NULL)
+        hash_init (&keywords, 100);
+
+      split_keywordspec (name, &end, &shape);
+
+      /* The characters between name and end should form a valid C identifier.
+         A colon means an invalid parse in split_keywordspec().  */
+      colon = strchr (name, ':');
+      if (colon == NULL || colon >= end)
+        insert_keyword_callshape (&keywords, name, end - name, &shape);
+    }
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (default_keywords)
+    {
+      /* When adding new keywords here, also update the documentation in
+         xgettext.texi!  */
+      x_perl_keyword ("gettext");
+      x_perl_keyword ("%gettext");
+      x_perl_keyword ("$gettext");
+      x_perl_keyword ("dgettext:2");
+      x_perl_keyword ("dcgettext:2");
+      x_perl_keyword ("ngettext:1,2");
+      x_perl_keyword ("dngettext:2,3");
+      x_perl_keyword ("dcngettext:2,3");
+      x_perl_keyword ("gettext_noop");
+#if 0
+      x_perl_keyword ("__");
+      x_perl_keyword ("$__");
+      x_perl_keyword ("%__");
+      x_perl_keyword ("__x");
+      x_perl_keyword ("__n:1,2");
+      x_perl_keyword ("__nx:1,2");
+      x_perl_keyword ("__xn:1,2");
+      x_perl_keyword ("N__");
+#endif
+      default_keywords = false;
+    }
+}
+
+void
+init_flag_table_perl ()
+{
+  xgettext_record_flag ("gettext:1:pass-perl-format");
+  xgettext_record_flag ("gettext:1:pass-perl-brace-format");
+  xgettext_record_flag ("%gettext:1:pass-perl-format");
+  xgettext_record_flag ("%gettext:1:pass-perl-brace-format");
+  xgettext_record_flag ("$gettext:1:pass-perl-format");
+  xgettext_record_flag ("$gettext:1:pass-perl-brace-format");
+  xgettext_record_flag ("dgettext:2:pass-perl-format");
+  xgettext_record_flag ("dgettext:2:pass-perl-brace-format");
+  xgettext_record_flag ("dcgettext:2:pass-perl-format");
+  xgettext_record_flag ("dcgettext:2:pass-perl-brace-format");
+  xgettext_record_flag ("ngettext:1:pass-perl-format");
+  xgettext_record_flag ("ngettext:2:pass-perl-format");
+  xgettext_record_flag ("ngettext:1:pass-perl-brace-format");
+  xgettext_record_flag ("ngettext:2:pass-perl-brace-format");
+  xgettext_record_flag ("dngettext:2:pass-perl-format");
+  xgettext_record_flag ("dngettext:3:pass-perl-format");
+  xgettext_record_flag ("dngettext:2:pass-perl-brace-format");
+  xgettext_record_flag ("dngettext:3:pass-perl-brace-format");
+  xgettext_record_flag ("dcngettext:2:pass-perl-format");
+  xgettext_record_flag ("dcngettext:3:pass-perl-format");
+  xgettext_record_flag ("dcngettext:2:pass-perl-brace-format");
+  xgettext_record_flag ("dcngettext:3:pass-perl-brace-format");
+  xgettext_record_flag ("gettext_noop:1:pass-perl-format");
+  xgettext_record_flag ("gettext_noop:1:pass-perl-brace-format");
+  xgettext_record_flag ("printf:1:perl-format"); /* argument 1 or 2 ?? */
+  xgettext_record_flag ("sprintf:1:perl-format");
+#if 0
+  xgettext_record_flag ("__:1:pass-perl-format");
+  xgettext_record_flag ("__:1:pass-perl-brace-format");
+  xgettext_record_flag ("%__:1:pass-perl-format");
+  xgettext_record_flag ("%__:1:pass-perl-brace-format");
+  xgettext_record_flag ("$__:1:pass-perl-format");
+  xgettext_record_flag ("$__:1:pass-perl-brace-format");
+  xgettext_record_flag ("__x:1:perl-brace-format");
+  xgettext_record_flag ("__n:1:pass-perl-format");
+  xgettext_record_flag ("__n:2:pass-perl-format");
+  xgettext_record_flag ("__n:1:pass-perl-brace-format");
+  xgettext_record_flag ("__n:2:pass-perl-brace-format");
+  xgettext_record_flag ("__nx:1:perl-brace-format");
+  xgettext_record_flag ("__nx:2:perl-brace-format");
+  xgettext_record_flag ("__xn:1:perl-brace-format");
+  xgettext_record_flag ("__xn:2:perl-brace-format");
+  xgettext_record_flag ("N__:1:pass-perl-format");
+  xgettext_record_flag ("N__:1:pass-perl-brace-format");
+#endif
+}
+
+
+/* ======================== Reading of characters.  ======================== */
+
+/* Real filename, used in error messages about the input file.  */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages.  */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream.  */
+static FILE *fp;
+
+/* The current line buffer.  */
+static char *linebuf;
+
+/* The size of the current line.  */
+static int linesize;
+
+/* The position in the current line.  */
+static int linepos;
+
+/* The size of the input buffer.  */
+static size_t linebuf_size;
+
+/* Number of lines eaten for here documents.  */
+static int eaten_here;
+
+/* Paranoia: EOF marker for __END__ or __DATA__.  */
+static bool end_of_file;
+
+
+/* 1. line_number handling.  */
+
+/* Returns the next character from the input stream or EOF.  */
+static int
+phase1_getc ()
+{
+  line_number += eaten_here;
+  eaten_here = 0;
+
+  if (end_of_file)
+    return EOF;
+
+  if (linepos >= linesize)
+    {
+      linesize = getline (&linebuf, &linebuf_size, fp);
+
+      if (linesize < 0)
+        {
+          if (ferror (fp))
+            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+                   real_file_name);
+          end_of_file = true;
+          return EOF;
+        }
+
+      linepos = 0;
+      ++line_number;
+
+      /* Undosify.  This is important for catching the end of <<EOF and
+         <<'EOF'.  We could rely on stdio doing this for us but
+         it is not uncommon to to come across Perl scripts with CRLF
+         newline conventions on systems that do not follow this
+         convention.  */
+      if (linesize >= 2 && linebuf[linesize - 1] == '\n'
+          && linebuf[linesize - 2] == '\r')
+        {
+          linebuf[linesize - 2] = '\n';
+          linebuf[linesize - 1] = '\0';
+          --linesize;
+        }
+    }
+
+  return linebuf[linepos++];
+}
+
+/* Supports only one pushback character.  */
+static void
+phase1_ungetc (int c)
+{
+  if (c != EOF)
+    {
+      if (linepos == 0)
+        /* Attempt to ungetc across line boundary.  Shouldn't happen.
+           No two phase1_ungetc calls are permitted in a row.  */
+        abort ();
+
+      --linepos;
+    }
+}
+
+/* Read a here document and return its contents.
+   The delimiter is an UTF-8 encoded string; the resulting string is UTF-8
+   encoded as well.  */
+
+static char *
+get_here_document (const char *delimiter)
+{
+  /* Accumulator for the entire here document, including a NUL byte
+     at the end.  */
+  static char *buffer;
+  static size_t bufmax = 0;
+  size_t bufpos = 0;
+  /* Current line being appended.  */
+  static char *my_linebuf = NULL;
+  static size_t my_linebuf_size = 0;
+
+  /* Allocate the initial buffer.  Later on, bufmax > 0.  */
+  if (bufmax == 0)
+    {
+      buffer = XNMALLOC (1, char);
+      buffer[0] = '\0';
+      bufmax = 1;
+    }
+
+  for (;;)
+    {
+      int read_bytes = getline (&my_linebuf, &my_linebuf_size, fp);
+      char *my_line_utf8;
+      bool chomp;
+
+      if (read_bytes < 0)
+        {
+          if (ferror (fp))
+            {
+              error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+                     real_file_name);
+            }
+          else
+            {
+              error_with_progname = false;
+              error (EXIT_SUCCESS, 0, _("\
+%s:%d: can't find string terminator \"%s\" anywhere before EOF"),
+                     real_file_name, line_number, delimiter);
+              error_with_progname = true;
+
+              break;
+            }
+        }
+
+      ++eaten_here;
+
+      /* Convert to UTF-8.  */
+      my_line_utf8 =
+        from_current_source_encoding (my_linebuf, lc_string, logical_file_name,
+                                      line_number + eaten_here);
+      if (my_line_utf8 != my_linebuf)
+        {
+          if (strlen (my_line_utf8) >= my_linebuf_size)
+            {
+              my_linebuf_size = strlen (my_line_utf8) + 1;
+              my_linebuf = xrealloc (my_linebuf, my_linebuf_size);
+            }
+          strcpy (my_linebuf, my_line_utf8);
+          free (my_line_utf8);
+        }
+
+      /* Undosify.  This is important for catching the end of <<EOF and
+         <<'EOF'.  We could rely on stdio doing this for us but you
+         it is not uncommon to to come across Perl scripts with CRLF
+         newline conventions on systems that do not follow this
+         convention.  */
+      if (read_bytes >= 2 && my_linebuf[read_bytes - 1] == '\n'
+          && my_linebuf[read_bytes - 2] == '\r')
+        {
+          my_linebuf[read_bytes - 2] = '\n';
+          my_linebuf[read_bytes - 1] = '\0';
+          --read_bytes;
+        }
+
+      /* Temporarily remove the trailing newline from my_linebuf.  */
+      chomp = false;
+      if (read_bytes >= 1 && my_linebuf[read_bytes - 1] == '\n')
+        {
+          chomp = true;
+          my_linebuf[read_bytes - 1] = '\0';
+        }
+
+      /* See whether this line terminates the here document.  */
+      if (strcmp (my_linebuf, delimiter) == 0)
+        break;
+
+      /* Add back the trailing newline to my_linebuf.  */
+      if (chomp)
+        my_linebuf[read_bytes - 1] = '\n';
+
+      /* Ensure room for read_bytes + 1 bytes.  */
+      if (bufpos + read_bytes >= bufmax)
+        {
+          do
+            bufmax = 2 * bufmax + 10;
+          while (bufpos + read_bytes >= bufmax);
+          buffer = xrealloc (buffer, bufmax);
+        }
+      /* Append this line to the accumulator.  */
+      strcpy (buffer + bufpos, my_linebuf);
+      bufpos += read_bytes;
+    }
+
+  /* Done accumulating the here document.  */
+  return xstrdup (buffer);
+}
+
+/* Skips pod sections.  */
+static void
+skip_pod ()
+{
+  line_number += eaten_here;
+  eaten_here = 0;
+  linepos = 0;
+
+  for (;;)
+    {
+      linesize = getline (&linebuf, &linebuf_size, fp);
+
+      if (linesize < 0)
+        {
+          if (ferror (fp))
+            error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+                   real_file_name);
+          return;
+        }
+
+      ++line_number;
+
+      if (strncmp ("=cut", linebuf, 4) == 0)
+        {
+          /* Force reading of a new line on next call to phase1_getc().  */
+          linepos = linesize;
+          return;
+        }
+    }
+}
+
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* 2. Replace each comment that is not inside a string literal or regular
+   expression with a newline character.  We need to remember the comment
+   for later, because it may be attached to a keyword string.  */
+
+static int
+phase2_getc ()
+{
+  static char *buffer;
+  static size_t bufmax;
+  size_t buflen;
+  int lineno;
+  int c;
+  char *utf8_string;
+
+  c = phase1_getc ();
+  if (c == '#')
+    {
+      buflen = 0;
+      lineno = line_number;
+      /* Skip leading whitespace.  */
+      for (;;)
+        {
+          c = phase1_getc ();
+          if (c == EOF)
+            break;
+          if (c != ' ' && c != '\t' && c != '\r' && c != '\f')
+            {
+              phase1_ungetc (c);
+              break;
+            }
+        }
+      /* Accumulate the comment.  */
+      for (;;)
+        {
+          c = phase1_getc ();
+          if (c == '\n' || c == EOF)
+            break;
+          if (buflen >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[buflen++] = c;
+        }
+      if (buflen >= bufmax)
+        {
+          bufmax = 2 * bufmax + 10;
+          buffer = xrealloc (buffer, bufmax);
+        }
+      buffer[buflen] = '\0';
+      /* Convert it to UTF-8.  */
+      utf8_string =
+        from_current_source_encoding (buffer, lc_comment, logical_file_name,
+                                      lineno);
+      /* Save it until we encounter the corresponding string.  */
+      savable_comment_add (utf8_string);
+      last_comment_line = lineno;
+    }
+  return c;
+}
+
+/* Supports only one pushback character.  */
+static void
+phase2_ungetc (int c)
+{
+  if (c != EOF)
+    phase1_ungetc (c);
+}
+
+/* Whitespace recognition.  */
+
+#define case_whitespace \
+  case ' ': case '\t': case '\r': case '\n': case '\f'
+
+static inline bool
+is_whitespace (int c)
+{
+  return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
+}
+
+
+/* ========================== Reading of tokens.  ========================== */
+
+
+enum token_type_ty
+{
+  token_type_eof,
+  token_type_lparen,            /* ( */
+  token_type_rparen,            /* ) */
+  token_type_comma,             /* , */
+  token_type_fat_comma,         /* => */
+  token_type_dereference,       /* -> */
+  token_type_semicolon,         /* ; */
+  token_type_lbrace,            /* { */
+  token_type_rbrace,            /* } */
+  token_type_lbracket,          /* [ */
+  token_type_rbracket,          /* ] */
+  token_type_string,            /* quote-like */
+  token_type_number,            /* starting with a digit o dot */
+  token_type_named_op,          /* if, unless, while, ... */
+  token_type_variable,          /* $... */
+  token_type_object,            /* A dereferenced variable, maybe a blessed
+                                   object.  */
+  token_type_symbol,            /* symbol, number */
+  token_type_regex_op,          /* s, tr, y, m.  */
+  token_type_dot,               /* . */
+  token_type_other,             /* regexp, misc. operator */
+  /* The following are not really token types, but variants used by
+     the parser.  */
+  token_type_keyword_symbol     /* keyword symbol */
+};
+typedef enum token_type_ty token_type_ty;
+
+/* Subtypes for strings, important for interpolation.  */
+enum string_type_ty
+{
+  string_type_verbatim,     /* "<<'EOF'", "m'...'", "s'...''...'",
+                               "tr/.../.../", "y/.../.../".  */
+  string_type_q,            /* "'..'", "q/.../".  */
+  string_type_qq,           /* '"..."', "`...`", "qq/.../", "qx/.../",
+                               "<file*glob>".  */
+  string_type_qr            /* Not supported.  */
+};
+
+/* Subtypes for symbols, important for dollar interpretation.  */
+enum symbol_type_ty
+{
+  symbol_type_none,         /* Nothing special.  */
+  symbol_type_sub,          /* 'sub'.  */
+  symbol_type_function      /* Function name after 'sub'.  */
+};
+
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  token_type_ty last_type;
+  int sub_type;                 /* for token_type_string, token_type_symbol */
+  char *string;                 /* for:                 in encoding:
+                                   token_type_named_op  ASCII
+                                   token_type_string    UTF-8
+                                   token_type_symbol    ASCII
+                                   token_type_variable  global_source_encoding
+                                   token_type_object    global_source_encoding
+                                 */
+  refcounted_string_list_ty *comment; /* for token_type_string */
+  int line_number;
+};
+
+#if DEBUG_PERL
+static const char *
+token2string (const token_ty *token)
+{
+  switch (token->type)
+    {
+    case token_type_eof:
+      return "token_type_eof";
+    case token_type_lparen:
+      return "token_type_lparen";
+    case token_type_rparen:
+      return "token_type_rparen";
+    case token_type_comma:
+      return "token_type_comma";
+    case token_type_fat_comma:
+      return "token_type_fat_comma";
+    case token_type_dereference:
+      return "token_type_dereference";
+    case token_type_semicolon:
+      return "token_type_semicolon";
+    case token_type_lbrace:
+      return "token_type_lbrace";
+    case token_type_rbrace:
+      return "token_type_rbrace";
+    case token_type_lbracket:
+      return "token_type_lbracket";
+    case token_type_rbracket:
+      return "token_type_rbracket";
+    case token_type_string:
+      return "token_type_string";
+    case token_type_number:
+      return "token type number";
+    case token_type_named_op:
+      return "token_type_named_op";
+    case token_type_variable:
+      return "token_type_variable";
+    case token_type_object:
+      return "token_type_object";
+    case token_type_symbol:
+      return "token_type_symbol";
+    case token_type_regex_op:
+      return "token_type_regex_op";
+    case token_type_dot:
+      return "token_type_dot";
+    case token_type_other:
+      return "token_type_other";
+    default:
+      return "unknown";
+    }
+}
+#endif
+
+/* Free the memory pointed to by a 'struct token_ty'.  */
+static inline void
+free_token (token_ty *tp)
+{
+  switch (tp->type)
+    {
+    case token_type_named_op:
+    case token_type_string:
+    case token_type_symbol:
+    case token_type_variable:
+    case token_type_object:
+      free (tp->string);
+      break;
+    default:
+      break;
+    }
+  if (tp->type == token_type_string)
+    drop_reference (tp->comment);
+  free (tp);
+}
+
+/* Pass 1 of extracting quotes: Find the end of the string, regardless
+   of the semantics of the construct.  Return the complete string,
+   including the starting and the trailing delimiter, with backslashes
+   removed where appropriate.  */
+static char *
+extract_quotelike_pass1 (int delim)
+{
+  /* This function is called recursively.  No way to allocate stuff
+     statically.  Also alloca() is inappropriate due to limited stack
+     size on some platforms.  So we use malloc().  */
+  int bufmax = 10;
+  char *buffer = XNMALLOC (bufmax, char);
+  int bufpos = 0;
+  bool nested = true;
+  int counter_delim;
+
+  buffer[bufpos++] = delim;
+
+  /* Find the closing delimiter.  */
+  switch (delim)
+    {
+    case '(':
+      counter_delim = ')';
+      break;
+    case '{':
+      counter_delim = '}';
+      break;
+    case '[':
+      counter_delim = ']';
+      break;
+    case '<':
+      counter_delim = '>';
+      break;
+    default: /* "..." or '...' or |...| etc. */
+      nested = false;
+      counter_delim = delim;
+      break;
+    }
+
+  for (;;)
+    {
+      int c = phase1_getc ();
+
+      /* This round can produce 1 or 2 bytes.  Ensure room for 2 bytes.  */
+      if (bufpos + 2 > bufmax)
+        {
+          bufmax = 2 * bufmax + 10;
+          buffer = xrealloc (buffer, bufmax);
+        }
+
+      if (c == counter_delim || c == EOF)
+        {
+          buffer[bufpos++] = counter_delim; /* will be stripped off later */
+          buffer[bufpos++] = '\0';
+#if DEBUG_PERL
+          fprintf (stderr, "PASS1: %s\n", buffer);
+#endif
+          return buffer;
+        }
+
+      if (nested && c == delim)
+        {
+          char *inner = extract_quotelike_pass1 (delim);
+          size_t len = strlen (inner);
+
+          /* Ensure room for len + 1 bytes.  */
+          if (bufpos + len >= bufmax)
+            {
+              do
+                bufmax = 2 * bufmax + 10;
+              while (bufpos + len >= bufmax);
+              buffer = xrealloc (buffer, bufmax);
+            }
+          strcpy (buffer + bufpos, inner);
+          free (inner);
+          bufpos += len;
+        }
+      else if (c == '\\')
+        {
+          c = phase1_getc ();
+          if (c == '\\')
+            {
+              buffer[bufpos++] = '\\';
+              buffer[bufpos++] = '\\';
+            }
+          else if (c == delim || c == counter_delim)
+            {
+              /* This is pass2 in Perl.  */
+              buffer[bufpos++] = c;
+            }
+          else
+            {
+              buffer[bufpos++] = '\\';
+              phase1_ungetc (c);
+            }
+        }
+      else
+        {
+          buffer[bufpos++] = c;
+        }
+    }
+}
+
+/* Like extract_quotelike_pass1, but return the complete string in UTF-8
+   encoding.  */
+static char *
+extract_quotelike_pass1_utf8 (int delim)
+{
+  char *string = extract_quotelike_pass1 (delim);
+  char *utf8_string =
+    from_current_source_encoding (string, lc_string, logical_file_name,
+                                  line_number);
+  if (utf8_string != string)
+    free (string);
+  return utf8_string;
+}
+
+
+/* ========= Reading of tokens and commands.  Extracting strings.  ========= */
+
+
+/* Context lookup table.  */
+static flag_context_list_table_ty *flag_context_list_table;
+
+
+/* Forward declaration of local functions.  */
+static void interpolate_keywords (message_list_ty *mlp, const char *string,
+                                  int lineno);
+static token_ty *x_perl_lex (message_list_ty *mlp);
+static void x_perl_unlex (token_ty *tp);
+static bool extract_balanced (message_list_ty *mlp,
+                              token_type_ty delim, bool eat_delim,
+                              bool comma_delim,
+                              flag_context_ty outer_context,
+                              flag_context_list_iterator_ty context_iter,
+                              int arg, struct arglist_parser *argparser);
+
+
+/* Extract an unsigned hexadecimal number from STRING, considering at
+   most LEN bytes and place the result in *RESULT.  Returns a pointer
+   to the first character past the hexadecimal number.  */
+static const char *
+extract_hex (const char *string, size_t len, unsigned int *result)
+{
+  size_t i;
+
+  *result = 0;
+
+  for (i = 0; i < len; i++)
+    {
+      char c = string[i];
+      int number;
+
+      if (c >= 'A' && c <= 'F')
+        number = c - 'A' + 10;
+      else if (c >= 'a' && c <= 'f')
+        number = c - 'a' + 10;
+      else if (c >= '0' && c <= '9')
+        number = c - '0';
+      else
+        break;
+
+      *result <<= 4;
+      *result |= number;
+    }
+
+  return string + i;
+}
+
+/* Extract an unsigned octal number from STRING, considering at
+   most LEN bytes and place the result in *RESULT.  Returns a pointer
+   to the first character past the octal number.  */
+static const char *
+extract_oct (const char *string, size_t len, unsigned int *result)
+{
+  size_t i;
+
+  *result = 0;
+
+  for (i = 0; i < len; i++)
+    {
+      char c = string[i];
+      int number;
+
+      if (c >= '0' && c <= '7')
+        number = c - '0';
+      else
+        break;
+
+      *result <<= 3;
+      *result |= number;
+    }
+
+  return string + i;
+}
+
+/* Extract the various quotelike constructs except for <<EOF.  See the
+   section "Gory details of parsing quoted constructs" in perlop.pod.
+   Return the resulting token in *tp; tp->type == token_type_string.  */
+static void
+extract_quotelike (token_ty *tp, int delim)
+{
+  char *string = extract_quotelike_pass1_utf8 (delim);
+  size_t len = strlen (string);
+
+  tp->type = token_type_string;
+  /* Take the string without the delimiters at the start and at the end.  */
+  if (!(len >= 2))
+    abort ();
+  string[len - 1] = '\0';
+  tp->string = xstrdup (string + 1);
+  free (string);
+  tp->comment = add_reference (savable_comment);
+}
+
+/* Extract the quotelike constructs with double delimiters, like
+   s/[SEARCH]/[REPLACE]/.  This function does not eat up trailing
+   modifiers (left to the caller).
+   Return the resulting token in *tp; tp->type == token_type_regex_op.  */
+static void
+extract_triple_quotelike (message_list_ty *mlp, token_ty *tp, int delim,
+                          bool interpolate)
+{
+  char *string;
+
+  tp->type = token_type_regex_op;
+
+  string = extract_quotelike_pass1_utf8 (delim);
+  if (interpolate)
+    interpolate_keywords (mlp, string, line_number);
+  free (string);
+
+  if (delim == '(' || delim == '<' || delim == '{' || delim == '[')
+    {
+      /* The delimiter for the second string can be different, e.g.
+         s{SEARCH}{REPLACE} or s{SEARCH}/REPLACE/.  See "man perlrequick".  */
+      delim = phase1_getc ();
+      while (is_whitespace (delim))
+        {
+          /* The hash-sign is not a valid delimiter after whitespace, ergo
+             use phase2_getc() and not phase1_getc() now.  */
+          delim = phase2_getc ();
+        }
+    }
+  string = extract_quotelike_pass1_utf8 (delim);
+  if (interpolate)
+    interpolate_keywords (mlp, string, line_number);
+  free (string);
+}
+
+/* Perform pass 3 of quotelike extraction (interpolation).
+   *tp is a token of type token_type_string.
+   This function replaces tp->string.
+   This function does not access tp->comment.  */
+/* FIXME: Currently may writes null-bytes into the string.  */
+static void
+extract_quotelike_pass3 (token_ty *tp, int error_level)
+{
+  static char *buffer;
+  static int bufmax = 0;
+  int bufpos = 0;
+  const char *crs;
+  bool uppercase;
+  bool lowercase;
+  bool quotemeta;
+
+#if DEBUG_PERL
+  switch (tp->sub_type)
+    {
+    case string_type_verbatim:
+      fprintf (stderr, "Interpolating string_type_verbatim:\n");
+      break;
+    case string_type_q:
+      fprintf (stderr, "Interpolating string_type_q:\n");
+      break;
+    case string_type_qq:
+      fprintf (stderr, "Interpolating string_type_qq:\n");
+      break;
+    case string_type_qr:
+      fprintf (stderr, "Interpolating string_type_qr:\n");
+      break;
+    }
+  fprintf (stderr, "%s\n", tp->string);
+  if (tp->sub_type == string_type_verbatim)
+    fprintf (stderr, "---> %s\n", tp->string);
+#endif
+
+  if (tp->sub_type == string_type_verbatim)
+    return;
+
+  /* Loop over tp->string, accumulating the expansion in buffer.  */
+  crs = tp->string;
+  uppercase = false;
+  lowercase = false;
+  quotemeta = false;
+  while (*crs)
+    {
+      bool backslashed;
+
+      /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash
+         if \Q modifier is present.  */
+      if (bufpos + 7 > bufmax)
+        {
+          bufmax = 2 * bufmax + 10;
+          buffer = xrealloc (buffer, bufmax);
+        }
+
+      if (tp->sub_type == string_type_q)
+        {
+          switch (*crs)
+            {
+            case '\\':
+              if (crs[1] == '\\')
+                {
+                  crs += 2;
+                  buffer[bufpos++] = '\\';
+                  break;
+                }
+              /* FALLTHROUGH */
+            default:
+              buffer[bufpos++] = *crs++;
+              break;
+            }
+          continue;
+        }
+
+      /* We only get here for double-quoted strings or regular expressions.
+         Unescape escape sequences.  */
+      if (*crs == '\\')
+        {
+          switch (crs[1])
+            {
+            case 't':
+              crs += 2;
+              buffer[bufpos++] = '\t';
+              continue;
+            case 'n':
+              crs += 2;
+              buffer[bufpos++] = '\n';
+              continue;
+            case 'r':
+              crs += 2;
+              buffer[bufpos++] = '\r';
+              continue;
+            case 'f':
+              crs += 2;
+              buffer[bufpos++] = '\f';
+              continue;
+            case 'b':
+              crs += 2;
+              buffer[bufpos++] = '\b';
+              continue;
+            case 'a':
+              crs += 2;
+              buffer[bufpos++] = '\a';
+              continue;
+            case 'e':
+              crs += 2;
+              buffer[bufpos++] = 0x1b;
+              continue;
+            case '0': case '1': case '2': case '3':
+            case '4': case '5': case '6': case '7':
+              {
+                unsigned int oct_number;
+                int length;
+
+                crs = extract_oct (crs + 1, 3, &oct_number);
+
+                /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+                   true, the character should be converted to its uppercase
+                   resp. lowercase equivalent.  I don't know if the necessary
+                   facilities are already included in gettext.  For US-Ascii
+                   the conversion can be already be done, however.  */
+                if (uppercase && oct_number >= 'a' && oct_number <= 'z')
+                  {
+                    oct_number = oct_number - 'a' + 'A';
+                  }
+                else if (lowercase && oct_number >= 'A' && oct_number <= 'Z')
+                  {
+                    oct_number = oct_number - 'A' + 'a';
+                  }
+
+
+                /* Yes, octal escape sequences in the range 0x100..0x1ff are
+                   valid.  */
+                length = u8_uctomb ((unsigned char *) (buffer + bufpos),
+                                    oct_number, 2);
+                if (length > 0)
+                  bufpos += length;
+              }
+              continue;
+            case 'x':
+              {
+                unsigned int hex_number = 0;
+                int length;
+
+                crs += 2;
+                if (*crs == '{')
+                  {
+                    const char *end = strchr (crs, '}');
+                    if (end == NULL)
+                      {
+                        error_with_progname = false;
+                        error (error_level, 0, _("\
+%s:%d: missing right brace on \\x{HEXNUMBER}"), real_file_name, line_number);
+                        error_with_progname = true;
+                        ++crs;
+                        continue;
+                      }
+                    else
+                      {
+                        ++crs;
+                        (void) extract_hex (crs, end - crs, &hex_number);
+                        crs = end + 1;
+                      }
+                  }
+                else
+                  {
+                    crs = extract_hex (crs, 2, &hex_number);
+                  }
+
+                /* FIXME: If one of the variables UPPERCASE or LOWERCASE is
+                   true, the character should be converted to its uppercase
+                   resp. lowercase equivalent.  I don't know if the necessary
+                   facilities are already included in gettext.  For US-Ascii
+                   the conversion can be already be done, however.  */
+                if (uppercase && hex_number >= 'a' && hex_number <= 'z')
+                  {
+                    hex_number = hex_number - 'a' + 'A';
+                  }
+                else if (lowercase && hex_number >= 'A' && hex_number <= 'Z')
+                  {
+                    hex_number = hex_number - 'A' + 'a';
+                  }
+
+                length = u8_uctomb ((unsigned char *) (buffer + bufpos),
+                                    hex_number, 6);
+
+                if (length > 0)
+                  bufpos += length;
+              }
+              continue;
+            case 'c':
+              /* Perl's notion of control characters.  */
+              crs += 2;
+              if (*crs)
+                {
+                  int the_char = (unsigned char) *crs;
+                  if (the_char >= 'a' && the_char <= 'z')
+                    the_char = the_char - 'a' + 'A';
+                  buffer[bufpos++] = the_char ^ 0x40;
+                }
+              continue;
+            case 'N':
+              crs += 2;
+              if (*crs == '{')
+                {
+                  const char *end = strchr (crs + 1, '}');
+                  if (end != NULL)
+                    {
+                      char *name;
+                      unsigned int unicode;
+
+                      name = XNMALLOC (end - (crs + 1) + 1, char);
+                      memcpy (name, crs + 1, end - (crs + 1));
+                      name[end - (crs + 1)] = '\0';
+
+                      unicode = unicode_name_character (name);
+                      if (unicode != UNINAME_INVALID)
+                        {
+                          /* FIXME: Convert to upper/lowercase if the
+                             corresponding flag is set to true.  */
+                          int length =
+                            u8_uctomb ((unsigned char *) (buffer + bufpos),
+                                       unicode, 6);
+                          if (length > 0)
+                            bufpos += length;
+                        }
+
+                      free (name);
+
+                      crs = end + 1;
+                    }
+                }
+              continue;
+            }
+        }
+
+      /* No escape sequence, go on.  */
+      if (*crs == '\\')
+        {
+          ++crs;
+          switch (*crs)
+            {
+            case 'E':
+              uppercase = false;
+              lowercase = false;
+              quotemeta = false;
+              ++crs;
+              continue;
+            case 'L':
+              uppercase = false;
+              lowercase = true;
+              ++crs;
+              continue;
+            case 'U':
+              uppercase = true;
+              lowercase = false;
+              ++crs;
+              continue;
+            case 'Q':
+              quotemeta = true;
+              ++crs;
+              continue;
+            case 'l':
+              ++crs;
+              if (*crs >= 'A' && *crs <= 'Z')
+                {
+                  buffer[bufpos++] = *crs - 'A' + 'a';
+                }
+              else if ((unsigned char) *crs >= 0x80)
+                {
+                  error_with_progname = false;
+                  error (error_level, 0, _("\
+%s:%d: invalid interpolation (\"\\l\") of 8bit character \"%c\""),
+                         real_file_name, line_number, *crs);
+                  error_with_progname = true;
+                }
+              else
+                {
+                  buffer[bufpos++] = *crs;
+                }
+              ++crs;
+              continue;
+            case 'u':
+              ++crs;
+              if (*crs >= 'a' && *crs <= 'z')
+                {
+                  buffer[bufpos++] = *crs - 'a' + 'A';
+                }
+              else if ((unsigned char) *crs >= 0x80)
+                {
+                  error_with_progname = false;
+                  error (error_level, 0, _("\
+%s:%d: invalid interpolation (\"\\u\") of 8bit character \"%c\""),
+                         real_file_name, line_number, *crs);
+                  error_with_progname = true;
+                }
+              else
+                {
+                  buffer[bufpos++] = *crs;
+                }
+              ++crs;
+              continue;
+            case '\\':
+              buffer[bufpos++] = *crs;
+              ++crs;
+              continue;
+            default:
+              backslashed = true;
+              break;
+            }
+        }
+      else
+        backslashed = false;
+
+      if (quotemeta
+          && !((*crs >= 'A' && *crs <= 'Z') || (*crs >= 'A' && *crs <= 'z')
+               || (*crs >= '0' && *crs <= '9') || *crs == '_'))
+        {
+          buffer[bufpos++] = '\\';
+          backslashed = true;
+        }
+
+      if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
+        {
+          error_with_progname = false;
+          error (error_level, 0, _("\
+%s:%d: invalid variable interpolation at \"%c\""),
+                 real_file_name, line_number, *crs);
+          error_with_progname = true;
+          ++crs;
+        }
+      else if (lowercase)
+        {
+          if (*crs >= 'A' && *crs <= 'Z')
+            buffer[bufpos++] = *crs - 'A' + 'a';
+          else if ((unsigned char) *crs >= 0x80)
+            {
+              error_with_progname = false;
+              error (error_level, 0, _("\
+%s:%d: invalid interpolation (\"\\L\") of 8bit character \"%c\""),
+                     real_file_name, line_number, *crs);
+              error_with_progname = true;
+              buffer[bufpos++] = *crs;
+            }
+          else
+            buffer[bufpos++] = *crs;
+          ++crs;
+        }
+      else if (uppercase)
+        {
+          if (*crs >= 'a' && *crs <= 'z')
+            buffer[bufpos++] = *crs - 'a' + 'A';
+          else if ((unsigned char) *crs >= 0x80)
+            {
+              error_with_progname = false;
+              error (error_level, 0, _("\
+%s:%d: invalid interpolation (\"\\U\") of 8bit character \"%c\""),
+                     real_file_name, line_number, *crs);
+              error_with_progname = true;
+              buffer[bufpos++] = *crs;
+            }
+          else
+            buffer[bufpos++] = *crs;
+          ++crs;
+        }
+      else
+        {
+          buffer[bufpos++] = *crs++;
+        }
+    }
+
+  /* Ensure room for 1 more byte.  */
+  if (bufpos >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+
+  buffer[bufpos++] = '\0';
+
+#if DEBUG_PERL
+  fprintf (stderr, "---> %s\n", buffer);
+#endif
+
+  /* Replace tp->string.  */
+  free (tp->string);
+  tp->string = xstrdup (buffer);
+}
+
+/* Parse a variable.  This is done in several steps:
+     1) Consume all leading occurencies of '$', '@', '%', and '*'.
+     2) Determine the name of the variable from the following input.
+     3) Parse possible following hash keys or array indexes.
+ */
+static void
+extract_variable (message_list_ty *mlp, token_ty *tp, int first)
+{
+  static char *buffer;
+  static int bufmax = 0;
+  int bufpos = 0;
+  int c = first;
+  size_t varbody_length = 0;
+  bool maybe_hash_deref = false;
+  bool maybe_hash_value = false;
+
+  tp->type = token_type_variable;
+
+#if DEBUG_PERL
+  fprintf (stderr, "%s:%d: extracting variable type '%c'\n",
+           real_file_name, line_number, first);
+#endif
+
+  /*
+   * 1) Consume dollars and so on (not euros ...).  Unconditionally
+   *    accepting the hash sign (#) will maybe lead to inaccurate
+   *    results.  FIXME!
+   */
+  while (c == '$' || c == '*' || c == '#' || c == '@' || c == '%')
+    {
+      if (bufpos >= bufmax)
+        {
+          bufmax = 2 * bufmax + 10;
+          buffer = xrealloc (buffer, bufmax);
+        }
+      buffer[bufpos++] = c;
+      c = phase1_getc ();
+    }
+
+  if (c == EOF)
+    {
+      tp->type = token_type_eof;
+      return;
+    }
+
+  /* Hash references are treated in a special way, when looking for
+     our keywords.  */
+  if (buffer[0] == '$')
+    {
+      if (bufpos == 1)
+        maybe_hash_value = true;
+      else if (bufpos == 2 && buffer[1] == '$')
+        {
+          if (!(c == '{'
+                || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+                || (c >= '0' && c <= '9')
+                || c == '_' || c == ':' || c == '\'' || c >= 0x80))
+            {
+              /* Special variable $$ for pid.  */
+              if (bufpos >= bufmax)
+                {
+                  bufmax = 2 * bufmax + 10;
+                  buffer = xrealloc (buffer, bufmax);
+                }
+              buffer[bufpos++] = '\0';
+              tp->string = xstrdup (buffer);
+#if DEBUG_PERL
+              fprintf (stderr, "%s:%d: is PID ($$)\n",
+                       real_file_name, line_number);
+#endif
+
+              phase1_ungetc (c);
+              return;
+            }
+
+          maybe_hash_deref = true;
+          bufpos = 1;
+        }
+    }
+
+  /*
+   * 2) Get the name of the variable.  The first character is practically
+   *    arbitrary.  Punctuation and numbers automagically put a variable
+   *    in the global namespace but that subtle difference is not interesting
+   *    for us.
+   */
+  if (bufpos >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  if (c == '{')
+    {
+      /* Yuck, we cannot accept ${gettext} as a keyword...  Except for
+       * debugging purposes it is also harmless, that we suppress the
+       * real name of the variable.
+       */
+#if DEBUG_PERL
+      fprintf (stderr, "%s:%d: braced {variable_name}\n",
+               real_file_name, line_number);
+#endif
+
+      if (extract_balanced (mlp, token_type_rbrace, true, false,
+                            null_context, null_context_list_iterator,
+                            1, arglist_parser_alloc (mlp, NULL)))
+        {
+          tp->type = token_type_eof;
+          return;
+        }
+      buffer[bufpos++] = c;
+    }
+  else
+    {
+      while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+             || (c >= '0' && c <= '9')
+             || c == '_' || c == ':' || c == '\'' || c >= 0x80)
+        {
+          ++varbody_length;
+          if (bufpos >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[bufpos++] = c;
+          c = phase1_getc ();
+        }
+      phase1_ungetc (c);
+    }
+
+  /* Probably some strange Perl variable like $`.  */
+  if (varbody_length == 0)
+    {
+      c = phase1_getc ();
+      if (c == EOF || is_whitespace (c))
+        phase1_ungetc (c);  /* Loser.  */
+      else
+        {
+          if (bufpos >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[bufpos++] = c;
+        }
+    }
+
+  if (bufpos >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
+      buffer = xrealloc (buffer, bufmax);
+    }
+  buffer[bufpos++] = '\0';
+
+  tp->string = xstrdup (buffer);
+
+#if DEBUG_PERL
+  fprintf (stderr, "%s:%d: complete variable name: %s\n",
+           real_file_name, line_number, tp->string);
+#endif
+
+  /*
+   * 3) If the following looks strange to you, this is valid Perl syntax:
+   *
+   *      $var = $$hashref    # We can place a
+   *                          # comment here and then ...
+   *             {key_into_hashref};
+   *
+   *    POD sections are not allowed but we leave complaints about
+   *    that to the compiler/interpreter.
+   */
+  /* We only extract strings from the first hash key (if present).  */
+
+  if (maybe_hash_deref || maybe_hash_value)
+    {
+      bool is_dereference = false;
+      int c;
+
+      do
+        c = phase2_getc ();
+      while (is_whitespace (c));
+
+      if (c == '-')
+        {
+          int c2 = phase1_getc ();
+
+          if (c2 == '>')
+            {
+              is_dereference = true;
+
+              do
+                c = phase2_getc ();
+              while (is_whitespace (c));
+            }
+          else if (c2 != '\n')
+            {
+              /* Discarding the newline is harmless here.  The only
+                 special character recognized after a minus is greater-than
+                 for dereference.  However, the sequence "-\n>" that we
+                 treat incorrectly here, is a syntax error.  */
+              phase1_ungetc (c2);
+            }
+        }
+
+      if (maybe_hash_value && is_dereference)
+        {
+          tp->type = token_type_object;
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: first keys preceded by \"->\"\n",
+                   real_file_name, line_number);
+#endif
+        }
+      else if (maybe_hash_value)
+        {
+          /* Fake it into a hash.  */
+          tp->string[0] = '%';
+        }
+
+      /* Do NOT change that into else if (see above).  */
+      if ((maybe_hash_value || maybe_hash_deref) && c == '{')
+        {
+          void *keyword_value;
+
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: first keys preceded by '{'\n",
+                   real_file_name, line_number);
+#endif
+
+          if (hash_find_entry (&keywords, tp->string, strlen (tp->string),
+                               &keyword_value) == 0)
+            {
+              /* TODO: Shouldn't we use the shapes of the keyword, instead
+                 of hardwiring argnum1 = 1 ?
+              const struct callshapes *shapes =
+                (const struct callshapes *) keyword_value;
+              */
+              struct callshapes shapes;
+              shapes.keyword = tp->string; /* XXX storage duration? */
+              shapes.keyword_len = strlen (tp->string);
+              shapes.nshapes = 1;
+              shapes.shapes[0].argnum1 = 1;
+              shapes.shapes[0].argnum2 = 0;
+              shapes.shapes[0].argnumc = 0;
+              shapes.shapes[0].argnum1_glib_context = false;
+              shapes.shapes[0].argnum2_glib_context = false;
+              shapes.shapes[0].argtotal = 0;
+              string_list_init (&shapes.shapes[0].xcomments);
+
+              {
+                /* Extract a possible string from the key.  Before proceeding
+                   we check whether the open curly is followed by a symbol and
+                   then by a right curly.  */
+                flag_context_list_iterator_ty context_iter =
+                  flag_context_list_iterator (
+                    flag_context_list_table_lookup (
+                      flag_context_list_table,
+                      tp->string, strlen (tp->string)));
+                token_ty *t1 = x_perl_lex (mlp);
+
+#if DEBUG_PERL
+                fprintf (stderr, "%s:%d: extracting string key\n",
+                         real_file_name, line_number);
+#endif
+
+                if (t1->type == token_type_symbol
+                    || t1->type == token_type_named_op)
+                  {
+                    token_ty *t2 = x_perl_lex (mlp);
+                    if (t2->type == token_type_rbrace)
+                      {
+                        flag_context_ty context;
+                        lex_pos_ty pos;
+
+                        context =
+                          inherited_context (null_context,
+                                             flag_context_list_iterator_advance (
+                                               &context_iter));
+
+                        pos.line_number = line_number;
+                        pos.file_name = logical_file_name;
+
+                        xgettext_current_source_encoding = po_charset_utf8;
+                        remember_a_message (mlp, NULL, xstrdup (t1->string),
+                                            context, &pos, NULL, savable_comment);
+                        xgettext_current_source_encoding = xgettext_global_source_encoding;
+                        free_token (t2);
+                        free_token (t1);
+                      }
+                    else
+                      {
+                        x_perl_unlex (t2);
+                      }
+                  }
+                else
+                  {
+                    x_perl_unlex (t1);
+                    if (extract_balanced (mlp, token_type_rbrace, true, false,
+                                          null_context, context_iter,
+                                          1, arglist_parser_alloc (mlp, &shapes)))
+                      return;
+                  }
+              }
+            }
+          else
+            {
+              phase2_ungetc (c);
+            }
+        }
+      else
+        {
+          phase2_ungetc (c);
+        }
+    }
+
+  /* Now consume "->", "[...]", and "{...}".  */
+  for (;;)
+    {
+      int c = phase2_getc ();
+      int c2;
+
+      switch (c)
+        {
+        case '{':
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: extracting balanced '{' after varname\n",
+                   real_file_name, line_number);
+#endif
+          extract_balanced (mlp, token_type_rbrace, true, false,
+                            null_context, null_context_list_iterator,
+                            1, arglist_parser_alloc (mlp, NULL));
+          break;
+
+        case '[':
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: extracting balanced '[' after varname\n",
+                   real_file_name, line_number);
+#endif
+          extract_balanced (mlp, token_type_rbracket, true, false,
+                            null_context, null_context_list_iterator,
+                            1, arglist_parser_alloc (mlp, NULL));
+          break;
+
+        case '-':
+          c2 = phase1_getc ();
+          if (c2 == '>')
+            {
+#if DEBUG_PERL
+              fprintf (stderr, "%s:%d: another \"->\" after varname\n",
+                       real_file_name, line_number);
+#endif
+              break;
+            }
+          else if (c2 != '\n')
+            {
+              /* Discarding the newline is harmless here.  The only
+                 special character recognized after a minus is greater-than
+                 for dereference.  However, the sequence "-\n>" that we
+                 treat incorrectly here, is a syntax error.  */
+              phase1_ungetc (c2);
+            }
+          /* FALLTHROUGH */
+
+        default:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: variable finished\n",
+                   real_file_name, line_number);
+#endif
+          phase2_ungetc (c);
+          return;
+        }
+    }
+}
+
+/* Actually a simplified version of extract_variable().  It searches for
+   variables inside a double-quoted string that may interpolate to
+   some keyword hash (reference).  The string is UTF-8 encoded.  */
+static void
+interpolate_keywords (message_list_ty *mlp, const char *string, int lineno)
+{
+  static char *buffer;
+  static int bufmax = 0;
+  int bufpos = 0;
+  flag_context_ty context;
+  int c;
+  bool maybe_hash_deref = false;
+  enum parser_state
+    {
+      initial,
+      one_dollar,
+      two_dollars,
+      identifier,
+      minus,
+      wait_lbrace,
+      wait_quote,
+      dquote,
+      squote,
+      barekey,
+      wait_rbrace
+    } state;
+  token_ty token;
+
+  lex_pos_ty pos;
+
+  /* States are:
+   *
+   * initial:      initial
+   * one_dollar:   dollar sign seen in state INITIAL
+   * two_dollars:  another dollar-sign has been seen in state ONE_DOLLAR
+   * identifier:   a valid identifier character has been seen in state
+   *               ONE_DOLLAR or TWO_DOLLARS
+   * minus:        a minus-sign has been seen in state IDENTIFIER
+   * wait_lbrace:  a greater-than has been seen in state MINUS
+   * wait_quote:   a left brace has been seen in state IDENTIFIER or in
+   *               state WAIT_LBRACE
+   * dquote:       a double-quote has been seen in state WAIT_QUOTE
+   * squote:       a single-quote has been seen in state WAIT_QUOTE
+   * barekey:      an bareword character has been seen in state WAIT_QUOTE
+   * wait_rbrace:  closing quote has been seen in state DQUOTE or SQUOTE
+   *
+   * In the states initial...identifier the context is null_context; in the
+   * states minus...wait_rbrace the context is the one suitable for the first
+   * argument of the last seen identifier.
+   */
+  state = initial;
+  context = null_context;
+
+  token.type = token_type_string;
+  token.sub_type = string_type_qq;
+  token.line_number = line_number;
+  /* No need for  token.comment = add_reference (savable_comment);  here.
+     We can let token.comment uninitialized here, and use savable_comment
+     directly, because this function only parses the given string and does
+     not call phase2_getc.  */
+  pos.file_name = logical_file_name;
+  pos.line_number = lineno;
+
+  while ((c = (unsigned char) *string++) != '\0')
+    {
+      void *keyword_value;
+
+      if (state == initial)
+        bufpos = 0;
+
+      if (c == '\n')
+        lineno++;
+
+      if (bufpos + 1 >= bufmax)
+        {
+          bufmax = 2 * bufmax + 10;
+          buffer = xrealloc (buffer, bufmax);
+        }
+
+      switch (state)
+        {
+        case initial:
+          switch (c)
+            {
+            case '\\':
+              c = (unsigned char) *string++;
+              if (c == '\0')
+                return;
+              break;
+            case '$':
+              buffer[bufpos++] = '$';
+              maybe_hash_deref = false;
+              state = one_dollar;
+              break;
+            default:
+              break;
+            }
+          break;
+        case one_dollar:
+          switch (c)
+            {
+            case '$':
+              /*
+               * This is enough to make us believe later that we dereference
+               * a hash reference.
+               */
+              maybe_hash_deref = true;
+              state = two_dollars;
+              break;
+            default:
+              if (c == '_' || c == ':' || c == '\'' || c >= 0x80
+                  || (c >= 'A' && c <= 'Z')
+                  || (c >= 'a' && c <= 'z')
+                  || (c >= '0' && c <= '9'))
+                {
+                  buffer[bufpos++] = c;
+                  state = identifier;
+                }
+              else
+                state = initial;
+              break;
+            }
+          break;
+        case two_dollars:
+          if (c == '_' || c == ':' || c == '\'' || c >= 0x80
+              || (c >= 'A' && c <= 'Z')
+              || (c >= 'a' && c <= 'z')
+              || (c >= '0' && c <= '9'))
+            {
+              buffer[bufpos++] = c;
+              state = identifier;
+            }
+          else
+            state = initial;
+          break;
+        case identifier:
+          switch (c)
+            {
+            case '-':
+              if (hash_find_entry (&keywords, buffer, bufpos, &keyword_value)
+                  == 0)
+                {
+                  flag_context_list_iterator_ty context_iter =
+                    flag_context_list_iterator (
+                      flag_context_list_table_lookup (
+                        flag_context_list_table,
+                        buffer, bufpos));
+                  context =
+                    inherited_context (null_context,
+                                       flag_context_list_iterator_advance (
+                                         &context_iter));
+                  state = minus;
+                }
+              else
+                state = initial;
+              break;
+            case '{':
+              if (!maybe_hash_deref)
+                buffer[0] = '%';
+              if (hash_find_entry (&keywords, buffer, bufpos, &keyword_value)
+                  == 0)
+                {
+                  flag_context_list_iterator_ty context_iter =
+                    flag_context_list_iterator (
+                      flag_context_list_table_lookup (
+                        flag_context_list_table,
+                        buffer, bufpos));
+                  context =
+                    inherited_context (null_context,
+                                       flag_context_list_iterator_advance (
+                                         &context_iter));
+                  state = wait_quote;
+                }
+              else
+                state = initial;
+              break;
+            default:
+              if (c == '_' || c == ':' || c == '\'' || c >= 0x80
+                  || (c >= 'A' && c <= 'Z')
+                  || (c >= 'a' && c <= 'z')
+                  || (c >= '0' && c <= '9'))
+                {
+                  buffer[bufpos++] = c;
+                }
+              else
+                state = initial;
+              break;
+            }
+          break;
+        case minus:
+          switch (c)
+            {
+            case '>':
+              state = wait_lbrace;
+              break;
+            default:
+              context = null_context;
+              state = initial;
+              break;
+            }
+          break;
+        case wait_lbrace:
+          switch (c)
+            {
+            case '{':
+              state = wait_quote;
+              break;
+            default:
+              context = null_context;
+              state = initial;
+              break;
+            }
+          break;
+        case wait_quote:
+          switch (c)
+            {
+            case_whitespace:
+              break;
+            case '\'':
+              pos.line_number = lineno;
+              bufpos = 0;
+              state = squote;
+              break;
+            case '"':
+              pos.line_number = lineno;
+              bufpos = 0;
+              state = dquote;
+              break;
+            default:
+              if (c == '_' || (c >= '0' && c <= '9') || c >= 0x80
+                  || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
+                {
+                  pos.line_number = lineno;
+                  bufpos = 0;
+                  buffer[bufpos++] = c;
+                  state = barekey;
+                }
+              else
+                {
+                  context = null_context;
+                  state = initial;
+                }
+              break;
+            }
+          break;
+        case dquote:
+          switch (c)
+            {
+            case '"':
+              /* The resulting string has to be interpolated twice.  */
+              buffer[bufpos] = '\0';
+              token.string = xstrdup (buffer);
+              extract_quotelike_pass3 (&token, EXIT_FAILURE);
+              /* The string can only shrink with interpolation (because
+                 we ignore \Q).  */
+              if (!(strlen (token.string) <= bufpos))
+                abort ();
+              strcpy (buffer, token.string);
+              free (token.string);
+              state = wait_rbrace;
+              break;
+            case '\\':
+              if (string[0] == '\"')
+                {
+                  buffer[bufpos++] = string++[0];
+                }
+              else if (string[0])
+                {
+                  buffer[bufpos++] = '\\';
+                  buffer[bufpos++] = string++[0];
+                }
+              else
+                {
+                  context = null_context;
+                  state = initial;
+                }
+              break;
+            default:
+              buffer[bufpos++] = c;
+              break;
+            }
+          break;
+        case squote:
+          switch (c)
+            {
+            case '\'':
+              state = wait_rbrace;
+              break;
+            case '\\':
+              if (string[0] == '\'')
+                {
+                  buffer[bufpos++] = string++[0];
+                }
+              else if (string[0])
+                {
+                  buffer[bufpos++] = '\\';
+                  buffer[bufpos++] = string++[0];
+                }
+              else
+                {
+                  context = null_context;
+                  state = initial;
+                }
+              break;
+            default:
+              buffer[bufpos++] = c;
+              break;
+            }
+          break;
+        case barekey:
+          if (c == '_' || (c >= '0' && c <= '9') || c >= 0x80
+              || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
+            {
+              buffer[bufpos++] = c;
+              break;
+            }
+          else if (is_whitespace (c))
+            {
+              state = wait_rbrace;
+              break;
+            }
+          else if (c != '}')
+            {
+              context = null_context;
+              state = initial;
+              break;
+            }
+          /* Must be right brace.  */
+          /* FALLTHROUGH */
+        case wait_rbrace:
+          switch (c)
+            {
+            case_whitespace:
+              break;
+            case '}':
+              buffer[bufpos] = '\0';
+              token.string = xstrdup (buffer);
+              extract_quotelike_pass3 (&token, EXIT_FAILURE);
+              xgettext_current_source_encoding = po_charset_utf8;
+              remember_a_message (mlp, NULL, token.string, context, &pos,
+                                  NULL, savable_comment);
+              xgettext_current_source_encoding = xgettext_global_source_encoding;
+              /* FALLTHROUGH */
+            default:
+              context = null_context;
+              state = initial;
+              break;
+            }
+          break;
+        }
+    }
+}
+
+/* There is an ambiguity about '/' and '?': They can start an operator
+   (division operator '/' or '/=' or the conditional operator '?'), or they can
+   start a regular expression.  The distinction is important because inside
+   regular expressions, '#' loses its special meaning.  This function helps
+   making the decision (a heuristic).  See the documentation for details.  */
+static bool
+prefer_regexp_over_division (token_type_ty type)
+{
+  bool retval = true;
+
+  switch (type)
+    {
+      case token_type_eof:
+        retval = true;
+        break;
+      case token_type_lparen:
+        retval = true;
+        break;
+      case token_type_rparen:
+        retval = false;
+        break;
+      case token_type_comma:
+        retval = true;
+        break;
+      case token_type_fat_comma:
+        retval = true;
+        break;
+      case token_type_dereference:
+        retval = true;
+        break;
+      case token_type_semicolon:
+        retval = true;
+        break;
+      case token_type_lbrace:
+        retval = true;
+        break;
+      case token_type_rbrace:
+        retval = false;
+        break;
+      case token_type_lbracket:
+        retval = true;
+        break;
+      case token_type_rbracket:
+        retval = false;
+        break;
+      case token_type_string:
+        retval = false;
+        break;
+      case token_type_number:
+        retval = false;
+        break;
+      case token_type_named_op:
+        retval = true;
+        break;
+      case token_type_variable:
+        retval = false;
+        break;
+      case token_type_object:
+        retval = false;
+        break;
+      case token_type_symbol:
+      case token_type_keyword_symbol:
+        retval = true;
+        break;
+      case token_type_regex_op:
+        retval = false;
+        break;
+      case token_type_dot:
+        retval = true;
+        break;
+      case token_type_other:
+        retval = true;
+        break;
+  }
+
+#if DEBUG_PERL
+  token_ty ty;
+  ty.type = type;
+  fprintf (stderr, "Prefer regexp over division after %s: %s\n",
+           token2string (&ty), retval ? "true" : "false");
+#endif
+
+  return retval;
+}
+
+/* Last token type seen in the stream.  Important for the interpretation
+   of slash and question mark.  */
+static token_type_ty last_token_type;
+
+/* Combine characters into tokens.  Discard whitespace.  */
+
+static void
+x_perl_prelex (message_list_ty *mlp, token_ty *tp)
+{
+  static char *buffer;
+  static int bufmax;
+  int bufpos;
+  int c;
+
+  for (;;)
+    {
+      c = phase2_getc ();
+      tp->line_number = line_number;
+      tp->last_type = last_token_type;
+
+      switch (c)
+        {
+        case EOF:
+          tp->type = token_type_eof;
+          return;
+
+        case '\n':
+          if (last_non_comment_line > last_comment_line)
+            savable_comment_reset ();
+          /* FALLTHROUGH */
+        case '\t':
+        case ' ':
+          /* Ignore whitespace.  */
+          continue;
+
+        case '%':
+        case '@':
+        case '*':
+        case '$':
+          if (!extract_all)
+            {
+              extract_variable (mlp, tp, c);
+              return;
+            }
+          break;
+        }
+
+      last_non_comment_line = tp->line_number;
+
+      switch (c)
+        {
+        case '.':
+          {
+            int c2 = phase1_getc ();
+            phase1_ungetc (c2);
+            if (c2 == '.')
+              {
+                tp->type = token_type_other;
+                return;
+              }
+            else if (!(c2 >= '0' && c2 <= '9'))
+              {
+                tp->type = token_type_dot;
+                return;
+              }
+          }
+          /* FALLTHROUGH */
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+        case 'Y': case 'Z':
+        case '_':
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+        case 'y': case 'z':
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+          /* Symbol, or part of a number.  */
+          bufpos = 0;
+          for (;;)
+            {
+              if (bufpos >= bufmax)
+                {
+                  bufmax = 2 * bufmax + 10;
+                  buffer = xrealloc (buffer, bufmax);
+                }
+              buffer[bufpos++] = c;
+              c = phase1_getc ();
+              switch (c)
+                {
+                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+                case 'Y': case 'Z':
+                case '_':
+                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+                case 'y': case 'z':
+                case '0': case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                  continue;
+
+                default:
+                  phase1_ungetc (c);
+                  break;
+                }
+              break;
+            }
+          if (bufpos >= bufmax)
+            {
+              bufmax = 2 * bufmax + 10;
+              buffer = xrealloc (buffer, bufmax);
+            }
+          buffer[bufpos] = '\0';
+
+          if (strcmp (buffer, "__END__") == 0
+              || strcmp (buffer, "__DATA__") == 0)
+            {
+              end_of_file = true;
+              tp->type = token_type_eof;
+              return;
+            }
+          else if (strcmp (buffer, "and") == 0
+                   || strcmp (buffer, "cmp") == 0
+                   || strcmp (buffer, "eq") == 0
+                   || strcmp (buffer, "if") == 0
+                   || strcmp (buffer, "ge") == 0
+                   || strcmp (buffer, "gt") == 0
+                   || strcmp (buffer, "le") == 0
+                   || strcmp (buffer, "lt") == 0
+                   || strcmp (buffer, "ne") == 0
+                   || strcmp (buffer, "not") == 0
+                   || strcmp (buffer, "or") == 0
+                   || strcmp (buffer, "unless") == 0
+                   || strcmp (buffer, "while") == 0
+                   || strcmp (buffer, "xor") == 0)
+            {
+              tp->type = token_type_named_op;
+              tp->string = xstrdup (buffer);
+              return;
+            }
+          else if (strcmp (buffer, "s") == 0
+                 || strcmp (buffer, "y") == 0
+                 || strcmp (buffer, "tr") == 0)
+            {
+              int delim = phase1_getc ();
+
+              while (is_whitespace (delim))
+                delim = phase2_getc ();
+
+              if (delim == EOF)
+                {
+                  tp->type = token_type_eof;
+                  return;
+                }
+              if ((delim >= '0' && delim <= '9')
+                  || (delim >= 'A' && delim <= 'Z')
+                  || (delim >= 'a' && delim <= 'z'))
+                {
+                  /* False positive.  */
+                  phase2_ungetc (delim);
+                  tp->type = token_type_symbol;
+                  tp->sub_type = symbol_type_none;
+                  tp->string = xstrdup (buffer);
+                  return;
+                }
+              extract_triple_quotelike (mlp, tp, delim,
+                                        buffer[0] == 's' && delim != '\'');
+
+              /* Eat the following modifiers.  */
+              do
+                c = phase1_getc ();
+              while (c >= 'a' && c <= 'z');
+              phase1_ungetc (c);
+              return;
+            }
+          else if (strcmp (buffer, "m") == 0)
+            {
+              int delim = phase1_getc ();
+
+              while (is_whitespace (delim))
+                delim = phase2_getc ();
+
+              if (delim == EOF)
+                {
+                  tp->type = token_type_eof;
+                  return;
+                }
+              if ((delim >= '0' && delim <= '9')
+                  || (delim >= 'A' && delim <= 'Z')
+                  || (delim >= 'a' && delim <= 'z'))
+                {
+                  /* False positive.  */
+                  phase2_ungetc (delim);
+                  tp->type = token_type_symbol;
+                  tp->sub_type = symbol_type_none;
+                  tp->string = xstrdup (buffer);
+                  return;
+                }
+              extract_quotelike (tp, delim);
+              if (delim != '\'')
+                interpolate_keywords (mlp, tp->string, line_number);
+              free (tp->string);
+              drop_reference (tp->comment);
+              tp->type = token_type_regex_op;
+
+              /* Eat the following modifiers.  */
+              do
+                c = phase1_getc ();
+              while (c >= 'a' && c <= 'z');
+              phase1_ungetc (c);
+              return;
+            }
+          else if (strcmp (buffer, "qq") == 0
+                   || strcmp (buffer, "q") == 0
+                   || strcmp (buffer, "qx") == 0
+                   || strcmp (buffer, "qw") == 0
+                   || strcmp (buffer, "qr") == 0)
+            {
+              /* The qw (...) construct is not really a string but we
+                 can treat in the same manner and then pretend it is
+                 a symbol.  Rationale: Saying "qw (foo bar)" is the
+                 same as "my @list = ('foo', 'bar'); @list;".  */
+
+              int delim = phase1_getc ();
+
+              while (is_whitespace (delim))
+                delim = phase2_getc ();
+
+              if (delim == EOF)
+                {
+                  tp->type = token_type_eof;
+                  return;
+                }
+
+              if ((delim >= '0' && delim <= '9')
+                  || (delim >= 'A' && delim <= 'Z')
+                  || (delim >= 'a' && delim <= 'z'))
+                {
+                  /* False positive.  */
+                  phase2_ungetc (delim);
+                  tp->type = token_type_symbol;
+                  tp->sub_type = symbol_type_none;
+                  tp->string = xstrdup (buffer);
+                  return;
+                }
+
+              extract_quotelike (tp, delim);
+
+              switch (buffer[1])
+                {
+                case 'q':
+                case 'x':
+                  tp->type = token_type_string;
+                  tp->sub_type = string_type_qq;
+                  interpolate_keywords (mlp, tp->string, line_number);
+                  break;
+                case 'r':
+                  drop_reference (tp->comment);
+                  tp->type = token_type_regex_op;
+                  break;
+                case 'w':
+                  drop_reference (tp->comment);
+                  tp->type = token_type_symbol;
+                  tp->sub_type = symbol_type_none;
+                  break;
+                case '\0':
+                  tp->type = token_type_string;
+                  tp->sub_type = string_type_q;
+                  break;
+                default:
+                  abort ();
+                }
+              return;
+            }
+          else if ((buffer[0] >= '0' && buffer[0] <= '9') || buffer[0] == '.')
+            {
+              tp->type = token_type_number;
+              return;
+            }
+          tp->type = token_type_symbol;
+          tp->sub_type = (strcmp (buffer, "sub") == 0
+                          ? symbol_type_sub
+                          : symbol_type_none);
+          tp->string = xstrdup (buffer);
+          return;
+
+        case '"':
+          extract_quotelike (tp, c);
+          tp->sub_type = string_type_qq;
+          interpolate_keywords (mlp, tp->string, line_number);
+          return;
+
+        case '`':
+          extract_quotelike (tp, c);
+          tp->sub_type = string_type_qq;
+          interpolate_keywords (mlp, tp->string, line_number);
+          return;
+
+        case '\'':
+          extract_quotelike (tp, c);
+          tp->sub_type = string_type_q;
+          return;
+
+        case '(':
+          c = phase2_getc ();
+          if (c == ')')
+            /* Ignore empty list.  */
+            continue;
+          else
+            phase2_ungetc (c);
+          tp->type = token_type_lparen;
+          return;
+
+        case ')':
+          tp->type = token_type_rparen;
+          return;
+
+        case '{':
+          tp->type = token_type_lbrace;
+          return;
+
+        case '}':
+          tp->type = token_type_rbrace;
+          return;
+
+        case '[':
+          tp->type = token_type_lbracket;
+          return;
+
+        case ']':
+          tp->type = token_type_rbracket;
+          return;
+
+        case ';':
+          tp->type = token_type_semicolon;
+          return;
+
+        case ',':
+          tp->type = token_type_comma;
+          return;
+
+        case '=':
+          /* Check for fat comma.  */
+          c = phase1_getc ();
+          if (c == '>')
+            {
+              tp->type = token_type_fat_comma;
+              return;
+            }
+          else if (linepos == 2
+                   && (last_token_type == token_type_semicolon
+                       || last_token_type == token_type_rbrace)
+                   && ((c >= 'A' && c <='Z')
+                       || (c >= 'a' && c <= 'z')))
+            {
+#if DEBUG_PERL
+              fprintf (stderr, "%s:%d: start pod section\n",
+                       real_file_name, line_number);
+#endif
+              skip_pod ();
+#if DEBUG_PERL
+              fprintf (stderr, "%s:%d: end pod section\n",
+                       real_file_name, line_number);
+#endif
+              continue;
+            }
+          phase1_ungetc (c);
+          tp->type = token_type_other;
+          return;
+
+        case '<':
+          /* Check for <<EOF and friends.  */
+          c = phase1_getc ();
+          if (c == '<')
+            {
+              c = phase1_getc ();
+              if (c == '\'')
+                {
+                  char *string;
+                  extract_quotelike (tp, c);
+                  string = get_here_document (tp->string);
+                  free (tp->string);
+                  tp->string = string;
+                  tp->type = token_type_string;
+                  tp->sub_type = string_type_verbatim;
+                  tp->line_number = line_number + 1;
+                  return;
+                }
+              else if (c == '"')
+                {
+                  char *string;
+                  extract_quotelike (tp, c);
+                  string = get_here_document (tp->string);
+                  free (tp->string);
+                  tp->string = string;
+                  tp->type = token_type_string;
+                  tp->sub_type = string_type_qq;
+                  tp->line_number = line_number + 1;
+                  interpolate_keywords (mlp, tp->string, tp->line_number);
+                  return;
+                }
+              else if ((c >= 'A' && c <= 'Z')
+                       || (c >= 'a' && c <= 'z')
+                       || c == '_')
+                {
+                  bufpos = 0;
+                  while ((c >= 'A' && c <= 'Z')
+                         || (c >= 'a' && c <= 'z')
+                         || (c >= '0' && c <= '9')
+                         || c == '_' || c >= 0x80)
+                    {
+                      if (bufpos >= bufmax)
+                        {
+                          bufmax = 2 * bufmax + 10;
+                          buffer = xrealloc (buffer, bufmax);
+                        }
+                      buffer[bufpos++] = c;
+                      c = phase1_getc ();
+                    }
+                  if (c == EOF)
+                    {
+                      tp->type = token_type_eof;
+                      return;
+                    }
+                  else
+                    {
+                      char *string;
+                      phase1_ungetc (c);
+                      if (bufpos >= bufmax)
+                        {
+                          bufmax = 2 * bufmax + 10;
+                          buffer = xrealloc (buffer, bufmax);
+                        }
+                      buffer[bufpos++] = '\0';
+                      string = get_here_document (buffer);
+                      tp->string = string;
+                      tp->type = token_type_string;
+                      tp->sub_type = string_type_qq;
+                      tp->comment = add_reference (savable_comment);
+                      tp->line_number = line_number + 1;
+                      interpolate_keywords (mlp, tp->string, tp->line_number);
+                      return;
+                    }
+                }
+              else
+                {
+                  tp->type = token_type_other;
+                  return;
+                }
+            }
+          else
+            {
+              phase1_ungetc (c);
+              tp->type = token_type_other;
+            }
+          return;  /* End of case '>'.  */
+
+        case '-':
+          /* Check for dereferencing operator.  */
+          c = phase1_getc ();
+          if (c == '>')
+            {
+              tp->type = token_type_dereference;
+              return;
+            }
+          else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
+            {
+              /* One of the -X (filetest) functions.  We play safe
+                 and accept all alphabetical characters here.  */
+              tp->type = token_type_other;
+              return;
+            }
+          phase1_ungetc (c);
+          tp->type = token_type_other;
+          return;
+
+        case '/':
+        case '?':
+          if (prefer_regexp_over_division (tp->last_type))
+            {
+              extract_quotelike (tp, c);
+              interpolate_keywords (mlp, tp->string, line_number);
+              free (tp->string);
+              drop_reference (tp->comment);
+              tp->type = token_type_regex_op;
+              /* Eat the following modifiers.  */
+              do
+                c = phase1_getc ();
+              while (c >= 'a' && c <= 'z');
+              phase1_ungetc (c);
+              return;
+            }
+          /* Recognize operator '//'.  */
+          if (c == '/')
+            {
+              c = phase1_getc ();
+              if (c != '/')
+                phase1_ungetc (c);
+            }
+          /* FALLTHROUGH */
+
+        default:
+          /* We could carefully recognize each of the 2 and 3 character
+             operators, but it is not necessary, except for the '//' operator,
+             as we only need to recognize gettext invocations.  Don't
+             bother.  */
+          tp->type = token_type_other;
+          return;
+        }
+    }
+}
+
+
+/* A token stack used as a lookahead buffer.  */
+
+typedef struct token_stack_ty token_stack_ty;
+struct token_stack_ty
+{
+  token_ty **items;
+  size_t nitems;
+  size_t nitems_max;
+};
+
+static struct token_stack_ty token_stack;
+
+#if DEBUG_PERL
+/* Dumps all resources allocated by stack STACK.  */
+static int
+token_stack_dump (token_stack_ty *stack)
+{
+  size_t i;
+
+  fprintf (stderr, "BEGIN STACK DUMP\n");
+  for (i = 0; i < stack->nitems; i++)
+    {
+      token_ty *token = stack->items[i];
+      fprintf (stderr, "  [%s]\n", token2string (token));
+      switch (token->type)
+        {
+        case token_type_named_op:
+        case token_type_string:
+        case token_type_symbol:
+        case token_type_variable:
+          fprintf (stderr, "    string: %s\n", token->string);
+          break;
+        case token_type_object:
+          fprintf (stderr, "    string: %s->\n", token->string);
+        default:
+          break;
+        }
+    }
+  fprintf (stderr, "END STACK DUMP\n");
+  return 0;
+}
+#endif
+
+/* Pushes the token TOKEN onto the stack STACK.  */
+static inline void
+token_stack_push (token_stack_ty *stack, token_ty *token)
+{
+  if (stack->nitems >= stack->nitems_max)
+    {
+      size_t nbytes;
+
+      stack->nitems_max = 2 * stack->nitems_max + 4;
+      nbytes = stack->nitems_max * sizeof (token_ty *);
+      stack->items = xrealloc (stack->items, nbytes);
+    }
+  stack->items[stack->nitems++] = token;
+}
+
+/* Pops the most recently pushed token from the stack STACK and returns it.
+   Returns NULL if the stack is empty.  */
+static inline token_ty *
+token_stack_pop (token_stack_ty *stack)
+{
+  if (stack->nitems > 0)
+    return stack->items[--(stack->nitems)];
+  else
+    return NULL;
+}
+
+/* Return the top of the stack without removing it from the stack, or
+   NULL if the stack is empty.  */
+static inline token_ty *
+token_stack_peek (const token_stack_ty *stack)
+{
+  if (stack->nitems > 0)
+    return stack->items[stack->nitems - 1];
+  else
+    return NULL;
+}
+
+/* Frees all resources allocated by stack STACK.  */
+static inline void
+token_stack_free (token_stack_ty *stack)
+{
+  size_t i;
+
+  for (i = 0; i < stack->nitems; i++)
+    free_token (stack->items[i]);
+  free (stack->items);
+}
+
+
+static token_ty *
+x_perl_lex (message_list_ty *mlp)
+{
+#if DEBUG_PERL
+  int dummy = token_stack_dump (&token_stack);
+#endif
+  token_ty *tp = token_stack_pop (&token_stack);
+
+  if (!tp)
+    {
+      tp = XMALLOC (token_ty);
+      x_perl_prelex (mlp, tp);
+      tp->last_type = last_token_type;
+      last_token_type = tp->type;
+
+#if DEBUG_PERL
+      fprintf (stderr, "%s:%d: x_perl_prelex returned %s\n",
+               real_file_name, line_number, token2string (tp));
+#endif
+
+      /* The interpretation of a slash or question mark after a function call
+         depends on the prototype of that function.  If the function expects
+         at least one argument, a regular expression is preferred, otherwise
+         an operator.  With our limited means, we can only guess here.  If
+         the function is a builtin that takes no arguments, we prefer an
+         operator by silently turning the last symbol into a variable instead
+         of a symbol.
+
+         Method calls without parentheses are not ambiguous.  After them, an
+         operator must follow.  Due to some ideosyncrasies in this parser
+         they are treated in two different manners.  If the call is
+         chained ($foo->bar->baz) the token left of the symbol is a
+         dereference operator.  If it is not chained ($foo->bar) the
+         dereference operator is consumed with the extracted variable.  The
+         latter case is handled below.  */
+      if (tp->type == token_type_symbol)
+        {
+          if (tp->last_type == token_type_dereference)
+            {
+              /* Class method call or chained method call (with at least
+                 two arrow operators).  */
+              last_token_type = token_type_variable;
+            }
+          else if (tp->last_type == token_type_object)
+            {
+              /* Instance method, not chained.  */
+              last_token_type = token_type_variable;
+            }
+          else if (strcmp (tp->string, "wantarray") == 0
+                   || strcmp (tp->string, "fork") == 0
+                   || strcmp (tp->string, "getlogin") == 0
+                   || strcmp (tp->string, "getppid") == 0
+                   || strcmp (tp->string, "getpwent") == 0
+                   || strcmp (tp->string, "getgrent") == 0
+                   || strcmp (tp->string, "gethostent") == 0
+                   || strcmp (tp->string, "getnetent") == 0
+                   || strcmp (tp->string, "getprotoent") == 0
+                   || strcmp (tp->string, "getservent") == 0
+                   || strcmp (tp->string, "setpwent") == 0
+                   || strcmp (tp->string, "setgrent") == 0
+                   || strcmp (tp->string, "endpwent") == 0
+                   || strcmp (tp->string, "endgrent") == 0
+                   || strcmp (tp->string, "endhostent") == 0
+                   || strcmp (tp->string, "endnetent") == 0
+                   || strcmp (tp->string, "endprotoent") == 0
+                   || strcmp (tp->string, "endservent") == 0
+                   || strcmp (tp->string, "time") == 0
+                   || strcmp (tp->string, "times") == 0
+                   || strcmp (tp->string, "wait") == 0
+                   || strcmp (tp->string, "wantarray") == 0)
+            {
+              /* A Perl built-in function that does not accept arguments.  */
+              last_token_type = token_type_variable;
+            }
+        }
+    }
+#if DEBUG_PERL
+  else
+    {
+      fprintf (stderr, "%s:%d: %s recycled from stack\n",
+               real_file_name, line_number, token2string (tp));
+    }
+#endif
+
+  /* A symbol followed by a fat comma is really a single-quoted string.
+     Function definitions or forward declarations also need a special
+     handling because the dollars and at signs inside the parentheses
+     must not be interpreted as the beginning of a variable ')'.  */
+  if (tp->type == token_type_symbol || tp->type == token_type_named_op)
+    {
+      token_ty *next = token_stack_peek (&token_stack);
+
+      if (!next)
+        {
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: pre-fetching next token\n",
+                   real_file_name, line_number);
+#endif
+          next = x_perl_lex (mlp);
+          x_perl_unlex (next);
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: unshifted next token\n",
+                   real_file_name, line_number);
+#endif
+        }
+
+#if DEBUG_PERL
+      fprintf (stderr, "%s:%d: next token is %s\n",
+               real_file_name, line_number, token2string (next));
+#endif
+
+      if (next->type == token_type_fat_comma)
+        {
+          tp->type = token_type_string;
+          tp->sub_type = string_type_q;
+          tp->comment = add_reference (savable_comment);
+#if DEBUG_PERL
+          fprintf (stderr,
+                   "%s:%d: token %s mutated to token_type_string\n",
+                   real_file_name, line_number, token2string (tp));
+#endif
+        }
+      else if (tp->type == token_type_symbol && tp->sub_type == symbol_type_sub
+               && next->type == token_type_symbol)
+        {
+          /* Start of a function declaration or definition.  Mark this
+             symbol as a function name, so that we can later eat up
+             possible prototype information.  */
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: subroutine declaration/definition '%s'\n",
+                   real_file_name, line_number, next->string);
+#endif
+          next->sub_type = symbol_type_function;
+        }
+      else if (tp->type == token_type_symbol
+               && (tp->sub_type == symbol_type_sub
+                   || tp->sub_type == symbol_type_function)
+               && next->type == token_type_lparen)
+        {
+          /* For simplicity we simply consume everything up to the
+             closing parenthesis.  Actually only a limited set of
+             characters is allowed inside parentheses but we leave
+             complaints to the interpreter and are prepared for
+             future extensions to the Perl syntax.  */
+          int c;
+
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: consuming prototype information\n",
+                   real_file_name, line_number);
+#endif
+
+          do
+            {
+              c = phase1_getc ();
+#if DEBUG_PERL
+              fprintf (stderr, "  consuming character '%c'\n", c);
+#endif
+            }
+          while (c != EOF && c != ')');
+          phase1_ungetc (c);
+        }
+    }
+
+  return tp;
+}
+
+static void
+x_perl_unlex (token_ty *tp)
+{
+  token_stack_push (&token_stack, tp);
+}
+
+
+/* ========================= Extracting strings.  ========================== */
+
+/* Assuming TP is a string token, this function accumulates all subsequent
+   . string2 . string3 ... to the string.  (String concatenation.)  */
+
+static char *
+collect_message (message_list_ty *mlp, token_ty *tp, int error_level)
+{
+  char *string;
+  size_t len;
+
+  extract_quotelike_pass3 (tp, error_level);
+  string = xstrdup (tp->string);
+  len = strlen (tp->string) + 1;
+
+  for (;;)
+    {
+      int c;
+
+      do
+        c = phase2_getc ();
+      while (is_whitespace (c));
+
+      if (c != '.')
+        {
+          phase2_ungetc (c);
+          return string;
+        }
+
+      do
+        c = phase2_getc ();
+      while (is_whitespace (c));
+
+      phase2_ungetc (c);
+
+      if (c == '"' || c == '\'' || c == '`'
+          || ((c == '/' || c == '?')
+              && prefer_regexp_over_division (tp->last_type))
+          || c == 'q')
+        {
+          token_ty *qstring = x_perl_lex (mlp);
+          if (qstring->type != token_type_string)
+            {
+              /* assert (qstring->type == token_type_symbol) */
+              x_perl_unlex (qstring);
+              return string;
+            }
+
+          extract_quotelike_pass3 (qstring, error_level);
+          len += strlen (qstring->string);
+          string = xrealloc (string, len);
+          strcat (string, qstring->string);
+          free_token (qstring);
+        }
+    }
+}
+
+/* The file is broken into tokens.  Scan the token stream, looking for
+   a keyword, followed by a left paren, followed by a string.  When we
+   see this sequence, we have something to remember.  We assume we are
+   looking at a valid Perl program, and leave the complaints about
+   the grammar to the compiler.
+
+     Normal handling: Look for
+       keyword ( ... msgid ... )
+     Plural handling: Look for
+       keyword ( ... msgid ... msgid_plural ... )
+
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.
+
+   In Perl, parentheses around function arguments can be omitted.
+
+   The general rules are:
+     1) Functions declared with a prototype take exactly the specified number
+        of arguments.
+          sub one_arg ($) { ... }
+          sub two_args ($$) { ... }
+     2) When a function name is immediately followed by an opening parenthesis,
+        the argument list ends at the corresponding closing parenthesis.
+
+   If rule 1 and rule 2 are contradictory, i.e. when the program calls a
+   function with an explicit argument list and the wrong number of arguments,
+   the program is invalid:
+     sub two_args ($$) { ... }
+     foo two_args (x), y             - invalid due to rules 1 and 2
+
+   Ambiguities are resolved as follows:
+     3) Some built-ins, such as 'abs', 'sqrt', 'sin', 'cos', ..., and functions
+        declared with a prototype of exactly one argument take exactly one
+        argument:
+          foo sin x, y  ==>  foo (sin (x), y)
+          sub one_arg ($) { ... }
+          foo one_arg x, y, z  ==>  foo (one_arg (x), y, z)
+     4) Other identifiers, if not immediately followed by an opening
+        parenthesis, consume the entire remaining argument list:
+          foo bar x, y  ==>  foo (bar (x, y))
+          sub two_args ($$) { ... }
+          foo two_args x, y  ==>  foo (two_args (x, y))
+
+   Other series of comma separated expressions without a function name at
+   the beginning are comma expressions:
+          sub two_args ($$) { ... }
+          foo two_args x, (y, z)  ==>  foo (two_args (x, (y, z)))
+   Note that the evaluation of comma expressions returns a list of values
+   when in list context (e.g. inside the argument list of a function without
+   prototype) but only one value when inside the argument list of a function
+   with a prototype:
+          sub print3 ($$$) { print @_ }
+          print3 5, (6, 7), 8  ==>  578
+          print 5, (6, 7), 8  ==>  5678
+
+   Where rule 3 or 4 contradict rule 1 or 2, the program is invalid:
+     sin (x, y)                      - invalid due to rules 2 and 3
+     sub one_arg ($) { ... }
+     one_arg (x, y)                  - invalid due to rules 2 and 3
+     sub two_args ($$) { ... }
+     foo two_args x, y, z            - invalid due to rules 1 and 4
+ */
+
+/* Extract messages until the next balanced closing parenthesis.
+   Extracted messages are added to MLP.
+
+   DELIM can be either token_type_rbrace, token_type_rbracket,
+   token_type_rparen.  Additionally, if COMMA_DELIM is true, parsing
+   stops at the next comma outside parentheses.
+
+   ARG is the current argument list position, starts with 1.
+   ARGPARSER is the corresponding argument list parser.
+
+   Returns true for EOF, false otherwise.  */
+
+static bool
+extract_balanced (message_list_ty *mlp,
+                  token_type_ty delim, bool eat_delim, bool comma_delim,
+                  flag_context_ty outer_context,
+                  flag_context_list_iterator_ty context_iter,
+                  int arg, struct arglist_parser *argparser)
+{
+  /* Whether to implicitly assume the next tokens are arguments even without
+     a '('.  */
+  bool next_is_argument = false;
+  /* Parameters of the keyword just seen.  Defined only when next_is_argument
+     is true.  */
+  const struct callshapes *next_shapes = NULL;
+  struct arglist_parser *next_argparser = NULL;
+
+  /* Whether to not consider strings until the next comma.  */
+  bool skip_until_comma = false;
+
+  /* Context iterator that will be used if the next token is a '('.  */
+  flag_context_list_iterator_ty next_context_iter =
+    passthrough_context_list_iterator;
+  /* Current context.  */
+  flag_context_ty inner_context =
+    inherited_context (outer_context,
+                       flag_context_list_iterator_advance (&context_iter));
+
+#if DEBUG_PERL
+  static int nesting_level = 0;
+
+  ++nesting_level;
+#endif
+
+  for (;;)
+    {
+      /* The current token.  */
+      token_ty *tp;
+
+      tp = x_perl_lex (mlp);
+
+      if (delim == tp->type)
+        {
+          xgettext_current_source_encoding = po_charset_utf8;
+          arglist_parser_done (argparser, arg);
+          xgettext_current_source_encoding = xgettext_global_source_encoding;
+          if (next_argparser != NULL)
+            free (next_argparser);
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: extract_balanced finished (%d)\n",
+                   logical_file_name, tp->line_number, --nesting_level);
+#endif
+          if (eat_delim)
+            free_token (tp);
+          else
+            /* Preserve the delimiter for the caller.  */
+            x_perl_unlex (tp);
+          return false;
+        }
+
+      if (comma_delim && tp->type == token_type_comma)
+        {
+          xgettext_current_source_encoding = po_charset_utf8;
+          arglist_parser_done (argparser, arg);
+          xgettext_current_source_encoding = xgettext_global_source_encoding;
+          if (next_argparser != NULL)
+            free (next_argparser);
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: extract_balanced finished at comma (%d)\n",
+                   logical_file_name, tp->line_number, --nesting_level);
+#endif
+          x_perl_unlex (tp);
+          return false;
+        }
+
+      if (next_is_argument && tp->type != token_type_lparen)
+        {
+          /* An argument list starts, even though there is no '('.  */
+          bool next_comma_delim;
+
+          x_perl_unlex (tp);
+
+          if (next_shapes != NULL)
+            /* We know something about the function being called.  Assume
+               that it consumes only one argument if no argument number or
+               total > 1 is specified.  */
+            {
+              size_t i;
+
+              next_comma_delim = true;
+              for (i = 0; i < next_shapes->nshapes; i++)
+                {
+                  const struct callshape *shape = &next_shapes->shapes[i];
+
+                  if (shape->argnum1 > 1
+                      || shape->argnum2 > 1
+                      || shape->argnumc > 1
+                      || shape->argtotal > 1)
+                    next_comma_delim = false;
+                }
+            }
+          else
+            /* We know nothing about the function being called.  It could be
+               a function prototyped to take only one argument, or on the other
+               hand it could be prototyped to take more than one argument or an
+               arbitrary argument list or it could be unprototyped.  Due to
+               the way the parser works, assuming the first case gives the
+               best results.  */
+            next_comma_delim = true;
+
+          if (extract_balanced (mlp, delim, false, next_comma_delim,
+                                inner_context, next_context_iter,
+                                1, next_argparser))
+            {
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_global_source_encoding;
+              return true;
+            }
+
+          next_is_argument = false;
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          continue;
+        }
+
+      switch (tp->type)
+        {
+        case token_type_symbol:
+        case token_type_keyword_symbol:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type symbol (%d) \"%s\"\n",
+                   logical_file_name, tp->line_number, nesting_level,
+                   tp->string);
+#endif
+
+          {
+            void *keyword_value;
+
+            if (hash_find_entry (&keywords, tp->string, strlen (tp->string),
+                                 &keyword_value) == 0)
+              {
+                const struct callshapes *shapes =
+                  (const struct callshapes *) keyword_value;
+
+                next_shapes = shapes;
+                next_argparser = arglist_parser_alloc (mlp, shapes);
+              }
+            else
+              {
+                next_shapes = NULL;
+                next_argparser = arglist_parser_alloc (mlp, NULL);
+              }
+          }
+          next_is_argument = true;
+          next_context_iter =
+            flag_context_list_iterator (
+              flag_context_list_table_lookup (
+                flag_context_list_table,
+                tp->string, strlen (tp->string)));
+          break;
+
+        case token_type_variable:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type variable (%d) \"%s\"\n",
+                   logical_file_name, tp->line_number, nesting_level,
+                   tp->string);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_object:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type object (%d) \"%s->\"\n",
+                   logical_file_name, tp->line_number, nesting_level,
+                   tp->string);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_lparen:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type left parenthesis (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          if (next_is_argument)
+            {
+              /* Parse the argument list of a function call.  */
+              if (extract_balanced (mlp, token_type_rparen, true, false,
+                                    inner_context, next_context_iter,
+                                    1, next_argparser))
+                {
+                  xgettext_current_source_encoding = po_charset_utf8;
+                  arglist_parser_done (argparser, arg);
+                  xgettext_current_source_encoding = xgettext_global_source_encoding;
+                  return true;
+                }
+              next_is_argument = false;
+              next_argparser = NULL;
+            }
+          else
+            {
+              /* Parse a parenthesized expression or comma expression.  */
+              if (extract_balanced (mlp, token_type_rparen, true, false,
+                                    inner_context, next_context_iter,
+                                    arg, arglist_parser_clone (argparser)))
+                {
+                  xgettext_current_source_encoding = po_charset_utf8;
+                  arglist_parser_done (argparser, arg);
+                  xgettext_current_source_encoding = xgettext_global_source_encoding;
+                  if (next_argparser != NULL)
+                    free (next_argparser);
+                  free_token (tp);
+                  return true;
+                }
+              next_is_argument = false;
+              if (next_argparser != NULL)
+                free (next_argparser);
+              next_argparser = NULL;
+            }
+          skip_until_comma = true;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_rparen:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type right parenthesis (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          skip_until_comma = true;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_comma:
+        case token_type_fat_comma:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type comma (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          if (arglist_parser_decidedp (argparser, arg))
+            {
+              /* We have missed the argument.  */
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_global_source_encoding;
+              argparser = arglist_parser_alloc (mlp, NULL);
+              arg = 0;
+            }
+          arg++;
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: arg: %d\n",
+                   real_file_name, tp->line_number, arg);
+#endif
+          inner_context =
+            inherited_context (outer_context,
+                               flag_context_list_iterator_advance (
+                                 &context_iter));
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          skip_until_comma = false;
+          next_context_iter = passthrough_context_list_iterator;
+          break;
+
+        case token_type_string:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type string (%d): \"%s\"\n",
+                   logical_file_name, tp->line_number, nesting_level,
+                   tp->string);
+#endif
+
+          if (extract_all)
+            {
+              char *string = collect_message (mlp, tp, EXIT_SUCCESS);
+              lex_pos_ty pos;
+
+              pos.file_name = logical_file_name;
+              pos.line_number = tp->line_number;
+              xgettext_current_source_encoding = po_charset_utf8;
+              remember_a_message (mlp, NULL, string, inner_context, &pos,
+                                  NULL, tp->comment);
+              xgettext_current_source_encoding = xgettext_global_source_encoding;
+            }
+          else if (!skip_until_comma)
+            {
+              /* Need to collect the complete string, with error checking,
+                 only if the argument ARG is used in ARGPARSER.  */
+              bool must_collect = false;
+              {
+                size_t nalternatives = argparser->nalternatives;
+                size_t i;
+
+                for (i = 0; i < nalternatives; i++)
+                  {
+                    struct partial_call *cp = &argparser->alternative[i];
+
+                    if (arg == cp->argnumc
+                        || arg == cp->argnum1 || arg == cp->argnum2)
+                      must_collect = true;
+                  }
+              }
+
+              if (must_collect)
+                {
+                  char *string = collect_message (mlp, tp, EXIT_FAILURE);
+
+                  xgettext_current_source_encoding = po_charset_utf8;
+                  arglist_parser_remember (argparser, arg,
+                                           string, inner_context,
+                                           logical_file_name, tp->line_number,
+                                           tp->comment);
+                  xgettext_current_source_encoding = xgettext_global_source_encoding;
+                }
+            }
+
+          if (arglist_parser_decidedp (argparser, arg))
+            {
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_global_source_encoding;
+              argparser = arglist_parser_alloc (mlp, NULL);
+            }
+
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_number:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type number (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_eof:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type EOF (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          xgettext_current_source_encoding = po_charset_utf8;
+          arglist_parser_done (argparser, arg);
+          xgettext_current_source_encoding = xgettext_global_source_encoding;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          free_token (tp);
+          return true;
+
+        case token_type_lbrace:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type lbrace (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          if (extract_balanced (mlp, token_type_rbrace, true, false,
+                                null_context, null_context_list_iterator,
+                                1, arglist_parser_alloc (mlp, NULL)))
+            {
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_global_source_encoding;
+              if (next_argparser != NULL)
+                free (next_argparser);
+              free_token (tp);
+              return true;
+            }
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_rbrace:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type rbrace (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_lbracket:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type lbracket (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          if (extract_balanced (mlp, token_type_rbracket, true, false,
+                                null_context, null_context_list_iterator,
+                                1, arglist_parser_alloc (mlp, NULL)))
+            {
+              xgettext_current_source_encoding = po_charset_utf8;
+              arglist_parser_done (argparser, arg);
+              xgettext_current_source_encoding = xgettext_global_source_encoding;
+              if (next_argparser != NULL)
+                free (next_argparser);
+              free_token (tp);
+              return true;
+            }
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_rbracket:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type rbracket (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_semicolon:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type semicolon (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+
+          /* The ultimate sign.  */
+          xgettext_current_source_encoding = po_charset_utf8;
+          arglist_parser_done (argparser, arg);
+          xgettext_current_source_encoding = xgettext_global_source_encoding;
+          argparser = arglist_parser_alloc (mlp, NULL);
+
+          /* FIXME: Instead of resetting outer_context here, it may be better
+             to recurse in the next_is_argument handling above, waiting for
+             the next semicolon or other statement terminator.  */
+          outer_context = null_context;
+          context_iter = null_context_list_iterator;
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = passthrough_context_list_iterator;
+          inner_context =
+            inherited_context (outer_context,
+                               flag_context_list_iterator_advance (
+                                 &context_iter));
+          break;
+
+        case token_type_dereference:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type dereference (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_dot:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type dot (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_named_op:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type named operator (%d): %s\n",
+                   logical_file_name, tp->line_number, nesting_level,
+                   tp->string);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_regex_op:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type regex operator (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        case token_type_other:
+#if DEBUG_PERL
+          fprintf (stderr, "%s:%d: type other (%d)\n",
+                   logical_file_name, tp->line_number, nesting_level);
+#endif
+          next_is_argument = false;
+          if (next_argparser != NULL)
+            free (next_argparser);
+          next_argparser = NULL;
+          next_context_iter = null_context_list_iterator;
+          break;
+
+        default:
+          fprintf (stderr, "%s:%d: unknown token type %d\n",
+                   real_file_name, tp->line_number, tp->type);
+          abort ();
+        }
+
+      free_token (tp);
+    }
+}
+
+void
+extract_perl (FILE *f, const char *real_filename, const char *logical_filename,
+              flag_context_list_table_ty *flag_table,
+              msgdomain_list_ty *mdlp)
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+
+  fp = f;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  line_number = 0;
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  flag_context_list_table = flag_table;
+
+  init_keywords ();
+
+  token_stack.items = NULL;
+  token_stack.nitems = 0;
+  token_stack.nitems_max = 0;
+  linesize = 0;
+  linepos = 0;
+  eaten_here = 0;
+  end_of_file = false;
+
+  /* Safe assumption.  */
+  last_token_type = token_type_semicolon;
+
+  /* Eat tokens until eof is seen.  When extract_balanced returns
+     due to an unbalanced closing brace, just restart it.  */
+  while (!extract_balanced (mlp, token_type_rbrace, true, false,
+                            null_context, null_context_list_iterator,
+                            1, arglist_parser_alloc (mlp, NULL)))
+    ;
+
+  fp = NULL;
+  real_file_name = NULL;
+  free (logical_file_name);
+  logical_file_name = NULL;
+  line_number = 0;
+  last_token_type = token_type_semicolon;
+  token_stack_free (&token_stack);
+  eaten_here = 0;
+  end_of_file = true;
+}