summaryrefslogtreecommitdiff
path: root/gettext-tools/src/x-elisp.c
diff options
context:
space:
mode:
Diffstat (limited to 'gettext-tools/src/x-elisp.c')
-rw-r--r--gettext-tools/src/x-elisp.c1255
1 files changed, 1255 insertions, 0 deletions
diff --git a/gettext-tools/src/x-elisp.c b/gettext-tools/src/x-elisp.c
new file mode 100644
index 0000000..50c3d55
--- /dev/null
+++ b/gettext-tools/src/x-elisp.c
@@ -0,0 +1,1255 @@
+/* xgettext Emacs Lisp backend.
+ Copyright (C) 2001-2003, 2005-2009 Free Software Foundation, Inc.
+
+ This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification. */
+#include "x-elisp.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "message.h"
+#include "xgettext.h"
+#include "error.h"
+#include "xalloc.h"
+#include "hash.h"
+#include "c-ctype.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+
+/* Summary of Emacs Lisp syntax:
+ - ';' starts a comment until end of line.
+ - '#@nn' starts a comment of nn bytes.
+ - Integers are constituted of an optional prefix (#b, #B for binary,
+ #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix),
+ an optional sign (+ or -), the digits, and an optional trailing dot.
+ - Characters are written as '?' followed by the character, possibly
+ with an escape sequence, for examples '?a', '?\n', '?\177'.
+ - Strings are delimited by double quotes. Backslash introduces an escape
+ sequence. The following are understood: '\n', '\r', '\f', '\t', '\a',
+ '\\', '\^C', '\012' (octal), '\x12' (hexadecimal).
+ - Symbols: can contain meta-characters if preceded by backslash.
+ - Uninterned symbols: written as #:SYMBOL.
+ - () delimit lists.
+ - [] delimit vectors.
+ The reader is implemented in emacs-21.1/src/lread.c. */
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings. */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+void
+x_elisp_extract_all ()
+{
+ extract_all = true;
+}
+
+
+void
+x_elisp_keyword (const char *name)
+{
+ if (name == NULL)
+ default_keywords = false;
+ else
+ {
+ const char *end;
+ struct callshape shape;
+ const char *colon;
+
+ if (keywords.table == NULL)
+ hash_init (&keywords, 100);
+
+ split_keywordspec (name, &end, &shape);
+
+ /* The characters between name and end should form a valid Lisp
+ symbol. */
+ colon = strchr (name, ':');
+ if (colon == NULL || colon >= end)
+ insert_keyword_callshape (&keywords, name, end - name, &shape);
+ }
+}
+
+/* Finish initializing the keywords hash table.
+ Called after argument processing, before each file is processed. */
+static void
+init_keywords ()
+{
+ if (default_keywords)
+ {
+ /* When adding new keywords here, also update the documentation in
+ xgettext.texi! */
+ x_elisp_keyword ("_");
+ default_keywords = false;
+ }
+}
+
+void
+init_flag_table_elisp ()
+{
+ xgettext_record_flag ("_:1:pass-elisp-format");
+ xgettext_record_flag ("format:1:elisp-format");
+}
+
+
+/* ======================== Reading of characters. ======================== */
+
+/* Real filename, used in error messages about the input file. */
+static const char *real_file_name;
+
+/* Logical filename and line number, used to label the extracted messages. */
+static char *logical_file_name;
+static int line_number;
+
+/* The input file stream. */
+static FILE *fp;
+
+
+/* Fetch the next character from the input file. */
+static int
+do_getc ()
+{
+ int c = getc (fp);
+
+ if (c == EOF)
+ {
+ if (ferror (fp))
+ error (EXIT_FAILURE, errno, _("\
+error while reading \"%s\""), real_file_name);
+ }
+ else if (c == '\n')
+ line_number++;
+
+ return c;
+}
+
+/* Put back the last fetched character, not EOF. */
+static void
+do_ungetc (int c)
+{
+ if (c == '\n')
+ line_number--;
+ ungetc (c, fp);
+}
+
+
+/* ========================== Reading of tokens. ========================== */
+
+
+/* A token consists of a sequence of characters. */
+struct token
+{
+ int allocated; /* number of allocated 'token_char's */
+ int charcount; /* number of used 'token_char's */
+ char *chars; /* the token's constituents */
+};
+
+/* Initialize a 'struct token'. */
+static inline void
+init_token (struct token *tp)
+{
+ tp->allocated = 10;
+ tp->chars = XNMALLOC (tp->allocated, char);
+ tp->charcount = 0;
+}
+
+/* Free the memory pointed to by a 'struct token'. */
+static inline void
+free_token (struct token *tp)
+{
+ free (tp->chars);
+}
+
+/* Ensure there is enough room in the token for one more character. */
+static inline void
+grow_token (struct token *tp)
+{
+ if (tp->charcount == tp->allocated)
+ {
+ tp->allocated *= 2;
+ tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
+ }
+}
+
+/* Test whether a token has integer syntax. */
+static inline bool
+is_integer (const char *p)
+{
+ /* NB: Yes, '+.' and '-.' both designate the integer 0. */
+ const char *p_start = p;
+
+ if (*p == '+' || *p == '-')
+ p++;
+ if (*p == '\0')
+ return false;
+ while (*p >= '0' && *p <= '9')
+ p++;
+ if (p > p_start && *p == '.')
+ p++;
+ return (*p == '\0');
+}
+
+/* Test whether a token has float syntax. */
+static inline bool
+is_float (const char *p)
+{
+ enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 };
+ int state;
+
+ state = 0;
+ if (*p == '+' || *p == '-')
+ p++;
+ if (*p >= '0' && *p <= '9')
+ {
+ state |= LEAD_INT;
+ do
+ p++;
+ while (*p >= '0' && *p <= '9');
+ }
+ if (*p == '.')
+ {
+ state |= DOT_CHAR;
+ p++;
+ }
+ if (*p >= '0' && *p <= '9')
+ {
+ state |= TRAIL_INT;
+ do
+ p++;
+ while (*p >= '0' && *p <= '9');
+ }
+ if (*p == 'e' || *p == 'E')
+ {
+ state |= E_CHAR;
+ p++;
+ if (*p == '+' || *p == '-')
+ p++;
+ if (*p >= '0' && *p <= '9')
+ {
+ state |= EXP_INT;
+ do
+ p++;
+ while (*p >= '0' && *p <= '9');
+ }
+ else if (p[-1] == '+'
+ && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F')
+ || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N')))
+ {
+ state |= EXP_INT;
+ p += 3;
+ }
+ }
+ return (*p == '\0')
+ && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT)
+ || state == (DOT_CHAR | TRAIL_INT)
+ || state == (LEAD_INT | E_CHAR | EXP_INT)
+ || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)
+ || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT));
+}
+
+/* Read the next token. 'first' is the first character, which has already
+ been read. Returns true for a symbol, false for a number. */
+static bool
+read_token (struct token *tp, int first)
+{
+ int c;
+ bool quoted = false;
+
+ init_token (tp);
+
+ c = first;
+
+ for (;; c = do_getc ())
+ {
+ if (c == EOF)
+ break;
+ if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
+ break;
+ if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')'
+ || c == '[' || c == ']' || c == '#')
+ break;
+ if (c == '\\')
+ {
+ quoted = true;
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid, but be tolerant. */
+ break;
+ }
+ grow_token (tp);
+ tp->chars[tp->charcount++] = c;
+ }
+ if (c != EOF)
+ do_ungetc (c);
+
+ if (quoted)
+ return true; /* symbol */
+
+ /* Add a NUL byte at the end, for is_integer and is_float. */
+ grow_token (tp);
+ tp->chars[tp->charcount] = '\0';
+
+ if (is_integer (tp->chars) || is_float (tp->chars))
+ return false; /* number */
+ else
+ return true; /* symbol */
+}
+
+
+/* ========================= Accumulating comments ========================= */
+
+
+static char *buffer;
+static size_t bufmax;
+static size_t buflen;
+
+static inline void
+comment_start ()
+{
+ buflen = 0;
+}
+
+static inline void
+comment_add (int c)
+{
+ if (buflen >= bufmax)
+ {
+ bufmax = 2 * bufmax + 10;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[buflen++] = c;
+}
+
+static inline void
+comment_line_end (size_t chars_to_remove)
+{
+ buflen -= chars_to_remove;
+ while (buflen >= 1
+ && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
+ --buflen;
+ if (chars_to_remove == 0 && buflen >= bufmax)
+ {
+ bufmax = 2 * bufmax + 10;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[buflen] = '\0';
+ savable_comment_add (buffer);
+}
+
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+
+/* ========================= Accumulating messages ========================= */
+
+
+static message_list_ty *mlp;
+
+
+/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */
+
+
+/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings.
+ Other objects need not to be represented precisely. */
+enum object_type
+{
+ t_symbol, /* symbol */
+ t_string, /* string */
+ t_other, /* other kind of real object */
+ t_dot, /* '.' pseudo object */
+ t_listclose, /* ')' pseudo object */
+ t_vectorclose,/* ']' pseudo object */
+ t_eof /* EOF marker */
+};
+
+struct object
+{
+ enum object_type type;
+ struct token *token; /* for t_symbol and t_string */
+ int line_number_at_start; /* for t_string */
+};
+
+/* Free the memory pointed to by a 'struct object'. */
+static inline void
+free_object (struct object *op)
+{
+ if (op->type == t_symbol || op->type == t_string)
+ {
+ free_token (op->token);
+ free (op->token);
+ }
+}
+
+/* Convert a t_symbol/t_string token to a char*. */
+static char *
+string_of_object (const struct object *op)
+{
+ char *str;
+ int n;
+
+ if (!(op->type == t_symbol || op->type == t_string))
+ abort ();
+ n = op->token->charcount;
+ str = XNMALLOC (n + 1, char);
+ memcpy (str, op->token->chars, n);
+ str[n] = '\0';
+ return str;
+}
+
+/* Context lookup table. */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Returns the character represented by an escape sequence. */
+#define IGNORABLE_ESCAPE (EOF - 1)
+static int
+do_getc_escaped (int c, bool in_string)
+{
+ switch (c)
+ {
+ case 'a':
+ return '\a';
+ case 'b':
+ return '\b';
+ case 'd':
+ return 0x7F;
+ case 'e':
+ return 0x1B;
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+
+ case '\n':
+ return IGNORABLE_ESCAPE;
+
+ case ' ':
+ return (in_string ? IGNORABLE_ESCAPE : ' ');
+
+ case 'M': /* meta */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ return c | 0x80;
+
+ case 'S': /* shift */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c);
+
+ case 'H': /* hyper */
+ case 'A': /* alt */
+ case 's': /* super */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ return c;
+
+ case 'C': /* ctrl */
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c != '-')
+ /* Invalid input. But be tolerant. */
+ return c;
+ /*FALLTHROUGH*/
+ case '^':
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ return EOF;
+ c = do_getc_escaped (c, false);
+ }
+ if (c == '?')
+ return 0x7F;
+ if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A)
+ return c & 0x9F;
+ if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F)
+ return c & 0x9F;
+#if 0 /* We cannot handle NUL bytes in strings. */
+ if (c == ' ')
+ return 0x00;
+#endif
+ return c;
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ /* An octal escape, as in ANSI C. */
+ {
+ int n = c - '0';
+
+ c = do_getc ();
+ if (c != EOF)
+ {
+ if (c >= '0' && c <= '7')
+ {
+ n = (n << 3) + (c - '0');
+ c = do_getc ();
+ if (c != EOF)
+ {
+ if (c >= '0' && c <= '7')
+ n = (n << 3) + (c - '0');
+ else
+ do_ungetc (c);
+ }
+ }
+ else
+ do_ungetc (c);
+ }
+ return (unsigned char) n;
+ }
+
+ case 'x':
+ /* A hexadecimal escape, as in ANSI C. */
+ {
+ int n = 0;
+
+ for (;;)
+ {
+ c = do_getc ();
+ if (c == EOF)
+ break;
+ else if (c >= '0' && c <= '9')
+ n = (n << 4) + (c - '0');
+ else if (c >= 'A' && c <= 'F')
+ n = (n << 4) + (c - 'A' + 10);
+ else if (c >= 'a' && c <= 'f')
+ n = (n << 4) + (c - 'a' + 10);
+ else
+ {
+ do_ungetc (c);
+ break;
+ }
+ }
+ return (unsigned char) n;
+ }
+
+ default:
+ /* Ignore Emacs multibyte character stuff. All the strings we are
+ interested in are ASCII strings. */
+ return c;
+ }
+}
+
+/* Read the next object.
+ 'first_in_list' and 'new_backquote_flag' are used for reading old
+ backquote syntax and new backquote syntax. */
+static void
+read_object (struct object *op, bool first_in_list, bool new_backquote_flag,
+ flag_context_ty outer_context)
+{
+ for (;;)
+ {
+ int c;
+
+ c = do_getc ();
+
+ switch (c)
+ {
+ case EOF:
+ op->type = t_eof;
+ return;
+
+ case '\n':
+ /* Comments assumed to be grouped with a message must immediately
+ precede it, with no non-whitespace token on a line between
+ both. */
+ if (last_non_comment_line > last_comment_line)
+ savable_comment_reset ();
+ continue;
+
+ case '(':
+ {
+ int arg = 0; /* Current argument number. */
+ flag_context_list_iterator_ty context_iter;
+ const struct callshapes *shapes = NULL;
+ struct arglist_parser *argparser = NULL;
+
+ for (;; arg++)
+ {
+ struct object inner;
+ flag_context_ty inner_context;
+
+ if (arg == 0)
+ inner_context = null_context;
+ else
+ inner_context =
+ inherited_context (outer_context,
+ flag_context_list_iterator_advance (
+ &context_iter));
+
+ read_object (&inner, arg == 0, new_backquote_flag,
+ inner_context);
+
+ /* Recognize end of list. */
+ if (inner.type == t_listclose)
+ {
+ op->type = t_other;
+ /* Don't bother converting "()" to "NIL". */
+ last_non_comment_line = line_number;
+ if (argparser != NULL)
+ arglist_parser_done (argparser, arg);
+ return;
+ }
+
+ /* Dots are not allowed in every position. ']' is not allowed.
+ But be tolerant. */
+
+ /* EOF inside list is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ if (arg == 0)
+ {
+ /* This is the function position. */
+ if (inner.type == t_symbol)
+ {
+ char *symbol_name = string_of_object (&inner);
+ void *keyword_value;
+
+ if (hash_find_entry (&keywords,
+ symbol_name, strlen (symbol_name),
+ &keyword_value)
+ == 0)
+ shapes = (const struct callshapes *) keyword_value;
+
+ argparser = arglist_parser_alloc (mlp, shapes);
+
+ context_iter =
+ flag_context_list_iterator (
+ flag_context_list_table_lookup (
+ flag_context_list_table,
+ symbol_name, strlen (symbol_name)));
+
+ free (symbol_name);
+ }
+ else
+ context_iter = null_context_list_iterator;
+ }
+ else
+ {
+ /* These are the argument positions. */
+ if (argparser != NULL && inner.type == t_string)
+ arglist_parser_remember (argparser, arg,
+ string_of_object (&inner),
+ inner_context,
+ logical_file_name,
+ inner.line_number_at_start,
+ savable_comment);
+ }
+
+ free_object (&inner);
+ }
+
+ if (argparser != NULL)
+ arglist_parser_done (argparser, arg);
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case ')':
+ /* Tell the caller about the end of list.
+ Unmatched closing parenthesis is illegal. But be tolerant. */
+ op->type = t_listclose;
+ last_non_comment_line = line_number;
+ return;
+
+ case '[':
+ {
+ for (;;)
+ {
+ struct object inner;
+
+ read_object (&inner, false, new_backquote_flag, null_context);
+
+ /* Recognize end of vector. */
+ if (inner.type == t_vectorclose)
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots and ')' are not allowed. But be tolerant. */
+
+ /* EOF inside vector is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ free_object (&inner);
+ }
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case ']':
+ /* Tell the caller about the end of vector.
+ Unmatched closing bracket is illegal. But be tolerant. */
+ op->type = t_vectorclose;
+ last_non_comment_line = line_number;
+ return;
+
+ case '\'':
+ {
+ struct object inner;
+
+ read_object (&inner, false, new_backquote_flag, null_context);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '`':
+ if (first_in_list)
+ goto default_label;
+ {
+ struct object inner;
+
+ read_object (&inner, false, true, null_context);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case ',':
+ if (!new_backquote_flag)
+ goto default_label;
+ {
+ int c = do_getc ();
+ /* The ,@ handling inside lists is wrong anyway, because
+ ,@form expands to an unknown number of elements. */
+ if (c != EOF && c != '@' && c != '.')
+ do_ungetc (c);
+ }
+ {
+ struct object inner;
+
+ read_object (&inner, false, false, null_context);
+
+ /* Dots and EOF are not allowed here. But be tolerant. */
+
+ free_object (&inner);
+
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case ';':
+ {
+ bool all_semicolons = true;
+
+ last_comment_line = line_number;
+ comment_start ();
+ for (;;)
+ {
+ int c = do_getc ();
+ if (c == EOF || c == '\n')
+ break;
+ if (c != ';')
+ all_semicolons = false;
+ if (!all_semicolons)
+ {
+ /* We skip all leading white space, but not EOLs. */
+ if (!(buflen == 0 && (c == ' ' || c == '\t')))
+ comment_add (c);
+ }
+ }
+ comment_line_end (0);
+ continue;
+ }
+
+ case '"':
+ {
+ op->token = XMALLOC (struct token);
+ init_token (op->token);
+ op->line_number_at_start = line_number;
+ for (;;)
+ {
+ int c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ if (c == '"')
+ break;
+ if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ c = do_getc_escaped (c, true);
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ break;
+ if (c == IGNORABLE_ESCAPE)
+ /* Ignore escaped newline and escaped space. */
+ ;
+ else
+ {
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+ }
+ else
+ {
+ grow_token (op->token);
+ op->token->chars[op->token->charcount++] = c;
+ }
+ }
+ op->type = t_string;
+
+ if (extract_all)
+ {
+ lex_pos_ty pos;
+
+ pos.file_name = logical_file_name;
+ pos.line_number = op->line_number_at_start;
+ remember_a_message (mlp, NULL, string_of_object (op),
+ null_context, &pos, NULL, savable_comment);
+ }
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '?':
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ else if (c == '\\')
+ {
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ else
+ {
+ c = do_getc_escaped (c, false);
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ ;
+ }
+ }
+ /* Impossible to deal with Emacs multibyte character stuff here. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '#':
+ /* Dispatch macro handling. */
+ c = do_getc ();
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ return;
+ }
+
+ switch (c)
+ {
+ case '^':
+ c = do_getc ();
+ if (c == '^')
+ c = do_getc ();
+ if (c == '[')
+ {
+ /* Read a char table, same syntax as a vector. */
+ for (;;)
+ {
+ struct object inner;
+
+ read_object (&inner, false, new_backquote_flag,
+ null_context);
+
+ /* Recognize end of vector. */
+ if (inner.type == t_vectorclose)
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /* Dots and ')' are not allowed. But be tolerant. */
+
+ /* EOF inside vector is illegal. But be tolerant. */
+ if (inner.type == t_eof)
+ break;
+
+ free_object (&inner);
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ else
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ if (c != EOF)
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '&':
+ /* Read a bit vector. */
+ {
+ struct object length;
+ read_object (&length, first_in_list, new_backquote_flag,
+ null_context);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&length);
+ }
+ c = do_getc ();
+ if (c == '"')
+ {
+ struct object string;
+ read_object (&string, first_in_list, new_backquote_flag,
+ null_context);
+ free_object (&string);
+ }
+ else
+ /* Invalid input. Be tolerant, no error message. */
+ do_ungetc (c);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '[':
+ /* Read a compiled function, same syntax as a vector. */
+ case '(':
+ /* Read a string with properties, same syntax as a list. */
+ {
+ struct object inner;
+ do_ungetc (c);
+ read_object (&inner, false, new_backquote_flag, null_context);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '@':
+ /* Read a comment consisting of a given number of bytes. */
+ {
+ unsigned int nskip = 0;
+
+ for (;;)
+ {
+ c = do_getc ();
+ if (!(c >= '0' && c <= '9'))
+ break;
+ nskip = 10 * nskip + (c - '0');
+ }
+ if (c != EOF)
+ {
+ do_ungetc (c);
+ for (; nskip > 0; nskip--)
+ if (do_getc () == EOF)
+ break;
+ }
+ continue;
+ }
+
+ case '$':
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case '\'':
+ case ':':
+ case 'S': case 's': /* XEmacs only */
+ {
+ struct object inner;
+ read_object (&inner, false, new_backquote_flag, null_context);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ /* Read Common Lisp style #n# or #n=. */
+ for (;;)
+ {
+ c = do_getc ();
+ if (!(c >= '0' && c <= '9'))
+ break;
+ }
+ if (c == EOF)
+ /* Invalid input. Be tolerant, no error message. */
+ {
+ op->type = t_other;
+ return;
+ }
+ if (c == '=')
+ {
+ read_object (op, false, new_backquote_flag, outer_context);
+ last_non_comment_line = line_number;
+ return;
+ }
+ if (c == '#')
+ {
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ if (c == 'R' || c == 'r')
+ {
+ /* Read an integer. */
+ c = do_getc ();
+ if (c == '+' || c == '-')
+ c = do_getc ();
+ for (; c != EOF; c = do_getc ())
+ if (!c_isalnum (c))
+ {
+ do_ungetc (c);
+ break;
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ /* Invalid input. Be tolerant, no error message. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+
+ case 'X': case 'x':
+ case 'O': case 'o':
+ case 'B': case 'b':
+ {
+ /* Read an integer. */
+ c = do_getc ();
+ if (c == '+' || c == '-')
+ c = do_getc ();
+ for (; c != EOF; c = do_getc ())
+ if (!c_isalnum (c))
+ {
+ do_ungetc (c);
+ break;
+ }
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '*': /* XEmacs only */
+ {
+ /* Read a bit-vector. */
+ do
+ c = do_getc ();
+ while (c == '0' || c == '1');
+ if (c != EOF)
+ do_ungetc (c);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ case '+': /* XEmacs only */
+ case '-': /* XEmacs only */
+ /* Simply assume every feature expression is true. */
+ {
+ struct object inner;
+ read_object (&inner, false, new_backquote_flag, null_context);
+ /* Dots and EOF are not allowed here.
+ But be tolerant. */
+ free_object (&inner);
+ continue;
+ }
+
+ default:
+ /* Invalid input. Be tolerant, no error message. */
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+
+ /*NOTREACHED*/
+ abort ();
+
+ case '.':
+ c = do_getc ();
+ if (c != EOF)
+ {
+ do_ungetc (c);
+ if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */
+ || strchr ("\"'`,(", c) != NULL)
+ {
+ op->type = t_dot;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+ c = '.';
+ /*FALLTHROUGH*/
+ default:
+ default_label:
+ if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */
+ continue;
+ /* Read a token. */
+ {
+ bool symbol;
+
+ op->token = XMALLOC (struct token);
+ symbol = read_token (op->token, c);
+ if (symbol)
+ {
+ op->type = t_symbol;
+ last_non_comment_line = line_number;
+ return;
+ }
+ else
+ {
+ free_token (op->token);
+ free (op->token);
+ op->type = t_other;
+ last_non_comment_line = line_number;
+ return;
+ }
+ }
+ }
+ }
+}
+
+
+void
+extract_elisp (FILE *f,
+ const char *real_filename, const char *logical_filename,
+ flag_context_list_table_ty *flag_table,
+ msgdomain_list_ty *mdlp)
+{
+ mlp = mdlp->item[0]->messages;
+
+ fp = f;
+ real_file_name = real_filename;
+ logical_file_name = xstrdup (logical_filename);
+ line_number = 1;
+
+ last_comment_line = -1;
+ last_non_comment_line = -1;
+
+ flag_context_list_table = flag_table;
+
+ init_keywords ();
+
+ /* Eat tokens until eof is seen. When read_object returns
+ due to an unbalanced closing parenthesis, just restart it. */
+ do
+ {
+ struct object toplevel_object;
+
+ read_object (&toplevel_object, false, false, null_context);
+
+ if (toplevel_object.type == t_eof)
+ break;
+
+ free_object (&toplevel_object);
+ }
+ while (!feof (fp));
+
+ /* Close scanner. */
+ fp = NULL;
+ real_file_name = NULL;
+ logical_file_name = NULL;
+ line_number = 0;
+}