diff options
Diffstat (limited to 'gettext-tools/src/x-elisp.c')
-rw-r--r-- | gettext-tools/src/x-elisp.c | 1255 |
1 files changed, 1255 insertions, 0 deletions
diff --git a/gettext-tools/src/x-elisp.c b/gettext-tools/src/x-elisp.c new file mode 100644 index 0000000..50c3d55 --- /dev/null +++ b/gettext-tools/src/x-elisp.c @@ -0,0 +1,1255 @@ +/* xgettext Emacs Lisp backend. + Copyright (C) 2001-2003, 2005-2009 Free Software Foundation, Inc. + + This file was written by Bruno Haible <haible@clisp.cons.org>, 2001-2002. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* Specification. */ +#include "x-elisp.h" + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "message.h" +#include "xgettext.h" +#include "error.h" +#include "xalloc.h" +#include "hash.h" +#include "c-ctype.h" +#include "gettext.h" + +#define _(s) gettext(s) + + +/* Summary of Emacs Lisp syntax: + - ';' starts a comment until end of line. + - '#@nn' starts a comment of nn bytes. + - Integers are constituted of an optional prefix (#b, #B for binary, + #o, #O for octal, #x, #X for hexadecimal, #nnr, #nnR for any radix), + an optional sign (+ or -), the digits, and an optional trailing dot. + - Characters are written as '?' followed by the character, possibly + with an escape sequence, for examples '?a', '?\n', '?\177'. + - Strings are delimited by double quotes. Backslash introduces an escape + sequence. The following are understood: '\n', '\r', '\f', '\t', '\a', + '\\', '\^C', '\012' (octal), '\x12' (hexadecimal). + - Symbols: can contain meta-characters if preceded by backslash. + - Uninterned symbols: written as #:SYMBOL. + - () delimit lists. + - [] delimit vectors. + The reader is implemented in emacs-21.1/src/lread.c. */ + + +/* ====================== Keyword set customization. ====================== */ + +/* If true extract all strings. */ +static bool extract_all = false; + +static hash_table keywords; +static bool default_keywords = true; + + +void +x_elisp_extract_all () +{ + extract_all = true; +} + + +void +x_elisp_keyword (const char *name) +{ + if (name == NULL) + default_keywords = false; + else + { + const char *end; + struct callshape shape; + const char *colon; + + if (keywords.table == NULL) + hash_init (&keywords, 100); + + split_keywordspec (name, &end, &shape); + + /* The characters between name and end should form a valid Lisp + symbol. */ + colon = strchr (name, ':'); + if (colon == NULL || colon >= end) + insert_keyword_callshape (&keywords, name, end - name, &shape); + } +} + +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. */ +static void +init_keywords () +{ + if (default_keywords) + { + /* When adding new keywords here, also update the documentation in + xgettext.texi! */ + x_elisp_keyword ("_"); + default_keywords = false; + } +} + +void +init_flag_table_elisp () +{ + xgettext_record_flag ("_:1:pass-elisp-format"); + xgettext_record_flag ("format:1:elisp-format"); +} + + +/* ======================== Reading of characters. ======================== */ + +/* Real filename, used in error messages about the input file. */ +static const char *real_file_name; + +/* Logical filename and line number, used to label the extracted messages. */ +static char *logical_file_name; +static int line_number; + +/* The input file stream. */ +static FILE *fp; + + +/* Fetch the next character from the input file. */ +static int +do_getc () +{ + int c = getc (fp); + + if (c == EOF) + { + if (ferror (fp)) + error (EXIT_FAILURE, errno, _("\ +error while reading \"%s\""), real_file_name); + } + else if (c == '\n') + line_number++; + + return c; +} + +/* Put back the last fetched character, not EOF. */ +static void +do_ungetc (int c) +{ + if (c == '\n') + line_number--; + ungetc (c, fp); +} + + +/* ========================== Reading of tokens. ========================== */ + + +/* A token consists of a sequence of characters. */ +struct token +{ + int allocated; /* number of allocated 'token_char's */ + int charcount; /* number of used 'token_char's */ + char *chars; /* the token's constituents */ +}; + +/* Initialize a 'struct token'. */ +static inline void +init_token (struct token *tp) +{ + tp->allocated = 10; + tp->chars = XNMALLOC (tp->allocated, char); + tp->charcount = 0; +} + +/* Free the memory pointed to by a 'struct token'. */ +static inline void +free_token (struct token *tp) +{ + free (tp->chars); +} + +/* Ensure there is enough room in the token for one more character. */ +static inline void +grow_token (struct token *tp) +{ + if (tp->charcount == tp->allocated) + { + tp->allocated *= 2; + tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char)); + } +} + +/* Test whether a token has integer syntax. */ +static inline bool +is_integer (const char *p) +{ + /* NB: Yes, '+.' and '-.' both designate the integer 0. */ + const char *p_start = p; + + if (*p == '+' || *p == '-') + p++; + if (*p == '\0') + return false; + while (*p >= '0' && *p <= '9') + p++; + if (p > p_start && *p == '.') + p++; + return (*p == '\0'); +} + +/* Test whether a token has float syntax. */ +static inline bool +is_float (const char *p) +{ + enum { LEAD_INT = 1, DOT_CHAR = 2, TRAIL_INT = 4, E_CHAR = 8, EXP_INT = 16 }; + int state; + + state = 0; + if (*p == '+' || *p == '-') + p++; + if (*p >= '0' && *p <= '9') + { + state |= LEAD_INT; + do + p++; + while (*p >= '0' && *p <= '9'); + } + if (*p == '.') + { + state |= DOT_CHAR; + p++; + } + if (*p >= '0' && *p <= '9') + { + state |= TRAIL_INT; + do + p++; + while (*p >= '0' && *p <= '9'); + } + if (*p == 'e' || *p == 'E') + { + state |= E_CHAR; + p++; + if (*p == '+' || *p == '-') + p++; + if (*p >= '0' && *p <= '9') + { + state |= EXP_INT; + do + p++; + while (*p >= '0' && *p <= '9'); + } + else if (p[-1] == '+' + && ((p[0] == 'I' && p[1] == 'N' && p[2] == 'F') + || (p[0] == 'N' && p[1] == 'a' && p[2] == 'N'))) + { + state |= EXP_INT; + p += 3; + } + } + return (*p == '\0') + && (state == (LEAD_INT | DOT_CHAR | TRAIL_INT) + || state == (DOT_CHAR | TRAIL_INT) + || state == (LEAD_INT | E_CHAR | EXP_INT) + || state == (LEAD_INT | DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT) + || state == (DOT_CHAR | TRAIL_INT | E_CHAR | EXP_INT)); +} + +/* Read the next token. 'first' is the first character, which has already + been read. Returns true for a symbol, false for a number. */ +static bool +read_token (struct token *tp, int first) +{ + int c; + bool quoted = false; + + init_token (tp); + + c = first; + + for (;; c = do_getc ()) + { + if (c == EOF) + break; + if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ + break; + if (c == '\"' || c == '\'' || c == ';' || c == '(' || c == ')' + || c == '[' || c == ']' || c == '#') + break; + if (c == '\\') + { + quoted = true; + c = do_getc (); + if (c == EOF) + /* Invalid, but be tolerant. */ + break; + } + grow_token (tp); + tp->chars[tp->charcount++] = c; + } + if (c != EOF) + do_ungetc (c); + + if (quoted) + return true; /* symbol */ + + /* Add a NUL byte at the end, for is_integer and is_float. */ + grow_token (tp); + tp->chars[tp->charcount] = '\0'; + + if (is_integer (tp->chars) || is_float (tp->chars)) + return false; /* number */ + else + return true; /* symbol */ +} + + +/* ========================= Accumulating comments ========================= */ + + +static char *buffer; +static size_t bufmax; +static size_t buflen; + +static inline void +comment_start () +{ + buflen = 0; +} + +static inline void +comment_add (int c) +{ + if (buflen >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen++] = c; +} + +static inline void +comment_line_end (size_t chars_to_remove) +{ + buflen -= chars_to_remove; + while (buflen >= 1 + && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t')) + --buflen; + if (chars_to_remove == 0 && buflen >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[buflen] = '\0'; + savable_comment_add (buffer); +} + + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + + +/* ========================= Accumulating messages ========================= */ + + +static message_list_ty *mlp; + + +/* ============== Reading of objects. See CLHS 2 "Syntax". ============== */ + + +/* We are only interested in symbols (e.g. GETTEXT or NGETTEXT) and strings. + Other objects need not to be represented precisely. */ +enum object_type +{ + t_symbol, /* symbol */ + t_string, /* string */ + t_other, /* other kind of real object */ + t_dot, /* '.' pseudo object */ + t_listclose, /* ')' pseudo object */ + t_vectorclose,/* ']' pseudo object */ + t_eof /* EOF marker */ +}; + +struct object +{ + enum object_type type; + struct token *token; /* for t_symbol and t_string */ + int line_number_at_start; /* for t_string */ +}; + +/* Free the memory pointed to by a 'struct object'. */ +static inline void +free_object (struct object *op) +{ + if (op->type == t_symbol || op->type == t_string) + { + free_token (op->token); + free (op->token); + } +} + +/* Convert a t_symbol/t_string token to a char*. */ +static char * +string_of_object (const struct object *op) +{ + char *str; + int n; + + if (!(op->type == t_symbol || op->type == t_string)) + abort (); + n = op->token->charcount; + str = XNMALLOC (n + 1, char); + memcpy (str, op->token->chars, n); + str[n] = '\0'; + return str; +} + +/* Context lookup table. */ +static flag_context_list_table_ty *flag_context_list_table; + +/* Returns the character represented by an escape sequence. */ +#define IGNORABLE_ESCAPE (EOF - 1) +static int +do_getc_escaped (int c, bool in_string) +{ + switch (c) + { + case 'a': + return '\a'; + case 'b': + return '\b'; + case 'd': + return 0x7F; + case 'e': + return 0x1B; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + + case '\n': + return IGNORABLE_ESCAPE; + + case ' ': + return (in_string ? IGNORABLE_ESCAPE : ' '); + + case 'M': /* meta */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + return c | 0x80; + + case 'S': /* shift */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + return (c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c); + + case 'H': /* hyper */ + case 'A': /* alt */ + case 's': /* super */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + return c; + + case 'C': /* ctrl */ + c = do_getc (); + if (c == EOF) + return EOF; + if (c != '-') + /* Invalid input. But be tolerant. */ + return c; + /*FALLTHROUGH*/ + case '^': + c = do_getc (); + if (c == EOF) + return EOF; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + return EOF; + c = do_getc_escaped (c, false); + } + if (c == '?') + return 0x7F; + if ((c & 0x5F) >= 0x41 && (c & 0x5F) <= 0x5A) + return c & 0x9F; + if ((c & 0x7F) >= 0x40 && (c & 0x7F) <= 0x5F) + return c & 0x9F; +#if 0 /* We cannot handle NUL bytes in strings. */ + if (c == ' ') + return 0x00; +#endif + return c; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': + /* An octal escape, as in ANSI C. */ + { + int n = c - '0'; + + c = do_getc (); + if (c != EOF) + { + if (c >= '0' && c <= '7') + { + n = (n << 3) + (c - '0'); + c = do_getc (); + if (c != EOF) + { + if (c >= '0' && c <= '7') + n = (n << 3) + (c - '0'); + else + do_ungetc (c); + } + } + else + do_ungetc (c); + } + return (unsigned char) n; + } + + case 'x': + /* A hexadecimal escape, as in ANSI C. */ + { + int n = 0; + + for (;;) + { + c = do_getc (); + if (c == EOF) + break; + else if (c >= '0' && c <= '9') + n = (n << 4) + (c - '0'); + else if (c >= 'A' && c <= 'F') + n = (n << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'f') + n = (n << 4) + (c - 'a' + 10); + else + { + do_ungetc (c); + break; + } + } + return (unsigned char) n; + } + + default: + /* Ignore Emacs multibyte character stuff. All the strings we are + interested in are ASCII strings. */ + return c; + } +} + +/* Read the next object. + 'first_in_list' and 'new_backquote_flag' are used for reading old + backquote syntax and new backquote syntax. */ +static void +read_object (struct object *op, bool first_in_list, bool new_backquote_flag, + flag_context_ty outer_context) +{ + for (;;) + { + int c; + + c = do_getc (); + + switch (c) + { + case EOF: + op->type = t_eof; + return; + + case '\n': + /* Comments assumed to be grouped with a message must immediately + precede it, with no non-whitespace token on a line between + both. */ + if (last_non_comment_line > last_comment_line) + savable_comment_reset (); + continue; + + case '(': + { + int arg = 0; /* Current argument number. */ + flag_context_list_iterator_ty context_iter; + const struct callshapes *shapes = NULL; + struct arglist_parser *argparser = NULL; + + for (;; arg++) + { + struct object inner; + flag_context_ty inner_context; + + if (arg == 0) + inner_context = null_context; + else + inner_context = + inherited_context (outer_context, + flag_context_list_iterator_advance ( + &context_iter)); + + read_object (&inner, arg == 0, new_backquote_flag, + inner_context); + + /* Recognize end of list. */ + if (inner.type == t_listclose) + { + op->type = t_other; + /* Don't bother converting "()" to "NIL". */ + last_non_comment_line = line_number; + if (argparser != NULL) + arglist_parser_done (argparser, arg); + return; + } + + /* Dots are not allowed in every position. ']' is not allowed. + But be tolerant. */ + + /* EOF inside list is illegal. But be tolerant. */ + if (inner.type == t_eof) + break; + + if (arg == 0) + { + /* This is the function position. */ + if (inner.type == t_symbol) + { + char *symbol_name = string_of_object (&inner); + void *keyword_value; + + if (hash_find_entry (&keywords, + symbol_name, strlen (symbol_name), + &keyword_value) + == 0) + shapes = (const struct callshapes *) keyword_value; + + argparser = arglist_parser_alloc (mlp, shapes); + + context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + symbol_name, strlen (symbol_name))); + + free (symbol_name); + } + else + context_iter = null_context_list_iterator; + } + else + { + /* These are the argument positions. */ + if (argparser != NULL && inner.type == t_string) + arglist_parser_remember (argparser, arg, + string_of_object (&inner), + inner_context, + logical_file_name, + inner.line_number_at_start, + savable_comment); + } + + free_object (&inner); + } + + if (argparser != NULL) + arglist_parser_done (argparser, arg); + } + op->type = t_other; + last_non_comment_line = line_number; + return; + + case ')': + /* Tell the caller about the end of list. + Unmatched closing parenthesis is illegal. But be tolerant. */ + op->type = t_listclose; + last_non_comment_line = line_number; + return; + + case '[': + { + for (;;) + { + struct object inner; + + read_object (&inner, false, new_backquote_flag, null_context); + + /* Recognize end of vector. */ + if (inner.type == t_vectorclose) + { + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /* Dots and ')' are not allowed. But be tolerant. */ + + /* EOF inside vector is illegal. But be tolerant. */ + if (inner.type == t_eof) + break; + + free_object (&inner); + } + } + op->type = t_other; + last_non_comment_line = line_number; + return; + + case ']': + /* Tell the caller about the end of vector. + Unmatched closing bracket is illegal. But be tolerant. */ + op->type = t_vectorclose; + last_non_comment_line = line_number; + return; + + case '\'': + { + struct object inner; + + read_object (&inner, false, new_backquote_flag, null_context); + + /* Dots and EOF are not allowed here. But be tolerant. */ + + free_object (&inner); + + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '`': + if (first_in_list) + goto default_label; + { + struct object inner; + + read_object (&inner, false, true, null_context); + + /* Dots and EOF are not allowed here. But be tolerant. */ + + free_object (&inner); + + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case ',': + if (!new_backquote_flag) + goto default_label; + { + int c = do_getc (); + /* The ,@ handling inside lists is wrong anyway, because + ,@form expands to an unknown number of elements. */ + if (c != EOF && c != '@' && c != '.') + do_ungetc (c); + } + { + struct object inner; + + read_object (&inner, false, false, null_context); + + /* Dots and EOF are not allowed here. But be tolerant. */ + + free_object (&inner); + + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case ';': + { + bool all_semicolons = true; + + last_comment_line = line_number; + comment_start (); + for (;;) + { + int c = do_getc (); + if (c == EOF || c == '\n') + break; + if (c != ';') + all_semicolons = false; + if (!all_semicolons) + { + /* We skip all leading white space, but not EOLs. */ + if (!(buflen == 0 && (c == ' ' || c == '\t'))) + comment_add (c); + } + } + comment_line_end (0); + continue; + } + + case '"': + { + op->token = XMALLOC (struct token); + init_token (op->token); + op->line_number_at_start = line_number; + for (;;) + { + int c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + if (c == '"') + break; + if (c == '\\') + { + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + c = do_getc_escaped (c, true); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + break; + if (c == IGNORABLE_ESCAPE) + /* Ignore escaped newline and escaped space. */ + ; + else + { + grow_token (op->token); + op->token->chars[op->token->charcount++] = c; + } + } + else + { + grow_token (op->token); + op->token->chars[op->token->charcount++] = c; + } + } + op->type = t_string; + + if (extract_all) + { + lex_pos_ty pos; + + pos.file_name = logical_file_name; + pos.line_number = op->line_number_at_start; + remember_a_message (mlp, NULL, string_of_object (op), + null_context, &pos, NULL, savable_comment); + } + last_non_comment_line = line_number; + return; + } + + case '?': + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + ; + else if (c == '\\') + { + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + ; + else + { + c = do_getc_escaped (c, false); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + ; + } + } + /* Impossible to deal with Emacs multibyte character stuff here. */ + op->type = t_other; + last_non_comment_line = line_number; + return; + + case '#': + /* Dispatch macro handling. */ + c = do_getc (); + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + { + op->type = t_other; + return; + } + + switch (c) + { + case '^': + c = do_getc (); + if (c == '^') + c = do_getc (); + if (c == '[') + { + /* Read a char table, same syntax as a vector. */ + for (;;) + { + struct object inner; + + read_object (&inner, false, new_backquote_flag, + null_context); + + /* Recognize end of vector. */ + if (inner.type == t_vectorclose) + { + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /* Dots and ')' are not allowed. But be tolerant. */ + + /* EOF inside vector is illegal. But be tolerant. */ + if (inner.type == t_eof) + break; + + free_object (&inner); + } + op->type = t_other; + last_non_comment_line = line_number; + return; + } + else + /* Invalid input. Be tolerant, no error message. */ + { + op->type = t_other; + if (c != EOF) + last_non_comment_line = line_number; + return; + } + + case '&': + /* Read a bit vector. */ + { + struct object length; + read_object (&length, first_in_list, new_backquote_flag, + null_context); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&length); + } + c = do_getc (); + if (c == '"') + { + struct object string; + read_object (&string, first_in_list, new_backquote_flag, + null_context); + free_object (&string); + } + else + /* Invalid input. Be tolerant, no error message. */ + do_ungetc (c); + op->type = t_other; + last_non_comment_line = line_number; + return; + + case '[': + /* Read a compiled function, same syntax as a vector. */ + case '(': + /* Read a string with properties, same syntax as a list. */ + { + struct object inner; + do_ungetc (c); + read_object (&inner, false, new_backquote_flag, null_context); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '@': + /* Read a comment consisting of a given number of bytes. */ + { + unsigned int nskip = 0; + + for (;;) + { + c = do_getc (); + if (!(c >= '0' && c <= '9')) + break; + nskip = 10 * nskip + (c - '0'); + } + if (c != EOF) + { + do_ungetc (c); + for (; nskip > 0; nskip--) + if (do_getc () == EOF) + break; + } + continue; + } + + case '$': + op->type = t_other; + last_non_comment_line = line_number; + return; + + case '\'': + case ':': + case 'S': case 's': /* XEmacs only */ + { + struct object inner; + read_object (&inner, false, new_backquote_flag, null_context); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + /* Read Common Lisp style #n# or #n=. */ + for (;;) + { + c = do_getc (); + if (!(c >= '0' && c <= '9')) + break; + } + if (c == EOF) + /* Invalid input. Be tolerant, no error message. */ + { + op->type = t_other; + return; + } + if (c == '=') + { + read_object (op, false, new_backquote_flag, outer_context); + last_non_comment_line = line_number; + return; + } + if (c == '#') + { + op->type = t_other; + last_non_comment_line = line_number; + return; + } + if (c == 'R' || c == 'r') + { + /* Read an integer. */ + c = do_getc (); + if (c == '+' || c == '-') + c = do_getc (); + for (; c != EOF; c = do_getc ()) + if (!c_isalnum (c)) + { + do_ungetc (c); + break; + } + op->type = t_other; + last_non_comment_line = line_number; + return; + } + /* Invalid input. Be tolerant, no error message. */ + op->type = t_other; + last_non_comment_line = line_number; + return; + + case 'X': case 'x': + case 'O': case 'o': + case 'B': case 'b': + { + /* Read an integer. */ + c = do_getc (); + if (c == '+' || c == '-') + c = do_getc (); + for (; c != EOF; c = do_getc ()) + if (!c_isalnum (c)) + { + do_ungetc (c); + break; + } + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '*': /* XEmacs only */ + { + /* Read a bit-vector. */ + do + c = do_getc (); + while (c == '0' || c == '1'); + if (c != EOF) + do_ungetc (c); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + case '+': /* XEmacs only */ + case '-': /* XEmacs only */ + /* Simply assume every feature expression is true. */ + { + struct object inner; + read_object (&inner, false, new_backquote_flag, null_context); + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + continue; + } + + default: + /* Invalid input. Be tolerant, no error message. */ + op->type = t_other; + last_non_comment_line = line_number; + return; + } + + /*NOTREACHED*/ + abort (); + + case '.': + c = do_getc (); + if (c != EOF) + { + do_ungetc (c); + if (c <= ' ' /* FIXME: Assumes ASCII compatible encoding */ + || strchr ("\"'`,(", c) != NULL) + { + op->type = t_dot; + last_non_comment_line = line_number; + return; + } + } + c = '.'; + /*FALLTHROUGH*/ + default: + default_label: + if (c <= ' ') /* FIXME: Assumes ASCII compatible encoding */ + continue; + /* Read a token. */ + { + bool symbol; + + op->token = XMALLOC (struct token); + symbol = read_token (op->token, c); + if (symbol) + { + op->type = t_symbol; + last_non_comment_line = line_number; + return; + } + else + { + free_token (op->token); + free (op->token); + op->type = t_other; + last_non_comment_line = line_number; + return; + } + } + } + } +} + + +void +extract_elisp (FILE *f, + const char *real_filename, const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp) +{ + mlp = mdlp->item[0]->messages; + + fp = f; + real_file_name = real_filename; + logical_file_name = xstrdup (logical_filename); + line_number = 1; + + last_comment_line = -1; + last_non_comment_line = -1; + + flag_context_list_table = flag_table; + + init_keywords (); + + /* Eat tokens until eof is seen. When read_object returns + due to an unbalanced closing parenthesis, just restart it. */ + do + { + struct object toplevel_object; + + read_object (&toplevel_object, false, false, null_context); + + if (toplevel_object.type == t_eof) + break; + + free_object (&toplevel_object); + } + while (!feof (fp)); + + /* Close scanner. */ + fp = NULL; + real_file_name = NULL; + logical_file_name = NULL; + line_number = 0; +} |