diff options
Diffstat (limited to 'gettext-tools/src/xgettext.h')
-rw-r--r-- | gettext-tools/src/xgettext.h | 416 |
1 files changed, 416 insertions, 0 deletions
diff --git a/gettext-tools/src/xgettext.h b/gettext-tools/src/xgettext.h new file mode 100644 index 0000000..c852ae3 --- /dev/null +++ b/gettext-tools/src/xgettext.h @@ -0,0 +1,416 @@ +/* xgettext common functions. + Copyright (C) 2001-2003, 2005-2006, 2008-2009, 2011 Free Software Foundation, Inc. + Written by Peter Miller <millerp@canb.auug.org.au> + and Bruno Haible <haible@clisp.cons.org>, 2001. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _XGETTEXT_H +#define _XGETTEXT_H + +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> + +#if HAVE_ICONV +#include <iconv.h> +#endif + +#include "message.h" +#include "pos.h" +#include "str-list.h" + +/* Declare 'line_comment' and 'input_syntax'. */ +#include "read-catalog.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* If true, omit the header entry. + If false, keep the header entry present in the input. */ +extern int xgettext_omit_header; + +extern bool substring_match; + + +/* Calling convention for a given keyword. 
*/ +struct callshape +{ + int argnum1; /* argument number to use for msgid */ + int argnum2; /* argument number to use for msgid_plural */ + int argnumc; /* argument number to use for msgctxt */ + bool argnum1_glib_context; /* argument argnum1 has the syntax "ctxt|msgid" */ + bool argnum2_glib_context; /* argument argnum2 has the syntax "ctxt|msgid" */ + int argtotal; /* total number of arguments */ + string_list_ty xcomments; /* auto-extracted comments */ +}; + +/* Split keyword spec into keyword, argnum1, argnum2, argnumc. */ +extern void split_keywordspec (const char *spec, const char **endp, + struct callshape *shapep); + +/* Set of alternative calling conventions for a given keyword. */ +struct callshapes +{ + const char *keyword; /* the keyword, not NUL terminated */ + size_t keyword_len; /* the keyword's length */ + size_t nshapes; + struct callshape shapes[1]; /* actually nshapes elements; the [1] is only a placeholder and nshapes above holds the real element count */ +}; + +/* Insert a (keyword, callshape) pair into a hash table mapping keyword to + 'struct callshapes *'. */ +extern void insert_keyword_callshape (hash_table *table, + const char *keyword, size_t keyword_len, + const struct callshape *shape); + + +/* Context representing some flags. */ +typedef struct flag_context_ty flag_context_ty; +struct flag_context_ty +{ + /* Regarding the primary formatstring type. */ + /*enum is_format*/ unsigned int is_format1 : 3; + /*bool*/ unsigned int pass_format1 : 1; + /* Regarding the secondary formatstring type. */ + /*enum is_format*/ unsigned int is_format2 : 3; + /*bool*/ unsigned int pass_format2 : 1; + /* Regarding the tertiary formatstring type. */ + /*enum is_format*/ unsigned int is_format3 : 3; + /*bool*/ unsigned int pass_format3 : 1; +}; +/* Null context. */ +extern flag_context_ty null_context; +/* Transparent context. */ +extern flag_context_ty passthrough_context; +/* Compute an inherited context. + The outer_context is assumed to have all pass_format* flags = false. 
+ The result will then also have all pass_format* flags = false. */ +extern flag_context_ty + inherited_context (flag_context_ty outer_context, + flag_context_ty modifier_context); + +/* Context representing some flags, for each possible argument number. + This is a linked list, sorted according to the argument number. */ +typedef struct flag_context_list_ty flag_context_list_ty; +struct flag_context_list_ty +{ + int argnum; /* current argument number, > 0 */ + flag_context_ty flags; /* flags for current argument */ + flag_context_list_ty *next; +}; + +/* Iterator through a flag_context_list_ty. */ +typedef struct flag_context_list_iterator_ty flag_context_list_iterator_ty; +struct flag_context_list_iterator_ty +{ + int argnum; /* current argument number, > 0 */ + const flag_context_list_ty* head; /* tail of list -- NOTE(review): presumably the nodes still to be visited by flag_context_list_iterator_advance; confirm against its definition */ +}; +extern flag_context_list_iterator_ty null_context_list_iterator; +extern flag_context_list_iterator_ty passthrough_context_list_iterator; +extern flag_context_list_iterator_ty + flag_context_list_iterator (flag_context_list_ty *list); +extern flag_context_ty + flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter); + +/* For nearly each backend, we have a separate table mapping a keyword to + a flag_context_list_ty *. */ +typedef hash_table /* char[] -> flag_context_list_ty * */ + flag_context_list_table_ty; +extern flag_context_list_ty * + flag_context_list_table_lookup (flag_context_list_table_ty *flag_table, + const void *key, size_t keylen); +/* Record a flag in the appropriate backend's table. */ +extern void xgettext_record_flag (const char *optionstring); + + +/* Context while building up lexical tokens. */ +typedef enum + { + lc_outside, /* Initial context: outside of comments and strings. */ + lc_comment, /* Inside a comment. */ + lc_string, /* Inside a string literal. */ + + /* For embedded XML in programming code, like E4X in JavaScript. */ + lc_xml_open_tag, /* Inside an opening tag of an XML element. 
*/ + lc_xml_close_tag, /* Inside a closing tag of an XML element. */ + lc_xml_content /* Inside an XML text node. */ + } + lexical_context_ty; + +/* Error message about non-ASCII character in a specific lexical context. */ +extern char *non_ascii_error_message (lexical_context_ty lcontext, + const char *file_name, + size_t line_number); + + +/* Canonicalized encoding name for all input files. */ +extern const char *xgettext_global_source_encoding; + +#if HAVE_ICONV +/* Converter from xgettext_global_source_encoding to UTF-8 (except from + ASCII or UTF-8, when this conversion is a no-op). */ +extern iconv_t xgettext_global_source_iconv; +#endif + +/* Canonicalized encoding name for the current input file. */ +extern const char *xgettext_current_source_encoding; + +#if HAVE_ICONV +/* Converter from xgettext_current_source_encoding to UTF-8 (except from + ASCII or UTF-8, when this conversion is a no-op). */ +extern iconv_t xgettext_current_source_iconv; +#endif + +/* Convert the given string from xgettext_current_source_encoding to + the output file encoding (i.e. ASCII or UTF-8). + The resulting string is either the argument string, or freshly allocated. + The lcontext, file_name and line_number are only used for error message + purposes. */ +extern char *from_current_source_encoding (const char *string, + lexical_context_ty lcontext, + const char *file_name, + size_t line_number); + + +/* List of messages whose msgids must not be extracted, or NULL. + Used by remember_a_message(). */ +extern message_list_ty *exclude; + + +/* Comment handling for backends which support combining adjacent strings + even across lines. 
+ In these backends we cannot use the xgettext_comment* functions directly, + because in multiline string expressions like + "string1" + + "string2" + the newline between "string1" and "string2" would cause a call to + xgettext_comment_reset(), thus destroying the accumulated comments + that we need a little later, when we have concatenated the two strings + and pass them to remember_a_message(). + Instead, we do the bookkeeping of the accumulated comments directly, + and save a pointer to the accumulated comments when we read "string1". + In order to avoid excessive copying of strings, we use reference + counting. + add_reference() increments the refcount of a non-NULL list and returns + its argument unchanged. drop_reference() decrements the refcount while + it is above 1; otherwise it destroys CONTENTS with string_list_destroy() + and frees the list itself. Both accept NULL and then do nothing. */ + +typedef struct refcounted_string_list_ty refcounted_string_list_ty; +struct refcounted_string_list_ty +{ + unsigned int refcount; + struct string_list_ty contents; +}; + +static inline refcounted_string_list_ty * +add_reference (refcounted_string_list_ty *rslp) +{ + if (rslp != NULL) + rslp->refcount++; + return rslp; +} + +static inline void +drop_reference (refcounted_string_list_ty *rslp) +{ + if (rslp != NULL) + { + if (rslp->refcount > 1) + rslp->refcount--; + else + { + string_list_destroy (&rslp->contents); + free (rslp); + } + } +} + +extern refcounted_string_list_ty *savable_comment; +extern void savable_comment_add (const char *str); +extern void savable_comment_reset (void); + +/* Convert character encoding of COMMENT according to the current + source encoding. Returns a new refcounted_string_list_ty. */ +extern refcounted_string_list_ty * + savable_comment_convert_encoding (refcounted_string_list_ty *comment, + lex_pos_ty *pos); + + +enum literalstring_escape_type +{ + LET_NONE = 0, + LET_ANSI_C = 1 << 0, + LET_UNICODE = 1 << 1 +}; + +struct literalstring_parser +{ + char * (*parse) (const char *string, lex_pos_ty *pos, + enum literalstring_escape_type type); +}; + +/* Add a message to the list of extracted messages. + MSGCTXT must be either NULL or a malloc()ed string; its ownership is passed + to the callee. 
+ MSGID must be a malloc()ed string; its ownership is passed to the callee. + POS->file_name must be allocated with indefinite extent. + EXTRACTED_COMMENT is a comment that needs to be copied into the POT file, + or NULL. + COMMENT may be savable_comment, or it may be a saved copy of savable_comment + (then add_reference must be used when saving it, and drop_reference while + dropping it). Clear savable_comment. + Return the new or found message, or NULL if the message is excluded. */ +extern message_ty *remember_a_message (message_list_ty *mlp, + char *msgctxt, + char *msgid, + flag_context_ty context, + lex_pos_ty *pos, + const char *extracted_comment, + refcounted_string_list_ty *comment); + +/* Add an msgid_plural to a message previously returned by + remember_a_message. + STRING must be a malloc()ed string; its ownership is passed to the callee. + POS->file_name must be allocated with indefinite extent. + COMMENT may be savable_comment, or it may be a saved copy of savable_comment + (then add_reference must be used when saving it, and drop_reference while + dropping it). Clear savable_comment. */ +extern void remember_a_message_plural (message_ty *mp, + char *string, + flag_context_ty context, + lex_pos_ty *pos, + refcounted_string_list_ty *comment); + +/* Represents the progressive parsing of an argument list w.r.t. a single + 'struct callshape'. 
*/ +struct partial_call +{ + int argnumc; /* number of context argument, 0 when seen */ + int argnum1; /* number of singular argument, 0 when seen */ + int argnum2; /* number of plural argument, 0 when seen */ + bool argnum1_glib_context; /* argument argnum1 has the syntax "ctxt|msgid" */ + bool argnum2_glib_context; /* argument argnum2 has the syntax "ctxt|msgid" */ + int argtotal; /* total number of arguments, 0 if unspecified */ + string_list_ty xcomments; /* auto-extracted comments */ + char *msgctxt; /* context - owned string, or NULL */ + enum literalstring_escape_type msgctxt_escape; + lex_pos_ty msgctxt_pos; + char *msgid; /* msgid - owned string, or NULL */ + enum literalstring_escape_type msgid_escape; + flag_context_ty msgid_context; + lex_pos_ty msgid_pos; + refcounted_string_list_ty *msgid_comment; + char *msgid_plural; /* msgid_plural - owned string, or NULL */ + enum literalstring_escape_type msgid_plural_escape; + flag_context_ty msgid_plural_context; + lex_pos_ty msgid_plural_pos; +}; + +/* Represents the progressive parsing of an argument list w.r.t. an entire + 'struct callshapes'. */ +struct arglist_parser +{ + message_list_ty *mlp; /* list where the message shall be added */ + const char *keyword; /* the keyword, not NUL terminated */ + size_t keyword_len; /* the keyword's length */ + size_t nalternatives; /* number of partial_call alternatives */ + struct partial_call alternative[1]; /* partial_call alternatives */ +}; + +/* Creates a fresh arglist_parser recognizing calls. + You can pass shapes = NULL for a parser not recognizing any calls. */ +extern struct arglist_parser * arglist_parser_alloc (message_list_ty *mlp, + const struct callshapes *shapes); +/* Clones an arglist_parser. */ +extern struct arglist_parser * arglist_parser_clone (struct arglist_parser *ap); +/* Adds a string argument to an arglist_parser. ARGNUM must be > 0. + STRING must be a malloc()ed string; its ownership is passed to the callee. 
+ FILE_NAME must be allocated with indefinite extent. + COMMENT may be savable_comment, or it may be a saved copy of savable_comment + (then add_reference must be used when saving it, and drop_reference while + dropping it). Clear savable_comment. */ +extern void arglist_parser_remember (struct arglist_parser *ap, + int argnum, char *string, + flag_context_ty context, + char *file_name, size_t line_number, + refcounted_string_list_ty *comment); +/* Adds an uninterpreted string argument to an arglist_parser. ARGNUM + must be > 0. + STRING must be a malloc()ed string; its ownership is passed to the callee. + FILE_NAME must be allocated with indefinite extent. + COMMENT may be savable_comment, or it may be a saved copy of savable_comment + (then add_reference must be used when saving it, and drop_reference while + dropping it). Clear savable_comment. */ +extern void arglist_parser_remember_literal (struct arglist_parser *ap, + int argnum, char *string, + flag_context_ty context, + char *file_name, size_t line_number, + refcounted_string_list_ty *comment, + enum literalstring_escape_type type); +/* Tests whether an arglist_parser is not waiting for more arguments after + argument ARGNUM. */ +extern bool arglist_parser_decidedp (struct arglist_parser *ap, int argnum); +/* Terminates the processing of an arglist_parser after argument ARGNUM and + deletes it. */ +extern void arglist_parser_done (struct arglist_parser *ap, int argnum); + + +/* A string buffer type that allows appending bytes (in the + xgettext_current_source_encoding) or Unicode characters. + Returns the entire string in UTF-8 encoding. */ + +struct mixed_string_buffer +{ + /* The part of the string that has already been converted to UTF-8. */ + char *utf8_buffer; + size_t utf8_buflen; + size_t utf8_allocated; + /* The first half of a UTF-16 surrogate pair. */ + unsigned short utf16_surr; + /* The part of the string that is still in the source encoding. 
*/ + char *curr_buffer; + size_t curr_buflen; + size_t curr_allocated; + /* The lexical context. Used only for error message purposes. */ + lexical_context_ty lcontext; + const char *logical_file_name; + int line_number; +}; + +/* Creates a fresh mixed_string_buffer. */ +extern struct mixed_string_buffer * + mixed_string_buffer_alloc (lexical_context_ty lcontext, + const char *logical_file_name, + int line_number); + +/* Appends a character to a mixed_string_buffer. */ +extern void mixed_string_buffer_append_char (struct mixed_string_buffer *bp, + int c); + +/* Appends a Unicode character to a mixed_string_buffer. */ +extern void mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, + int c); + +/* Frees mixed_string_buffer and returns the accumulated string in UTF-8. */ +extern char * mixed_string_buffer_done (struct mixed_string_buffer *bp); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _XGETTEXT_H */ |