diff options
Diffstat (limited to 'gettext-tools/src/recode-sr-latin.c')
-rw-r--r-- | gettext-tools/src/recode-sr-latin.c | 395 |
1 files changed, 395 insertions, 0 deletions
diff --git a/gettext-tools/src/recode-sr-latin.c b/gettext-tools/src/recode-sr-latin.c new file mode 100644 index 0000000..25b88f6 --- /dev/null +++ b/gettext-tools/src/recode-sr-latin.c @@ -0,0 +1,395 @@ +/* Recode Serbian text from Cyrillic to Latin script. + Copyright (C) 2006-2007, 2010, 2012 Free Software Foundation, Inc. + Written by Bruno Haible <bruno@clisp.org>, 2006. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <errno.h> +#include <getopt.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <locale.h> + +#if HAVE_ICONV +#include <iconv.h> +#endif + +#include "closeout.h" +#include "error.h" +#include "progname.h" +#include "relocatable.h" +#include "basename.h" +#include "xalloc.h" +#include "localcharset.h" +#include "c-strcase.h" +#include "xstriconv.h" +#include "filters.h" +#include "propername.h" +#include "gettext.h" + +#define _(str) gettext (str) + + +/* Long options. */ +static const struct option long_options[] = +{ + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } +}; + +/* Forward declaration of local functions. */ +static void usage (int status) +#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2) + __attribute__ ((noreturn)) +#endif +; +static void process (FILE *stream); + +int +main (int argc, char *argv[]) +{ + /* Default values for command line options. */ + bool do_help = false; + bool do_version = false; + + int opt; + + /* Set program name for message texts. */ + set_program_name (argv[0]); + +#ifdef HAVE_SETLOCALE + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); +#endif + + /* Set the text message domain. */ + bindtextdomain (PACKAGE, relocate (LOCALEDIR)); + textdomain (PACKAGE); + + /* Ensure that write errors on stdout are detected. */ + atexit (close_stdout); + + /* Parse command line options. */ + while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF) + switch (opt) + { + case '\0': /* Long option. */ + break; + case 'h': + do_help = true; + break; + case 'V': + do_version = true; + break; + default: + usage (EXIT_FAILURE); + } + + /* Version information is requested. */ + if (do_version) + { + printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION); + /* xgettext: no-wrap */ + printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\ +License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\ +This is free software: you are free to change and redistribute it.\n\ +There is NO WARRANTY, to the extent permitted by law.\n\ +"), + "2006-2007"); + printf (_("Written by %s and %s.\n"), + /* TRANSLATORS: This is a proper name. The last name is + (with Unicode escapes) "\u0160egan" or (with HTML entities) + "Šegan". */ + proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"), + proper_name ("Bruno Haible")); + exit (EXIT_SUCCESS); + } + + /* Help is requested. */ + if (do_help) + usage (EXIT_SUCCESS); + + if (argc - optind > 0) + error (EXIT_FAILURE, 0, _("too many arguments")); + + process (stdin); + + exit (EXIT_SUCCESS); +} + + +/* Display usage information and exit. */ +static void +usage (int status) +{ + if (status != EXIT_SUCCESS) + fprintf (stderr, _("Try '%s --help' for more information.\n"), + program_name); + else + { + /* xgettext: no-wrap */ + printf (_("\ +Usage: %s [OPTION]\n\ +"), program_name); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Recode Serbian text from Cyrillic to Latin script.\n")); + /* xgettext: no-wrap */ + printf (_("\ +The input text is read from standard input. The converted text is output to\n\ +standard output.\n")); + printf ("\n"); + /* xgettext: no-wrap */ + printf (_("\ +Informative output:\n")); + /* xgettext: no-wrap */ + printf (_("\ + -h, --help display this help and exit\n")); + /* xgettext: no-wrap */ + printf (_("\ + -V, --version output version information and exit\n")); + printf ("\n"); + /* TRANSLATORS: The placeholder indicates the bug-reporting address + for this package. Please add _another line_ saying + "Report translation bugs to <...>\n" with the address for translation + bugs (typically your translation team's web or email address). */ + fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout); + } + + exit (status); +} + + +/* Routines for reading a line. + Don't use routines that drop NUL bytes. Don't use getline(), because it + doesn't provide a good error message in case of memory allocation failure. + The gnulib module 'linebuffer' is nearly the right thing, except that we + don't want an extra newline at the end of file. */ + +/* A 'struct linebuffer' holds a line of text. */ + +struct linebuffer +{ + size_t size; /* Allocated. */ + size_t length; /* Used. */ + char *buffer; +}; + +/* Initialize linebuffer LINEBUFFER for use. */ +static inline void +init_linebuffer (struct linebuffer *lb) +{ + lb->size = 0; + lb->length = 0; + lb->buffer = NULL; +} + +/* Read an arbitrarily long line of text from STREAM into linebuffer LB. + Keep the newline. Do not NUL terminate. + Return LINEBUFFER, except at end of file return NULL. */ +static struct linebuffer * +read_linebuffer (struct linebuffer *lb, FILE *stream) +{ + if (feof (stream)) + return NULL; + else + { + char *p = lb->buffer; + char *end = lb->buffer + lb->size; + + for (;;) + { + int c = getc (stream); + if (c == EOF) + { + if (p == lb->buffer || ferror (stream)) + return NULL; + break; + } + if (p == end) + { + size_t oldsize = lb->size; /* = p - lb->buffer */ + size_t newsize = 2 * oldsize + 40; + lb->buffer = (char *) xrealloc (lb->buffer, newsize); + lb->size = newsize; + p = lb->buffer + oldsize; + end = lb->buffer + newsize; + } + *p++ = c; + if (c == '\n') + break; + } + + lb->length = p - lb->buffer; + return lb; + } +} + +/* Free linebuffer LB and its data, all allocated with malloc. */ +static inline void +destroy_linebuffer (struct linebuffer *lb) +{ + if (lb->buffer != NULL) + free (lb->buffer); +} + + +/* Process the input and produce the output. */ +static void +process (FILE *stream) +{ + struct linebuffer lb; + const char *locale_code = locale_charset (); + bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0); +#if HAVE_ICONV + iconv_t conv_to_utf8 = (iconv_t)(-1); + iconv_t conv_from_utf8 = (iconv_t)(-1); + char *last_utf8_line; + size_t last_utf8_line_len; + char *last_backconv_line; + size_t last_backconv_line_len; +#endif + + init_linebuffer (&lb); + + /* Initialize the conversion descriptors. */ + if (need_code_conversion) + { +#if HAVE_ICONV + /* Avoid glibc-2.1 bug with EUC-KR. */ +# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ + && !defined _LIBICONV_VERSION + if (strcmp (locale_code, "EUC-KR") != 0) +# endif + { + conv_to_utf8 = iconv_open ("UTF-8", locale_code); + /* TODO: Maybe append //TRANSLIT here? */ + conv_from_utf8 = iconv_open (locale_code, "UTF-8"); + } + if (conv_to_utf8 == (iconv_t)(-1)) + error (EXIT_FAILURE, 0, _("\ +Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \ +and iconv() does not support this conversion."), + locale_code, "UTF-8", basename (program_name)); + if (conv_from_utf8 == (iconv_t)(-1)) + error (EXIT_FAILURE, 0, _("\ +Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \ +and iconv() does not support this conversion."), + "UTF-8", locale_code, basename (program_name)); + last_utf8_line = NULL; + last_utf8_line_len = 0; + last_backconv_line = NULL; + last_backconv_line_len = 0; +#else + error (EXIT_FAILURE, 0, _("\ +Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \ +This version was built without iconv()."), + locale_code, "UTF-8", basename (program_name)); +#endif + } + + /* Read the input line by line. + Processing it character by character is not possible, because some + filters need to look at adjacent characters. Processing the entire file + in a whole chunk would take an excessive amount of memory. */ + for (;;) + { + char *line; + size_t line_len; + char *filtered_line; + size_t filtered_line_len; + + /* Read a line. */ + if (read_linebuffer (&lb, stream) == NULL) + break; + line = lb.buffer; + line_len = lb.length; + /* read_linebuffer always returns a non-void result. */ + if (line_len == 0) + abort (); + +#if HAVE_ICONV + /* Convert it to UTF-8. */ + if (need_code_conversion) + { + char *utf8_line = last_utf8_line; + size_t utf8_line_len = last_utf8_line_len; + + if (xmem_cd_iconv (line, line_len, conv_to_utf8, + &utf8_line, &utf8_line_len) != 0) + error (EXIT_FAILURE, errno, + _("input is not valid in \"%s\" encoding"), + locale_code); + if (utf8_line != last_utf8_line) + { + if (last_utf8_line != NULL) + free (last_utf8_line); + last_utf8_line = utf8_line; + last_utf8_line_len = utf8_line_len; + } + + line = utf8_line; + line_len = utf8_line_len; + } +#endif + + /* Apply the filter. */ + serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len); + +#if HAVE_ICONV + /* Convert it back to the original encoding. */ + if (need_code_conversion) + { + char *backconv_line = last_backconv_line; + size_t backconv_line_len = last_backconv_line_len; + + if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8, + &backconv_line, &backconv_line_len) != 0) + error (EXIT_FAILURE, errno, + _("error while converting from \"%s\" encoding to \"%s\" encoding"), + "UTF-8", locale_code); + if (backconv_line != last_backconv_line) + { + if (last_backconv_line != NULL) + free (last_backconv_line); + last_backconv_line = backconv_line; + last_backconv_line_len = backconv_line_len; + } + + fwrite (backconv_line, 1, backconv_line_len, stdout); + } + else +#endif + fwrite (filtered_line, 1, filtered_line_len, stdout); + + free (filtered_line); + } + +#if HAVE_ICONV + if (need_code_conversion) + { + iconv_close (conv_from_utf8); + iconv_close (conv_to_utf8); + } +#endif + + destroy_linebuffer (&lb); +} |