summaryrefslogtreecommitdiff
path: root/gettext-tools/src/recode-sr-latin.c
diff options
context:
space:
mode:
Diffstat (limited to 'gettext-tools/src/recode-sr-latin.c')
-rw-r--r--gettext-tools/src/recode-sr-latin.c395
1 files changed, 395 insertions, 0 deletions
diff --git a/gettext-tools/src/recode-sr-latin.c b/gettext-tools/src/recode-sr-latin.c
new file mode 100644
index 0000000..25b88f6
--- /dev/null
+++ b/gettext-tools/src/recode-sr-latin.c
@@ -0,0 +1,395 @@
+/* Recode Serbian text from Cyrillic to Latin script.
+ Copyright (C) 2006-2007, 2010, 2012 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2006.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <locale.h>
+
+#if HAVE_ICONV
+#include <iconv.h>
+#endif
+
+#include "closeout.h"
+#include "error.h"
+#include "progname.h"
+#include "relocatable.h"
+#include "basename.h"
+#include "xalloc.h"
+#include "localcharset.h"
+#include "c-strcase.h"
+#include "xstriconv.h"
+#include "filters.h"
+#include "propername.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+
+/* Long options accepted by getopt_long() in main().
+   Each entry takes no argument and reports the same character as the
+   corresponding short option in main()'s option string "hV".  */
+static const struct option long_options[] =
+{
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ /* All-zero entry marking the end of the table.  */
+ { NULL, 0, NULL, 0 }
+};
+
+/* Forward declaration of local functions. */
+/* usage() always ends by calling exit(), so it is marked 'noreturn' for
+   GCC versions that understand the attribute (2.5 and newer).  */
+static void usage (int status)
+#if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
+ __attribute__ ((noreturn))
+#endif
+;
+/* Recodes the text read from STREAM and writes it to stdout.  */
+static void process (FILE *stream);
+
+/* Program entry point.
+   Sets up the program name, the locale and the message domain, parses the
+   command line (only -h/--help and -V/--version are recognized; any
+   non-option argument is rejected), and then recodes standard input to
+   standard output through process().
+   Always terminates through exit(): EXIT_SUCCESS after normal processing,
+   --help or --version; EXIT_FAILURE on a usage error.  */
+int
+main (int argc, char *argv[])
+{
+  /* Default values for command line options.  */
+  bool do_help = false;
+  bool do_version = false;
+
+  int opt;
+
+  /* Set program name for message texts.  */
+  set_program_name (argv[0]);
+
+#ifdef HAVE_SETLOCALE
+  /* Set locale via LC_ALL.  */
+  setlocale (LC_ALL, "");
+#endif
+
+  /* Set the text message domain.  */
+  bindtextdomain (PACKAGE, relocate (LOCALEDIR));
+  textdomain (PACKAGE);
+
+  /* Ensure that write errors on stdout are detected.  */
+  atexit (close_stdout);
+
+  /* Parse command line options.
+     getopt_long() reports the end of the options by returning -1; compare
+     against that value rather than against EOF, which is only guaranteed
+     to be some negative int.  */
+  while ((opt = getopt_long (argc, argv, "hV", long_options, NULL)) != -1)
+    switch (opt)
+      {
+      case '\0':          /* Long option.  */
+        break;
+      case 'h':
+        do_help = true;
+        break;
+      case 'V':
+        do_version = true;
+        break;
+      default:
+        /* Unknown option: print a hint and exit; usage() does not return.  */
+        usage (EXIT_FAILURE);
+      }
+
+  /* Version information is requested.  It takes precedence over --help.  */
+  if (do_version)
+    {
+      printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
+      /* xgettext: no-wrap */
+      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
+License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
+This is free software: you are free to change and redistribute it.\n\
+There is NO WARRANTY, to the extent permitted by law.\n\
+"),
+              "2006-2007");
+      printf (_("Written by %s and %s.\n"),
+              /* TRANSLATORS: This is a proper name. The last name is
+                 (with Unicode escapes) "\u0160egan" or (with HTML entities)
+                 "&Scaron;egan". */
+              proper_name_utf8 ("Danilo Segan", "Danilo \305\240egan"),
+              proper_name ("Bruno Haible"));
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Help is requested.  */
+  if (do_help)
+    usage (EXIT_SUCCESS);
+
+  /* The program is a pure filter: it takes no file name arguments.  */
+  if (argc - optind > 0)
+    error (EXIT_FAILURE, 0, _("too many arguments"));
+
+  process (stdin);
+
+  exit (EXIT_SUCCESS);
+}
+
+
+/* Print usage information and terminate the program with exit code STATUS.
+   When STATUS indicates failure, only a one-line hint pointing at --help is
+   written to stderr; otherwise the full help text is written to stdout.
+   This function never returns.  */
+static void
+usage (int status)
+{
+  /* Failure path: short hint only.  */
+  if (status != EXIT_SUCCESS)
+    {
+      fprintf (stderr, _("Try '%s --help' for more information.\n"),
+               program_name);
+      exit (status);
+    }
+
+  /* Success path: full help text, one translatable chunk per paragraph.  */
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Usage: %s [OPTION]\n\
+"), program_name);
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Recode Serbian text from Cyrillic to Latin script.\n"));
+  /* xgettext: no-wrap */
+  printf (_("\
+The input text is read from standard input. The converted text is output to\n\
+standard output.\n"));
+  printf ("\n");
+
+  /* xgettext: no-wrap */
+  printf (_("\
+Informative output:\n"));
+  /* xgettext: no-wrap */
+  printf (_("\
+ -h, --help display this help and exit\n"));
+  /* xgettext: no-wrap */
+  printf (_("\
+ -V, --version output version information and exit\n"));
+  printf ("\n");
+
+  /* TRANSLATORS: The placeholder indicates the bug-reporting address
+     for this package. Please add _another line_ saying
+     "Report translation bugs to <...>\n" with the address for translation
+     bugs (typically your translation team's web or email address). */
+  fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
+
+  exit (status);
+}
+
+
+/* Routines for reading a line.
+ Don't use routines that drop NUL bytes. Don't use getline(), because it
+ doesn't provide a good error message in case of memory allocation failure.
+ The gnulib module 'linebuffer' is nearly the right thing, except that we
+ don't want an extra newline at the end of file. */
+
+/* A 'struct linebuffer' holds a line of text.
+   The text is not NUL terminated: its extent is given by LENGTH, and it may
+   contain NUL bytes.  */
+
+struct linebuffer
+{
+ size_t size; /* Number of bytes allocated for BUFFER. */
+ size_t length; /* Number of bytes of BUFFER holding the current line. */
+ char *buffer; /* Storage, grown with xrealloc(); NULL while nothing is allocated. */
+};
+
+/* Put LB into the empty state: no storage allocated and no text held.
+   read_linebuffer() reads LB's fields, so this must be called on LB before
+   its first use there.  */
+static inline void
+init_linebuffer (struct linebuffer *lb)
+{
+  lb->buffer = NULL;
+  lb->length = 0;
+  lb->size = 0;
+}
+
+/* Read one line of arbitrary length from STREAM into LB, reusing and growing
+   LB's storage as needed.
+   The terminating newline, if present, is stored as part of the line; the
+   text is not NUL terminated, and its length is left in LB->length.  A final
+   line without a newline is returned as is, without adding one.
+   Return LB on success.  Return NULL if STREAM is already at end of file, if
+   no byte could be read, or if a read error occurred; LB->length is not
+   updated in that case.  */
+static struct linebuffer *
+read_linebuffer (struct linebuffer *lb, FILE *stream)
+{
+  size_t used = 0;      /* Bytes of the current line stored so far.  */
+
+  if (feof (stream))
+    return NULL;
+
+  for (;;)
+    {
+      int c = getc (stream);
+
+      if (c == EOF)
+        {
+          /* Nothing read at all, or a genuine I/O error: report failure.
+             Otherwise this is an unterminated last line; hand it out.  */
+          if (used == 0 || ferror (stream))
+            return NULL;
+          break;
+        }
+
+      /* Storage exhausted: roughly double it before storing the byte.  */
+      if (used == lb->size)
+        {
+          size_t newsize = 2 * lb->size + 40;
+
+          lb->buffer = (char *) xrealloc (lb->buffer, newsize);
+          lb->size = newsize;
+        }
+
+      lb->buffer[used++] = c;
+      if (c == '\n')
+        break;
+    }
+
+  lb->length = used;
+  return lb;
+}
+
+/* Release the storage owned by LB.  LB itself is not freed and its fields
+   are left as they were, so LB must be re-initialized with
+   init_linebuffer() before it is used again.  */
+static inline void
+destroy_linebuffer (struct linebuffer *lb)
+{
+  /* free (NULL) is a no-op, so a buffer that was never filled needs no
+     special treatment.  */
+  free (lb->buffer);
+}
+
+
+/* Process the input and produce the output.
+   Reads STREAM line by line, passes each line through serbian_to_latin()
+   and writes the result to stdout.
+   The filter is always handed UTF-8 text: when the character set of the
+   current locale, as reported by locale_charset(), is not UTF-8, each line
+   is converted to UTF-8 before filtering and converted back to the locale's
+   character set afterwards, using iconv.
+   If a required conversion is unavailable or fails, the program is
+   terminated through error() with EXIT_FAILURE.
+   NOTE(review): read_linebuffer() returns NULL both at end of file and on a
+   read error; the two cases are not distinguished here, so a read error on
+   STREAM ends processing silently.  */
+static void
+process (FILE *stream)
+{
+  struct linebuffer lb;
+  const char *locale_code = locale_charset ();
+  bool need_code_conversion = (c_strcasecmp (locale_code, "UTF-8") != 0);
+#if HAVE_ICONV
+  iconv_t conv_to_utf8 = (iconv_t)(-1);
+  iconv_t conv_from_utf8 = (iconv_t)(-1);
+  /* Scratch buffers handed to xmem_cd_iconv() and reused from one line to
+     the next.  They are initialized here, not only on the conversion path,
+     so that they can be released unconditionally at the end.  */
+  char *last_utf8_line = NULL;
+  size_t last_utf8_line_len = 0;
+  char *last_backconv_line = NULL;
+  size_t last_backconv_line_len = 0;
+#endif
+
+  init_linebuffer (&lb);
+
+  /* Initialize the conversion descriptors.  */
+  if (need_code_conversion)
+    {
+#if HAVE_ICONV
+      /* Avoid glibc-2.1 bug with EUC-KR.
+         NOTE(review): strcmp() needs <string.h>, which is not among the
+         headers this file includes directly -- confirm that it is provided
+         indirectly on the platforms where this branch is compiled.  */
+# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
+    && !defined _LIBICONV_VERSION
+      if (strcmp (locale_code, "EUC-KR") != 0)
+# endif
+        {
+          conv_to_utf8 = iconv_open ("UTF-8", locale_code);
+          /* TODO: Maybe append //TRANSLIT here?  */
+          conv_from_utf8 = iconv_open (locale_code, "UTF-8");
+        }
+      if (conv_to_utf8 == (iconv_t)(-1))
+        error (EXIT_FAILURE, 0, _("\
+Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
+and iconv() does not support this conversion."),
+               locale_code, "UTF-8", basename (program_name));
+      if (conv_from_utf8 == (iconv_t)(-1))
+        error (EXIT_FAILURE, 0, _("\
+Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
+and iconv() does not support this conversion."),
+               "UTF-8", locale_code, basename (program_name));
+#else
+      error (EXIT_FAILURE, 0, _("\
+Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
+This version was built without iconv()."),
+             locale_code, "UTF-8", basename (program_name));
+#endif
+    }
+
+  /* Read the input line by line.
+     Processing it character by character is not possible, because some
+     filters need to look at adjacent characters.  Processing the entire file
+     in a whole chunk would take an excessive amount of memory.  */
+  for (;;)
+    {
+      char *line;
+      size_t line_len;
+      char *filtered_line;
+      size_t filtered_line_len;
+
+      /* Read a line.  */
+      if (read_linebuffer (&lb, stream) == NULL)
+        break;
+      line = lb.buffer;
+      line_len = lb.length;
+      /* read_linebuffer never hands out an empty line.  */
+      if (line_len == 0)
+        abort ();
+
+#if HAVE_ICONV
+      /* Convert it to UTF-8.  */
+      if (need_code_conversion)
+        {
+          char *utf8_line = last_utf8_line;
+          size_t utf8_line_len = last_utf8_line_len;
+
+          if (xmem_cd_iconv (line, line_len, conv_to_utf8,
+                             &utf8_line, &utf8_line_len) != 0)
+            error (EXIT_FAILURE, errno,
+                   _("input is not valid in \"%s\" encoding"),
+                   locale_code);
+          /* If the scratch buffer was too small, a new block was returned:
+             drop the old one and remember the new one for the next line.  */
+          if (utf8_line != last_utf8_line)
+            {
+              if (last_utf8_line != NULL)
+                free (last_utf8_line);
+              last_utf8_line = utf8_line;
+              last_utf8_line_len = utf8_line_len;
+            }
+
+          line = utf8_line;
+          line_len = utf8_line_len;
+        }
+#endif
+
+      /* Apply the filter.  The result is freed at the end of the iteration.  */
+      serbian_to_latin (line, line_len, &filtered_line, &filtered_line_len);
+
+#if HAVE_ICONV
+      /* Convert it back to the original encoding.  */
+      if (need_code_conversion)
+        {
+          char *backconv_line = last_backconv_line;
+          size_t backconv_line_len = last_backconv_line_len;
+
+          if (xmem_cd_iconv (filtered_line, filtered_line_len, conv_from_utf8,
+                             &backconv_line, &backconv_line_len) != 0)
+            error (EXIT_FAILURE, errno,
+                   _("error while converting from \"%s\" encoding to \"%s\" encoding"),
+                   "UTF-8", locale_code);
+          /* Same scratch buffer bookkeeping as for the UTF-8 line above.  */
+          if (backconv_line != last_backconv_line)
+            {
+              if (last_backconv_line != NULL)
+                free (last_backconv_line);
+              last_backconv_line = backconv_line;
+              last_backconv_line_len = backconv_line_len;
+            }
+
+          fwrite (backconv_line, 1, backconv_line_len, stdout);
+        }
+      else
+#endif
+        fwrite (filtered_line, 1, filtered_line_len, stdout);
+
+      free (filtered_line);
+    }
+
+#if HAVE_ICONV
+  if (need_code_conversion)
+    {
+      iconv_close (conv_from_utf8);
+      iconv_close (conv_to_utf8);
+    }
+  /* Release the conversion scratch buffers; they were previously leaked.
+     Both are still NULL if no conversion took place.  */
+  free (last_backconv_line);
+  free (last_utf8_line);
+#endif
+
+  destroy_linebuffer (&lb);
+}