summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arjun Shankar <arjun@redhat.com> 2020-09-25 14:47:06 +0200
committer Fangrui Song <i@maskray.me> 2021-08-27 17:26:08 -0700
commit 453aafef166abc5f3dab268b5bf0959cec8fb92b (patch)
tree eb424ce1ca8171ac6e6bf958280bd1668ddfd90c
parent 804887a0c8d546cb4ffc7a57f3623e37c7102ac8 (diff)
download glibc-453aafef166abc5f3dab268b5bf0959cec8fb92b.tar.gz
intl: Handle translation output codesets with suffixes [BZ #26383]
Commit 91927b7c7643 (Rewrite iconv option parsing [BZ #19519]) did not handle cases where the output codeset for translations (via the `gettext' family of functions) might have a caller-specified encoding suffix such as TRANSLIT or IGNORE. This led to a regression where translations did not work when the codeset had a suffix. This commit fixes the above issue by parsing any suffixes passed to __dcigettext and adds two new test-cases to intl/tst-codeset.c to verify correct behaviour. The iconv-internal function __gconv_create_spec and the static iconv-internal function gconv_destroy_spec are now visible internally within glibc and used in intl/dcigettext.c.
-rw-r--r-- iconv/Versions 4
-rw-r--r-- iconv/gconv_charset.c 10
-rw-r--r-- iconv/gconv_charset.h 27
-rw-r--r-- iconv/gconv_int.h 21
-rw-r--r-- iconv/iconv_open.c 2
-rw-r--r-- iconv/iconv_prog.c 2
-rw-r--r-- intl/dcigettext.c 17
-rw-r--r-- intl/tst-codeset.c 34
8 files changed, 60 insertions, 57 deletions
diff --git a/iconv/Versions b/iconv/Versions
index 8a5f4cf780..d51af52fa3 100644
--- a/iconv/Versions
+++ b/iconv/Versions
@@ -6,7 +6,9 @@ libc {
GLIBC_PRIVATE {
# functions shared with iconv program
__gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
- __gconv_open; __gconv_create_spec;
+
+ # functions used elsewhere in glibc
+ __gconv_open; __gconv_create_spec; __gconv_destroy_spec;
# function used by the gconv modules
__gconv_transliterate;
diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
index 6ccd0773cc..4ba0aa99f5 100644
--- a/iconv/gconv_charset.c
+++ b/iconv/gconv_charset.c
@@ -216,3 +216,13 @@ out:
return ret;
}
libc_hidden_def (__gconv_create_spec)
+
+
+void
+__gconv_destroy_spec (struct gconv_spec *conv_spec)
+{
+ free (conv_spec->fromcode);
+ free (conv_spec->tocode);
+ return;
+}
+libc_hidden_def (__gconv_destroy_spec)
diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
index b85d803130..4b98073389 100644
--- a/iconv/gconv_charset.h
+++ b/iconv/gconv_charset.h
@@ -48,33 +48,6 @@
#define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
-/* This function accepts the charset names of the source and destination of the
- conversion and populates *conv_spec with an equivalent conversion
- specification that may later be used by __gconv_open. The charset names
- might contain options in the form of suffixes that alter the conversion,
- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
- and truncating any suffix options in fromcode, and processing and truncating
- any suffix options in tocode. Supported suffix options ("TRANSLIT" or
- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
- to be set to true. Unrecognized suffix options are silently discarded. If
- the function succeeds, it returns conv_spec back to the caller. It returns
- NULL upon failure. */
-struct gconv_spec *
-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
- const char *tocode);
-libc_hidden_proto (__gconv_create_spec)
-
-
-/* This function frees all heap memory allocated by __gconv_create_spec. */
-static void __attribute__ ((unused))
-gconv_destroy_spec (struct gconv_spec *conv_spec)
-{
- free (conv_spec->fromcode);
- free (conv_spec->tocode);
- return;
-}
-
-
/* This function copies in-order, characters from the source 's' that are
either alpha-numeric or one in one of these: "_-.,:/" - into the destination
'wp' while dropping all other characters. In the process, it converts all
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 4748e9b1fa..8067a341b0 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -170,6 +170,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
__gconv_t *handle, int flags);
libc_hidden_proto (__gconv_open)
+/* This function accepts the charset names of the source and destination of the
+ conversion and populates *conv_spec with an equivalent conversion
+ specification that may later be used by __gconv_open. The charset names
+ might contain options in the form of suffixes that alter the conversion,
+ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
+ and truncating any suffix options in fromcode, and processing and truncating
+ any suffix options in tocode. Supported suffix options ("TRANSLIT" or
+ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
+ to be set to true. Unrecognized suffix options are silently discarded. If
+ the function succeeds, it returns conv_spec back to the caller. It returns
+ NULL upon failure. */
+extern struct gconv_spec *
+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
+ const char *tocode);
+libc_hidden_proto (__gconv_create_spec)
+
+/* This function frees all heap memory allocated by __gconv_create_spec. */
+extern void
+__gconv_destroy_spec (struct gconv_spec *conv_spec);
+libc_hidden_proto (__gconv_destroy_spec)
+
/* Free resources associated with transformation descriptor CD. */
extern int __gconv_close (__gconv_t cd)
attribute_hidden;
diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c
index 59d1ef4f07..46da33bca6 100644
--- a/iconv/iconv_open.c
+++ b/iconv/iconv_open.c
@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)
int res = __gconv_open (&conv_spec, &cd, 0);
- gconv_destroy_spec (&conv_spec);
+ __gconv_destroy_spec (&conv_spec);
if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
{
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 552efac816..e26e9d02ca 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -184,7 +184,7 @@ main (int argc, char *argv[])
/* Let's see whether we have these coded character sets. */
res = __gconv_open (&conv_spec, &cd, 0);
- gconv_destroy_spec (&conv_spec);
+ __gconv_destroy_spec (&conv_spec);
if (res != __GCONV_OK)
{
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index 975312a106..b65d2447a4 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -1121,15 +1121,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
# ifdef _LIBC
- struct gconv_spec conv_spec
- = { .fromcode = norm_add_slashes (charset, ""),
- .tocode = norm_add_slashes (outcharset, ""),
- /* We always want to use transliteration. */
- .translit = true,
- .ignore = false
- };
+ struct gconv_spec conv_spec;
+
+ __gconv_create_spec (&conv_spec, charset, outcharset);
+
+ /* We always want to use transliteration. */
+ conv_spec.translit = true;
+
int r = __gconv_open (&conv_spec, &convd->conv,
GCONV_AVOID_NOCONV);
+
+ __gconv_destroy_spec (&conv_spec);
+
if (__builtin_expect (r != __GCONV_OK, 0))
{
/* If the output encoding is the same there is
diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c
index e71382aeee..52e4aaa6ff 100644
--- a/intl/tst-codeset.c
+++ b/intl/tst-codeset.c
@@ -22,13 +22,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <support/check.h>
static int
do_test (void)
{
- char *s;
- int result = 0;
-
unsetenv ("LANGUAGE");
unsetenv ("OUTPUT_CHARSET");
setlocale (LC_ALL, "de_DE.ISO-8859-1");
@@ -36,25 +34,21 @@ do_test (void)
bindtextdomain ("codeset", OBJPFX "domaindir");
/* Here we expect output in ISO-8859-1. */
- s = gettext ("cheese");
- if (strcmp (s, "K\344se"))
- {
- printf ("call 1 returned: %s\n", s);
- result = 1;
- }
+ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");
+ /* Here we expect output in UTF-8. */
bind_textdomain_codeset ("codeset", "UTF-8");
+ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");
- /* Here we expect output in UTF-8. */
- s = gettext ("cheese");
- if (strcmp (s, "K\303\244se"))
- {
- printf ("call 2 returned: %s\n", s);
- result = 1;
- }
-
- return result;
+ /* `a with umlaut' is transliterated to `ae'. */
+ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
+
+ /* Transliteration also works by default even if not set. */
+ bind_textdomain_codeset ("codeset", "ASCII");
+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
+
+ return 0;
}
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
+#include <support/test-driver.c>