summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <bonzini@gnu.org>2011-12-18 15:33:53 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2012-01-07 15:22:09 +0100
commita7b66296691d754d26a3cdd829814e892e71a4d3 (patch)
treedc74a3e88b55190ba512e4fa6587307a034009e0
parentcacad9e8e34b4e8caac6bc8863a78734e08d9ee8 (diff)
downloadgnulib-a7b66296691d754d26a3cdd829814e892e71a4d3.tar.gz
quotearg: fall back to Unicode single quotes in UTF-8 and GB-18030 locales
Most programs do not have translation catalogs for English and much less separate catalogs for British and American English. Drop the suggestion to translators about these two, and provide it automatically for Unicode locales. Like most programs, even those using American English, we use single quotation marks. This conflicts with the American typographic convention, but works better when you cite the entire error message within double quotes. It also tries not to clash with established practice and with what non-gnulib programs will usually do. * lib/quotearg.c (gettext_quote): Hard-code U+2018 and U+2019 when using an UTF-8 or GB-18030 locale. The list of other locales with quotes was provided by Bruno Haible. (quotearg_buffer_restyled): Adjust instructions to translators. * lib/quotearg.h (locale_quoting_style): Do not put an example in the text, since this would be wrong when using Unicode. * modules/quotearg: Depend on c-strcaseeq.
-rw-r--r--lib/quotearg.c64
-rw-r--r--lib/quotearg.h6
-rw-r--r--modules/quotearg2
3 files changed, 56 insertions, 16 deletions
diff --git a/lib/quotearg.c b/lib/quotearg.c
index 60b399e5ab..e43b0c137e 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -29,6 +29,8 @@
#include "quotearg.h"
#include "xalloc.h"
+#include "c-strcaseeq.h"
+#include "localcharset.h"
#include <ctype.h>
#include <errno.h>
@@ -183,13 +185,45 @@ quoting_options_from_style (enum quoting_style style)
}
/* MSGID approximates a quotation mark. Return its translation if it
- has one; otherwise, return either it or "\"", depending on S. */
+ has one; otherwise, return U+2018 or U+2019 (chosen by MSGID) in
+ UTF-8 and GB-18030 locales, else either it or "\"", depending on S.
+ S is either clocale_quoting_style or locale_quoting_style. */
static char const *
gettext_quote (char const *msgid, enum quoting_style s)
{
char const *translation = _(msgid);
- if (translation == msgid && s == clocale_quoting_style)
- translation = "\"";
+ char const *locale_code;
+
+ if (translation != msgid)
+ return translation;
+
+ /* For UTF-8 and GB-18030, use single quotes U+2018 and U+2019.
+ Here is a list of other locales that include U+2018 and U+2019:
+
+ ISO-8859-7 0xA1 KOI8-T 0x91
+ CP869 0x8B CP874 0x91
+ CP932 0x81 0x65 CP936 0xA1 0xAE
+ CP949 0xA1 0xAE CP950 0xA1 0xA5
+ CP1250 0x91 CP1251 0x91
+ CP1252 0x91 CP1253 0x91
+ CP1254 0x91 CP1255 0x91
+ CP1256 0x91 CP1257 0x91
+ EUC-JP 0xA1 0xC6 EUC-KR 0xA1 0xAE
+ EUC-TW 0xA1 0xE4 BIG5 0xA1 0xA5
+ BIG5-HKSCS 0xA1 0xA5 EUC-CN 0xA1 0xAE
+ GBK 0xA1 0xAE Georgian-PS 0x91
+ PT154 0x91
+
+ None of these is still in wide use; using iconv is overkill. */
+ locale_code = locale_charset ();
+ if (STRCASEEQ (locale_code, "UTF-8", 'U','T','F','-','8',0,0,0,0))
+ return msgid[0] == '`' ? "\xe2\x80\x98": "\xe2\x80\x99";
+ if (STRCASEEQ (locale_code, "GB18030", 'G','B','1','8','0','3','0',0,0))
+ return msgid[0] == '`' ? "\xa1\xae": "\xa1\xaf";
+
+ if (s == clocale_quoting_style)
+ return "\"";
+
return translation;
}
@@ -258,19 +292,21 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
{
/* TRANSLATORS:
Get translations for open and closing quotation marks.
-
The message catalog should translate "`" to a left
quotation mark suitable for the locale, and similarly for
- "'". If the catalog has no translation,
- locale_quoting_style quotes `like this', and
- clocale_quoting_style quotes "like this".
-
- For example, an American English Unicode locale should
- translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
- should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
- MARK). A British English Unicode locale should instead
- translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
- and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
+ "'". For example, a French Unicode locale should translate
+ these to U+00AB (LEFT-POINTING DOUBLE ANGLE
+ QUOTATION MARK), and U+00BB (RIGHT-POINTING DOUBLE ANGLE
+ QUOTATION MARK), respectively.
+
+ If the catalog has no translation, we will try to
+ use Unicode U+2018 (LEFT SINGLE QUOTATION MARK) and
+ Unicode U+2019 (RIGHT SINGLE QUOTATION MARK). If the
+ current locale is not Unicode, locale_quoting_style
+ will quote `like this', and clocale_quoting_style will
+ quote "like this". You should always include translations
+ for "`" and "'" even if U+2018 and U+2019 are appropriate
+ for your locale.
If you don't know what to put here, please see
<http://en.wikipedia.org/wiki/Quotation_marks_in_other_languages>
diff --git a/lib/quotearg.h b/lib/quotearg.h
index 24ec230509..e6b08ab230 100644
--- a/lib/quotearg.h
+++ b/lib/quotearg.h
@@ -112,8 +112,10 @@ enum quoting_style
*/
escape_quoting_style,
- /* Like clocale_quoting_style, but quote `like this' instead of
- "like this" in the default C locale (ls --quoting-style=locale).
+ /* Like clocale_quoting_style, but use single quotes in the
+ default C locale or if the program does not use gettext
+ (ls --quoting-style=locale). For UTF-8 and GB-18030 locales
+ without a translation, the quotes are U+2018 and U+2019.
LC_MESSAGES=C
quotearg_buffer:
diff --git a/modules/quotearg b/modules/quotearg
index b3f1ad17fe..176ff707f6 100644
--- a/modules/quotearg
+++ b/modules/quotearg
@@ -9,12 +9,14 @@ m4/mbrtowc.m4
m4/quotearg.m4
Depends-on:
+c-strcaseeq
extensions
gettext-h
mbrtowc
mbsinit
memcmp
quotearg-simple
+localcharset
stdbool
wchar
wctype-h