summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorKO Myung-Hun <komh@chollian.net>2015-01-07 14:36:52 +0900
committerDaiki Ueno <ueno@gnu.org>2015-01-07 14:39:52 +0900
commit9bb70e04d414eed9fd805788083dc24d01dcac62 (patch)
treeb98c446a0ab87abcdd82ab6da5929fb7ca68e795 /lib
parent987cb086f36e1f39644589c19b88d306b9476c82 (diff)
downloadgnulib-9bb70e04d414eed9fd805788083dc24d01dcac62.tar.gz
localcharset: improve charset detection on OS/2
Use system codepage when appropriate. Map OS/2 codepages to GNU canonical charset names if possible. * lib/config.charset: Don't output aliases if "$os" is os2*. * lib/localcharset.c (get_charset_aliases) [OS2]: Hardcode the result for OS/2. (locale_charset) [OS2]: Use system codepage if codeset is omitted from the locale name which is neither "C" nor "POSIX".
Diffstat (limited to 'lib')
-rw-r--r--lib/config.charset4
-rw-r--r--lib/localcharset.c42
2 files changed, 39 insertions, 7 deletions
diff --git a/lib/config.charset b/lib/config.charset
index 1b8a3f71d8..ac10e4bef4 100644
--- a/lib/config.charset
+++ b/lib/config.charset
@@ -348,12 +348,10 @@ case "$os" in
#echo "sun_eu_greek ?" # what is this?
echo "UTF-8 UTF-8"
;;
- freebsd* | os2*)
+ freebsd*)
# FreeBSD 4.2 doesn't have nl_langinfo(CODESET); therefore
# localcharset.c falls back to using the full locale name
# from the environment variables.
- # Likewise for OS/2. OS/2 has XFree86 just like FreeBSD. Just
- # reuse FreeBSD's locale data for OS/2.
echo "C ASCII"
echo "US-ASCII ASCII"
for l in la_LN lt_LN; do
diff --git a/lib/localcharset.c b/lib/localcharset.c
index d54dbfb0a1..b4af28cd95 100644
--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -128,7 +128,7 @@ get_charset_aliases (void)
cp = charset_aliases;
if (cp == NULL)
{
-#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__)
+#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2)
const char *dir;
const char *base = "charset.alias";
char *file_name;
@@ -342,6 +342,36 @@ get_charset_aliases (void)
"CP54936" "\0" "GB18030" "\0"
"CP65001" "\0" "UTF-8" "\0";
# endif
+# if defined OS2
+ /* To avoid the troubles of installing a separate file in the same
+ directory as the DLL and of retrieving the DLL's directory at
+ runtime, simply inline the aliases here. */
+
+ /* The list of encodings is taken from "List of OS/2 Codepages"
+ by Alex Taylor:
+ <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
+ See also "IBM Globalization - Code page identifiers":
+ <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */
+ cp = "CP813" "\0" "ISO-8859-7" "\0"
+ "CP878" "\0" "KOI8-R" "\0"
+ "CP819" "\0" "ISO-8859-1" "\0"
+ "CP912" "\0" "ISO-8859-2" "\0"
+ "CP913" "\0" "ISO-8859-3" "\0"
+ "CP914" "\0" "ISO-8859-4" "\0"
+ "CP915" "\0" "ISO-8859-5" "\0"
+ "CP916" "\0" "ISO-8859-8" "\0"
+ "CP920" "\0" "ISO-8859-9" "\0"
+ "CP921" "\0" "ISO-8859-13" "\0"
+ "CP923" "\0" "ISO-8859-15" "\0"
+ "CP954" "\0" "EUC-JP" "\0"
+ "CP964" "\0" "EUC-TW" "\0"
+ "CP970" "\0" "EUC-KR" "\0"
+ "CP1089" "\0" "ISO-8859-6" "\0"
+ "CP1208" "\0" "UTF-8" "\0"
+ "CP1381" "\0" "GB2312" "\0"
+ "CP1386" "\0" "GBK" "\0"
+ "CP3372" "\0" "EUC-JP" "\0";
+# endif
#endif
charset_aliases = cp;
@@ -499,6 +529,8 @@ locale_charset (void)
ULONG cp[3];
ULONG cplen;
+ codeset = NULL;
+
/* Allow user to override the codeset, as set in the operating system,
with standard language environment variables. */
locale = getenv ("LC_ALL");
@@ -530,10 +562,12 @@ locale_charset (void)
}
}
- /* Resolve through the charset.alias file. */
- codeset = locale;
+ /* For the POSIX locale, don't use the system's codepage. */
+ if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
+ codeset = "";
}
- else
+
+ if (codeset == NULL)
{
/* OS/2 has a function returning the locale's codepage as a number. */
if (DosQueryCp (sizeof (cp), cp, &cplen))