diff options
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 42 |
1 files changed, 42 insertions, 0 deletions
@@ -27,6 +27,7 @@ # include <langinfo.h> #endif #include <limits.h> +#include <locale.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -288,3 +289,44 @@ mprintf(const char *fmt, ...) va_end(ap); return ret; } + +/* + * Set up libc for multibyte output in the user's chosen locale. + * + * XXX: we are known to have problems with Turkish (i/I confusion) so we + * deliberately fall back to the C locale for now. Longer term we should + * always prefer to select C.[encoding] if possible, but there's no + * standardisation in locales between systems, so we'll need to survey + * what's out there first. + */ +void +msetlocale(void) +{ + const char *vars[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL }; + char *cp; + int i; + + /* + * We can't yet cope with dotless/dotted I in Turkish locales, + * so fall back to the C locale for these. + */ + for (i = 0; vars[i] != NULL; i++) { + if ((cp = getenv(vars[i])) == NULL) + continue; + if (strncasecmp(cp, "TR", 2) != 0) + break; + /* + * If we're in a UTF-8 locale then prefer to use + * the C.UTF-8 locale (or equivalent) if it exists. + */ + if ((strcasestr(cp, "UTF-8") != NULL || + strcasestr(cp, "UTF8") != NULL) && + (setlocale(LC_CTYPE, "C.UTF-8") != NULL || + setlocale(LC_CTYPE, "POSIX.UTF-8") != NULL)) + return; + setlocale(LC_CTYPE, "C"); + return; + } + /* We can handle this locale */ + setlocale(LC_CTYPE, ""); +} |