summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c42
1 files changed, 42 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index f563d373..87fa9e89 100644
--- a/utf8.c
+++ b/utf8.c
@@ -27,6 +27,7 @@
# include <langinfo.h>
#endif
#include <limits.h>
+#include <locale.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -288,3 +289,44 @@ mprintf(const char *fmt, ...)
va_end(ap);
return ret;
}
+
+/*
+ * Set up libc for multibyte output in the user's chosen locale.
+ *
+ * XXX: we are known to have problems with Turkish (i/I confusion) so we
+ * deliberately fall back to the C locale for now. Longer term we should
+ * always prefer to select C.[encoding] if possible, but there's no
+ * standardisation in locales between systems, so we'll need to survey
+ * what's out there first.
+ */
+void
+msetlocale(void)
+{
+ const char *vars[] = { "LC_ALL", "LC_CTYPE", "LANG", NULL };
+ char *cp;
+ int i;
+
+ /*
+ * We can't yet cope with dotless/dotted I in Turkish locales,
+ * so fall back to the C locale for these.
+ */
+ for (i = 0; vars[i] != NULL; i++) {
+ if ((cp = getenv(vars[i])) == NULL)
+ continue;
+ if (strncasecmp(cp, "TR", 2) != 0)
+ break;
+ /*
+ * If we're in a UTF-8 locale then prefer to use
+ * the C.UTF-8 locale (or equivalent) if it exists.
+ */
+ if ((strcasestr(cp, "UTF-8") != NULL ||
+ strcasestr(cp, "UTF8") != NULL) &&
+ (setlocale(LC_CTYPE, "C.UTF-8") != NULL ||
+ setlocale(LC_CTYPE, "POSIX.UTF-8") != NULL))
+ return;
+ setlocale(LC_CTYPE, "C");
+ return;
+ }
+ /* We can handle this locale */
+ setlocale(LC_CTYPE, "");
+}