diff options
Diffstat (limited to 'locale.c')
-rw-r--r-- | locale.c | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/locale.c b/locale.c new file mode 100644 index 0000000000..aefcc344a5 --- /dev/null +++ b/locale.c @@ -0,0 +1,549 @@ +/* locale.c + * + * Copyright (c) 2001, Larry Wall + * + * You may distribute under the terms of either the GNU General Public + * License or the Artistic License, as specified in the README file. + * + */ + +/* + * A Elbereth Gilthoniel, + * silivren penna míriel + * o menel aglar elenath! + * Na-chaered palan-díriel + * o galadhremmin ennorath, + * Fanuilos, le linnathon + * nef aear, si nef aearon! + */ + +#include "EXTERN.h" +#define PERL_IN_LOCALE_C +#include "perl.h" + +#ifdef I_LOCALE +# include <locale.h> +#endif + +/* + * Standardize the locale name from a string returned by 'setlocale'. + * + * The standard return value of setlocale() is either + * (1) "xx_YY" if the first argument of setlocale() is not LC_ALL + * (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL + * (the space-separated values represent the various sublocales, + * in some unspecificed order) + * + * In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", + * which is harmful for further use of the string in setlocale(). + * + */ +STATIC char * +S_stdize_locale(pTHX_ char *locs) +{ + char *s; + bool okay = TRUE; + + if ((s = strchr(locs, '='))) { + char *t; + + okay = FALSE; + if ((t = strchr(s, '.'))) { + char *u; + + if ((u = strchr(t, '\n'))) { + + if (u[1] == 0) { + STRLEN len = u - s; + Move(s + 1, locs, len, char); + locs[len] = 0; + okay = TRUE; + } + } + } + } + + if (!okay) + Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs); + + return locs; +} + +void +Perl_set_numeric_radix(pTHX) +{ +#ifdef USE_LOCALE_NUMERIC +# ifdef HAS_LOCALECONV + struct lconv* lc; + + lc = localeconv(); + if (lc && lc->decimal_point) { + if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { + SvREFCNT_dec(PL_numeric_radix_sv); + PL_numeric_radix_sv = Nullsv; + } + else { + if (PL_numeric_radix_sv) + sv_setpv(PL_numeric_radix_sv, lc->decimal_point); + else + PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); + } + } + else + PL_numeric_radix_sv = Nullsv; +# endif /* HAS_LOCALECONV */ +#endif /* USE_LOCALE_NUMERIC */ +} + +/* + * Set up for a new numeric locale. + */ +void +Perl_new_numeric(pTHX_ char *newnum) +{ +#ifdef USE_LOCALE_NUMERIC + + if (! newnum) { + if (PL_numeric_name) { + Safefree(PL_numeric_name); + PL_numeric_name = NULL; + } + PL_numeric_standard = TRUE; + PL_numeric_local = TRUE; + return; + } + + if (! PL_numeric_name || strNE(PL_numeric_name, newnum)) { + Safefree(PL_numeric_name); + PL_numeric_name = stdize_locale(savepv(newnum)); + PL_numeric_standard = (strEQ(newnum, "C") || strEQ(newnum, "POSIX")); + PL_numeric_local = TRUE; + set_numeric_radix(); + } + +#endif /* USE_LOCALE_NUMERIC */ +} + +void +Perl_set_numeric_standard(pTHX) +{ +#ifdef USE_LOCALE_NUMERIC + + if (! PL_numeric_standard) { + setlocale(LC_NUMERIC, "C"); + PL_numeric_standard = TRUE; + PL_numeric_local = FALSE; + set_numeric_radix(); + } + +#endif /* USE_LOCALE_NUMERIC */ +} + +void +Perl_set_numeric_local(pTHX) +{ +#ifdef USE_LOCALE_NUMERIC + + if (! PL_numeric_local) { + setlocale(LC_NUMERIC, PL_numeric_name); + PL_numeric_standard = FALSE; + PL_numeric_local = TRUE; + set_numeric_radix(); + } + +#endif /* USE_LOCALE_NUMERIC */ +} + +/* + * Set up for a new ctype locale. + */ +void +Perl_new_ctype(pTHX_ char *newctype) +{ +#ifdef USE_LOCALE_CTYPE + + int i; + + for (i = 0; i < 256; i++) { + if (isUPPER_LC(i)) + PL_fold_locale[i] = toLOWER_LC(i); + else if (isLOWER_LC(i)) + PL_fold_locale[i] = toUPPER_LC(i); + else + PL_fold_locale[i] = i; + } + +#endif /* USE_LOCALE_CTYPE */ +} + +/* + * Set up for a new collation locale. + */ +void +Perl_new_collate(pTHX_ char *newcoll) +{ +#ifdef USE_LOCALE_COLLATE + + if (! newcoll) { + if (PL_collation_name) { + ++PL_collation_ix; + Safefree(PL_collation_name); + PL_collation_name = NULL; + } + PL_collation_standard = TRUE; + PL_collxfrm_base = 0; + PL_collxfrm_mult = 2; + return; + } + + if (! PL_collation_name || strNE(PL_collation_name, newcoll)) { + ++PL_collation_ix; + Safefree(PL_collation_name); + PL_collation_name = stdize_locale(savepv(newcoll)); + PL_collation_standard = (strEQ(newcoll, "C") || strEQ(newcoll, "POSIX")); + + { + /* 2: at most so many chars ('a', 'b'). */ + /* 50: surely no system expands a char more. */ +#define XFRMBUFSIZE (2 * 50) + char xbuf[XFRMBUFSIZE]; + Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); + Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); + SSize_t mult = fb - fa; + if (mult < 1) + Perl_croak(aTHX_ "strxfrm() gets absurd"); + PL_collxfrm_base = (fa > mult) ? (fa - mult) : 0; + PL_collxfrm_mult = mult; + } + } + +#endif /* USE_LOCALE_COLLATE */ +} + +/* + * Initialize locale awareness. + */ +int +Perl_init_i18nl10n(pTHX_ int printwarn) +{ + int ok = 1; + /* returns + * 1 = set ok or not applicable, + * 0 = fallback to C locale, + * -1 = fallback to C locale failed + */ + +#if defined(USE_LOCALE) + +#ifdef USE_LOCALE_CTYPE + char *curctype = NULL; +#endif /* USE_LOCALE_CTYPE */ +#ifdef USE_LOCALE_COLLATE + char *curcoll = NULL; +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE_NUMERIC + char *curnum = NULL; +#endif /* USE_LOCALE_NUMERIC */ +#ifdef __GLIBC__ + char *language = PerlEnv_getenv("LANGUAGE"); +#endif + char *lc_all = PerlEnv_getenv("LC_ALL"); + char *lang = PerlEnv_getenv("LANG"); + bool setlocale_failure = FALSE; + +#ifdef LOCALE_ENVIRON_REQUIRED + + /* + * Ultrix setlocale(..., "") fails if there are no environment + * variables from which to get a locale name. + */ + + bool done = FALSE; + +#ifdef LC_ALL + if (lang) { + if (setlocale(LC_ALL, "")) + done = TRUE; + else + setlocale_failure = TRUE; + } + if (!setlocale_failure) { +#ifdef USE_LOCALE_CTYPE + if (! (curctype = + setlocale(LC_CTYPE, + (!done && (lang || PerlEnv_getenv("LC_CTYPE"))) + ? "" : Nullch))) + setlocale_failure = TRUE; + else + curctype = savepv(curctype); +#endif /* USE_LOCALE_CTYPE */ +#ifdef USE_LOCALE_COLLATE + if (! (curcoll = + setlocale(LC_COLLATE, + (!done && (lang || PerlEnv_getenv("LC_COLLATE"))) + ? "" : Nullch))) + setlocale_failure = TRUE; + else + curcoll = savepv(curcoll); +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE_NUMERIC + if (! (curnum = + setlocale(LC_NUMERIC, + (!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) + ? "" : Nullch))) + setlocale_failure = TRUE; + else + curnum = savepv(curnum); +#endif /* USE_LOCALE_NUMERIC */ + } + +#endif /* LC_ALL */ + +#endif /* !LOCALE_ENVIRON_REQUIRED */ + +#ifdef LC_ALL + if (! setlocale(LC_ALL, "")) + setlocale_failure = TRUE; +#endif /* LC_ALL */ + + if (!setlocale_failure) { +#ifdef USE_LOCALE_CTYPE + if (! (curctype = setlocale(LC_CTYPE, ""))) + setlocale_failure = TRUE; + else + curctype = savepv(curctype); +#endif /* USE_LOCALE_CTYPE */ +#ifdef USE_LOCALE_COLLATE + if (! (curcoll = setlocale(LC_COLLATE, ""))) + setlocale_failure = TRUE; + else + curcoll = savepv(curcoll); +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE_NUMERIC + if (! (curnum = setlocale(LC_NUMERIC, ""))) + setlocale_failure = TRUE; + else + curnum = savepv(curnum); +#endif /* USE_LOCALE_NUMERIC */ + } + + if (setlocale_failure) { + char *p; + bool locwarn = (printwarn > 1 || + (printwarn && + (!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); + + if (locwarn) { +#ifdef LC_ALL + + PerlIO_printf(Perl_error_log, + "perl: warning: Setting locale failed.\n"); + +#else /* !LC_ALL */ + + PerlIO_printf(Perl_error_log, + "perl: warning: Setting locale failed for the categories:\n\t"); +#ifdef USE_LOCALE_CTYPE + if (! curctype) + PerlIO_printf(Perl_error_log, "LC_CTYPE "); +#endif /* USE_LOCALE_CTYPE */ +#ifdef USE_LOCALE_COLLATE + if (! curcoll) + PerlIO_printf(Perl_error_log, "LC_COLLATE "); +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE_NUMERIC + if (! curnum) + PerlIO_printf(Perl_error_log, "LC_NUMERIC "); +#endif /* USE_LOCALE_NUMERIC */ + PerlIO_printf(Perl_error_log, "\n"); + +#endif /* LC_ALL */ + + PerlIO_printf(Perl_error_log, + "perl: warning: Please check that your locale settings:\n"); + +#ifdef __GLIBC__ + PerlIO_printf(Perl_error_log, + "\tLANGUAGE = %c%s%c,\n", + language ? '"' : '(', + language ? language : "unset", + language ? '"' : ')'); +#endif + + PerlIO_printf(Perl_error_log, + "\tLC_ALL = %c%s%c,\n", + lc_all ? '"' : '(', + lc_all ? lc_all : "unset", + lc_all ? '"' : ')'); + +#if defined(USE_ENVIRON_ARRAY) + { + char **e; + for (e = environ; *e; e++) { + if (strnEQ(*e, "LC_", 3) + && strnNE(*e, "LC_ALL=", 7) + && (p = strchr(*e, '='))) + PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n", + (int)(p - *e), *e, p + 1); + } + } +#else + PerlIO_printf(Perl_error_log, + "\t(possibly more locale environment variables)\n"); +#endif + + PerlIO_printf(Perl_error_log, + "\tLANG = %c%s%c\n", + lang ? '"' : '(', + lang ? lang : "unset", + lang ? '"' : ')'); + + PerlIO_printf(Perl_error_log, + " are supported and installed on your system.\n"); + } + +#ifdef LC_ALL + + if (setlocale(LC_ALL, "C")) { + if (locwarn) + PerlIO_printf(Perl_error_log, + "perl: warning: Falling back to the standard locale (\"C\").\n"); + ok = 0; + } + else { + if (locwarn) + PerlIO_printf(Perl_error_log, + "perl: warning: Failed to fall back to the standard locale (\"C\").\n"); + ok = -1; + } + +#else /* ! LC_ALL */ + + if (0 +#ifdef USE_LOCALE_CTYPE + || !(curctype || setlocale(LC_CTYPE, "C")) +#endif /* USE_LOCALE_CTYPE */ +#ifdef USE_LOCALE_COLLATE + || !(curcoll || setlocale(LC_COLLATE, "C")) +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE_NUMERIC + || !(curnum || setlocale(LC_NUMERIC, "C")) +#endif /* USE_LOCALE_NUMERIC */ + ) + { + if (locwarn) + PerlIO_printf(Perl_error_log, + "perl: warning: Cannot fall back to the standard locale (\"C\").\n"); + ok = -1; + } + +#endif /* ! LC_ALL */ + +#ifdef USE_LOCALE_CTYPE + curctype = savepv(setlocale(LC_CTYPE, Nullch)); +#endif /* USE_LOCALE_CTYPE */ +#ifdef USE_LOCALE_COLLATE + curcoll = savepv(setlocale(LC_COLLATE, Nullch)); +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE_NUMERIC + curnum = savepv(setlocale(LC_NUMERIC, Nullch)); +#endif /* USE_LOCALE_NUMERIC */ + } + else { + +#ifdef USE_LOCALE_CTYPE + new_ctype(curctype); +#endif /* USE_LOCALE_CTYPE */ + +#ifdef USE_LOCALE_COLLATE + new_collate(curcoll); +#endif /* USE_LOCALE_COLLATE */ + +#ifdef USE_LOCALE_NUMERIC + new_numeric(curnum); +#endif /* USE_LOCALE_NUMERIC */ + } + +#endif /* USE_LOCALE */ + +#ifdef USE_LOCALE_CTYPE + if (curctype != NULL) + Safefree(curctype); +#endif /* USE_LOCALE_CTYPE */ +#ifdef USE_LOCALE_COLLATE + if (curcoll != NULL) + Safefree(curcoll); +#endif /* USE_LOCALE_COLLATE */ +#ifdef USE_LOCALE_NUMERIC + if (curnum != NULL) + Safefree(curnum); +#endif /* USE_LOCALE_NUMERIC */ + return ok; +} + +/* Backwards compatibility. */ +int +Perl_init_i18nl14n(pTHX_ int printwarn) +{ + return init_i18nl10n(printwarn); +} + +#ifdef USE_LOCALE_COLLATE + +/* + * mem_collxfrm() is a bit like strxfrm() but with two important + * differences. First, it handles embedded NULs. Second, it allocates + * a bit more memory than needed for the transformed data itself. + * The real transformed data begins at offset sizeof(collationix). + * Please see sv_collxfrm() to see how this is used. + */ +char * +Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) +{ + char *xbuf; + STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ + + /* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ + /* the +1 is for the terminating NUL. */ + + xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; + New(171, xbuf, xAlloc, char); + if (! xbuf) + goto bad; + + *(U32*)xbuf = PL_collation_ix; + xout = sizeof(PL_collation_ix); + for (xin = 0; xin < len; ) { + SSize_t xused; + + for (;;) { + xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); + if (xused == -1) + goto bad; + if (xused < xAlloc - xout) + break; + xAlloc = (2 * xAlloc) + 1; + Renew(xbuf, xAlloc, char); + if (! xbuf) + goto bad; + } + + xin += strlen(s + xin) + 1; + xout += xused; + + /* Embedded NULs are understood but silently skipped + * because they make no sense in locale collation. */ + } + + xbuf[xout] = '\0'; + *xlen = xout - sizeof(PL_collation_ix); + return xbuf; + + bad: + Safefree(xbuf); + *xlen = 0; + return NULL; +} + +#endif /* USE_LOCALE_COLLATE */ + |