diff options
author | Karl Williamson <khw@cpan.org> | 2018-03-07 22:48:55 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-03-12 10:17:14 -0600 |
commit | 472be41b7bb1dab634c9b2b1655a206eea17f7d6 (patch) | |
tree | 7e43a322e9acb791facc32c9cc457683c28be1c6 /ext/I18N-Langinfo/Langinfo.xs | |
parent | 13a5f6feb6a027d1f26e17b55ba95120cacaf024 (diff) | |
download | perl-472be41b7bb1dab634c9b2b1655a206eea17f7d6.tar.gz |
PATCH: [perl #127288] I18N::Langinfo sets UTF-8 bit
This commit will turn UTF-8 on in the returned SV if its string is legal
UTF-8 containing something besides ASCII, and the locale is a UTF-8 one.
It is based on the patch included in the ticket, but is generalized to
handle edge cases.
Diffstat (limited to 'ext/I18N-Langinfo/Langinfo.xs')
-rw-r--r-- | ext/I18N-Langinfo/Langinfo.xs | 74 |
1 files changed, 68 insertions, 6 deletions
diff --git a/ext/I18N-Langinfo/Langinfo.xs b/ext/I18N-Langinfo/Langinfo.xs index 663cb2a665..904b424b19 100644 --- a/ext/I18N-Langinfo/Langinfo.xs +++ b/ext/I18N-Langinfo/Langinfo.xs @@ -1,4 +1,6 @@ #define PERL_NO_GET_CONTEXT +#define PERL_EXT +#define PERL_EXT_LANGINFO #include "EXTERN.h" #include "perl.h" @@ -22,17 +24,77 @@ INCLUDE: const-xs.inc SV* langinfo(code) int code + PREINIT: + const char * value; + STRLEN len; PROTOTYPE: _ CODE: #ifdef HAS_NL_LANGINFO if (code < 0) { SETERRNO(EINVAL, LIB_INVARG); RETVAL = &PL_sv_undef; - } else { - RETVAL = newSVpv(Perl_langinfo(code), 0); - } -#else - RETVAL = newSVpv(Perl_langinfo(code), 0); + } else #endif + { + value = Perl_langinfo(code); + len = strlen(value); + RETVAL = newSVpvn(Perl_langinfo(code), len); + + /* Now see if the UTF-8 flag should be turned on */ +#ifdef USE_LOCALE_CTYPE /* No utf8 strings if not using LC_CTYPE */ + + /* If 'value' is ASCII or not legal UTF-8, the flag doesn't get + * turned on, so skip the followin code */ + if (is_utf8_non_invariant_string((U8 *) value, len)) { + int category; + + /* Check if the locale is a UTF-8 one. The returns from + * Perl_langinfo() are in different locale categories, so check the + * category corresponding to this item */ + switch (code) { + + /* This should always return ASCII, so we could instead + * legitimately panic here, but soldier on */ + case CODESET: + category = LC_CTYPE; + break; + + case RADIXCHAR: + case THOUSEP: +# ifdef USE_LOCALE_NUMERIC + category = LC_NUMERIC; +# else + /* Not ideal, but the best we can do on such a platform */ + category = LC_CTYPE; +# endif + break; + + case CRNCYSTR: +# ifdef USE_LOCALE_MONETARY + category = LC_MONETARY; +# else + category = LC_CTYPE; +# endif + break; + + default: +# ifdef USE_LOCALE_TIME + category = LC_TIME; +# else + category = LC_CTYPE; +# endif + break; + } + + /* Here the return is legal UTF-8. Turn on that flag if the + * locale is UTF-8. (Otherwise, could just be a coincidence.) + * */ + if (_is_cur_LC_category_utf8(category)) { + SvUTF8_on(RETVAL); + } + } +#endif /* USE_LOCALE_CTYPE */ + } + OUTPUT: - RETVAL + RETVAL |