diff options
author | Karl Williamson <khw@cpan.org> | 2018-03-07 22:48:55 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-03-12 10:17:14 -0600 |
commit | 472be41b7bb1dab634c9b2b1655a206eea17f7d6 (patch) | |
tree | 7e43a322e9acb791facc32c9cc457683c28be1c6 /ext/I18N-Langinfo | |
parent | 13a5f6feb6a027d1f26e17b55ba95120cacaf024 (diff) | |
download | perl-472be41b7bb1dab634c9b2b1655a206eea17f7d6.tar.gz |
PATCH: [perl #127288] I18N::Langinfo sets UTF-8 bit
This commit will turn UTF-8 on in the returned SV if its string is legal
UTF-8 containing something besides ASCII, and the locale is a UTF-8 one.
It is based on the patch included in the ticket, but is generalized to
handle edge cases.
Diffstat (limited to 'ext/I18N-Langinfo')
-rw-r--r-- | ext/I18N-Langinfo/Langinfo.xs | 74 | ||||
-rw-r--r-- | ext/I18N-Langinfo/t/Langinfo.t | 80 |
2 files changed, 145 insertions, 9 deletions
diff --git a/ext/I18N-Langinfo/Langinfo.xs b/ext/I18N-Langinfo/Langinfo.xs index 663cb2a665..904b424b19 100644 --- a/ext/I18N-Langinfo/Langinfo.xs +++ b/ext/I18N-Langinfo/Langinfo.xs @@ -1,4 +1,6 @@ #define PERL_NO_GET_CONTEXT +#define PERL_EXT +#define PERL_EXT_LANGINFO #include "EXTERN.h" #include "perl.h" @@ -22,17 +24,77 @@ INCLUDE: const-xs.inc SV* langinfo(code) int code + PREINIT: + const char * value; + STRLEN len; PROTOTYPE: _ CODE: #ifdef HAS_NL_LANGINFO if (code < 0) { SETERRNO(EINVAL, LIB_INVARG); RETVAL = &PL_sv_undef; - } else { - RETVAL = newSVpv(Perl_langinfo(code), 0); - } -#else - RETVAL = newSVpv(Perl_langinfo(code), 0); + } else #endif + { + value = Perl_langinfo(code); + len = strlen(value); + RETVAL = newSVpvn(Perl_langinfo(code), len); + + /* Now see if the UTF-8 flag should be turned on */ +#ifdef USE_LOCALE_CTYPE /* No utf8 strings if not using LC_CTYPE */ + + /* If 'value' is ASCII or not legal UTF-8, the flag doesn't get + * turned on, so skip the followin code */ + if (is_utf8_non_invariant_string((U8 *) value, len)) { + int category; + + /* Check if the locale is a UTF-8 one. The returns from + * Perl_langinfo() are in different locale categories, so check the + * category corresponding to this item */ + switch (code) { + + /* This should always return ASCII, so we could instead + * legitimately panic here, but soldier on */ + case CODESET: + category = LC_CTYPE; + break; + + case RADIXCHAR: + case THOUSEP: +# ifdef USE_LOCALE_NUMERIC + category = LC_NUMERIC; +# else + /* Not ideal, but the best we can do on such a platform */ + category = LC_CTYPE; +# endif + break; + + case CRNCYSTR: +# ifdef USE_LOCALE_MONETARY + category = LC_MONETARY; +# else + category = LC_CTYPE; +# endif + break; + + default: +# ifdef USE_LOCALE_TIME + category = LC_TIME; +# else + category = LC_CTYPE; +# endif + break; + } + + /* Here the return is legal UTF-8. Turn on that flag if the + * locale is UTF-8. (Otherwise, could just be a coincidence.) + * */ + if (_is_cur_LC_category_utf8(category)) { + SvUTF8_on(RETVAL); + } + } +#endif /* USE_LOCALE_CTYPE */ + } + OUTPUT: - RETVAL + RETVAL diff --git a/ext/I18N-Langinfo/t/Langinfo.t b/ext/I18N-Langinfo/t/Langinfo.t index 10a660e6d6..a26abb5ac6 100644 --- a/ext/I18N-Langinfo/t/Langinfo.t +++ b/ext/I18N-Langinfo/t/Langinfo.t @@ -7,7 +7,12 @@ require "../../t/loc_tools.pl"; plan skip_all => "I18N::Langinfo or POSIX unavailable" if $Config{'extensions'} !~ m!\bI18N/Langinfo\b!; -my @constants = qw(ABDAY_1 DAY_1 ABMON_1 MON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT D_FMT T_FMT); +my @times = qw( MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7 + MON_8 MON_9 MON_10 MON_11 MON_12 + DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7); +my @constants = qw(ABDAY_1 DAY_1 ABMON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT + D_FMT T_FMT); +push @constants, @times; my %want = ( @@ -21,9 +26,9 @@ my %want = my @want = sort keys %want; -plan tests => 1 + 3 * @constants + keys(@want) + 1; +plan tests => 1 + 3 * @constants + keys(@want) + 1 + 2; -use_ok('I18N::Langinfo', 'langinfo', @constants); +use_ok('I18N::Langinfo', 'langinfo', @constants, 'CRNCYSTR'); use POSIX; setlocale(LC_ALL, "C"); @@ -69,3 +74,72 @@ SKIP: { is (langinfo(&RADIXCHAR), ",", "Returns ',' for decimal pt for locale '$comma_locale'"); } + +SKIP: { + + my $found_time = 0; + my $found_monetary = 0; + my @locales = find_locales( [ 'LC_TIME', 'LC_CTYPE', 'LC_MONETARY' ]); + + while (defined (my $utf8_locale = find_utf8_ctype_locale(\@locales))) { + if (! $found_time) { + setlocale(&LC_TIME, $utf8_locale); + foreach my $time_item (@times) { + my $eval_string = "langinfo(&$time_item)"; + my $time_name = eval $eval_string; + if ($@) { + fail("'$eval_string' failed: $@"); + last SKIP; + } + if (! defined $time_name) { + fail("'$eval_string' returned undef"); + last SKIP; + } + if ($time_name eq "") { + fail("'$eval_string' returned an empty name"); + last SKIP; + } + + if ($time_name =~ /\P{ASCII}/) { + ok(utf8::is_utf8($time_name), "The name for '$time_item' in $utf8_locale is a UTF8 string"); + $found_time = 1; + last; + } + } + } + + if (! $found_monetary) { + setlocale(&LC_MONETARY, $utf8_locale); + my $eval_string = "langinfo(&CRNCYSTR)"; + my $symbol = eval $eval_string; + if ($@) { + fail("'$eval_string' failed: $@"); + last SKIP; + } + if (! defined $symbol) { + fail("'$eval_string' returned undef"); + last SKIP; + } + if ($symbol =~ /\P{ASCII}/) { + ok(utf8::is_utf8($symbol), "The name for 'CRNCYSTR' in $utf8_locale is a UTF8 string"); + $found_monetary = 1; + } + } + + last if $found_monetary && $found_time; + + # Remove this locale from the list, and loop to find another utf8 + # locale + @locales = grep { $_ ne $utf8_locale } @locales; + } + + if ($found_time + $found_monetary < 2) { + my $message = ""; + $message .= "time name" unless $found_time; + if (! $found_monetary) { + $message .= " nor" if $message; + "monetary name"; + } + skip("Couldn't find a locale with a non-ascii $message", 2 - $found_time - $found_monetary); + } +} |