summaryrefslogtreecommitdiff
path: root/ext/I18N-Langinfo
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-03-07 22:48:55 -0700
committerKarl Williamson <khw@cpan.org>2018-03-12 10:17:14 -0600
commit472be41b7bb1dab634c9b2b1655a206eea17f7d6 (patch)
tree7e43a322e9acb791facc32c9cc457683c28be1c6 /ext/I18N-Langinfo
parent13a5f6feb6a027d1f26e17b55ba95120cacaf024 (diff)
downloadperl-472be41b7bb1dab634c9b2b1655a206eea17f7d6.tar.gz
PATCH: [perl #127288] I18N::Langinfo sets UTF-8 bit
This commit will turn UTF-8 on in the returned SV if its string is legal UTF-8 containing something besides ASCII, and the locale is a UTF-8 one. It is based on the patch included in the ticket, but is generalized to handle edge cases.
Diffstat (limited to 'ext/I18N-Langinfo')
-rw-r--r--ext/I18N-Langinfo/Langinfo.xs74
-rw-r--r--ext/I18N-Langinfo/t/Langinfo.t80
2 files changed, 145 insertions, 9 deletions
diff --git a/ext/I18N-Langinfo/Langinfo.xs b/ext/I18N-Langinfo/Langinfo.xs
index 663cb2a665..904b424b19 100644
--- a/ext/I18N-Langinfo/Langinfo.xs
+++ b/ext/I18N-Langinfo/Langinfo.xs
@@ -1,4 +1,6 @@
#define PERL_NO_GET_CONTEXT
+#define PERL_EXT
+#define PERL_EXT_LANGINFO
#include "EXTERN.h"
#include "perl.h"
@@ -22,17 +24,77 @@ INCLUDE: const-xs.inc
SV*
langinfo(code)
int code
+ PREINIT:
+ const char * value;
+ STRLEN len;
PROTOTYPE: _
CODE:
#ifdef HAS_NL_LANGINFO
if (code < 0) {
SETERRNO(EINVAL, LIB_INVARG);
RETVAL = &PL_sv_undef;
- } else {
- RETVAL = newSVpv(Perl_langinfo(code), 0);
- }
-#else
- RETVAL = newSVpv(Perl_langinfo(code), 0);
+ } else
#endif
+ {
+ value = Perl_langinfo(code);
+ len = strlen(value);
+ RETVAL = newSVpvn(Perl_langinfo(code), len);
+
+ /* Now see if the UTF-8 flag should be turned on */
+#ifdef USE_LOCALE_CTYPE /* No utf8 strings if not using LC_CTYPE */
+
+ /* If 'value' is ASCII or not legal UTF-8, the flag doesn't get
+ * turned on, so skip the followin code */
+ if (is_utf8_non_invariant_string((U8 *) value, len)) {
+ int category;
+
+ /* Check if the locale is a UTF-8 one. The returns from
+ * Perl_langinfo() are in different locale categories, so check the
+ * category corresponding to this item */
+ switch (code) {
+
+ /* This should always return ASCII, so we could instead
+ * legitimately panic here, but soldier on */
+ case CODESET:
+ category = LC_CTYPE;
+ break;
+
+ case RADIXCHAR:
+ case THOUSEP:
+# ifdef USE_LOCALE_NUMERIC
+ category = LC_NUMERIC;
+# else
+ /* Not ideal, but the best we can do on such a platform */
+ category = LC_CTYPE;
+# endif
+ break;
+
+ case CRNCYSTR:
+# ifdef USE_LOCALE_MONETARY
+ category = LC_MONETARY;
+# else
+ category = LC_CTYPE;
+# endif
+ break;
+
+ default:
+# ifdef USE_LOCALE_TIME
+ category = LC_TIME;
+# else
+ category = LC_CTYPE;
+# endif
+ break;
+ }
+
+ /* Here the return is legal UTF-8. Turn on that flag if the
+ * locale is UTF-8. (Otherwise, could just be a coincidence.)
+ * */
+ if (_is_cur_LC_category_utf8(category)) {
+ SvUTF8_on(RETVAL);
+ }
+ }
+#endif /* USE_LOCALE_CTYPE */
+ }
+
OUTPUT:
- RETVAL
+ RETVAL
diff --git a/ext/I18N-Langinfo/t/Langinfo.t b/ext/I18N-Langinfo/t/Langinfo.t
index 10a660e6d6..a26abb5ac6 100644
--- a/ext/I18N-Langinfo/t/Langinfo.t
+++ b/ext/I18N-Langinfo/t/Langinfo.t
@@ -7,7 +7,12 @@ require "../../t/loc_tools.pl";
plan skip_all => "I18N::Langinfo or POSIX unavailable"
if $Config{'extensions'} !~ m!\bI18N/Langinfo\b!;
-my @constants = qw(ABDAY_1 DAY_1 ABMON_1 MON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT D_FMT T_FMT);
+my @times = qw( MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7
+ MON_8 MON_9 MON_10 MON_11 MON_12
+ DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7);
+my @constants = qw(ABDAY_1 DAY_1 ABMON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT
+ D_FMT T_FMT);
+push @constants, @times;
my %want =
(
@@ -21,9 +26,9 @@ my %want =
my @want = sort keys %want;
-plan tests => 1 + 3 * @constants + keys(@want) + 1;
+plan tests => 1 + 3 * @constants + keys(@want) + 1 + 2;
-use_ok('I18N::Langinfo', 'langinfo', @constants);
+use_ok('I18N::Langinfo', 'langinfo', @constants, 'CRNCYSTR');
use POSIX;
setlocale(LC_ALL, "C");
@@ -69,3 +74,72 @@ SKIP: {
is (langinfo(&RADIXCHAR), ",",
"Returns ',' for decimal pt for locale '$comma_locale'");
}
+
+SKIP: {
+
+ my $found_time = 0;
+ my $found_monetary = 0;
+ my @locales = find_locales( [ 'LC_TIME', 'LC_CTYPE', 'LC_MONETARY' ]);
+
+ while (defined (my $utf8_locale = find_utf8_ctype_locale(\@locales))) {
+ if (! $found_time) {
+ setlocale(&LC_TIME, $utf8_locale);
+ foreach my $time_item (@times) {
+ my $eval_string = "langinfo(&$time_item)";
+ my $time_name = eval $eval_string;
+ if ($@) {
+ fail("'$eval_string' failed: $@");
+ last SKIP;
+ }
+ if (! defined $time_name) {
+ fail("'$eval_string' returned undef");
+ last SKIP;
+ }
+ if ($time_name eq "") {
+ fail("'$eval_string' returned an empty name");
+ last SKIP;
+ }
+
+ if ($time_name =~ /\P{ASCII}/) {
+ ok(utf8::is_utf8($time_name), "The name for '$time_item' in $utf8_locale is a UTF8 string");
+ $found_time = 1;
+ last;
+ }
+ }
+ }
+
+ if (! $found_monetary) {
+ setlocale(&LC_MONETARY, $utf8_locale);
+ my $eval_string = "langinfo(&CRNCYSTR)";
+ my $symbol = eval $eval_string;
+ if ($@) {
+ fail("'$eval_string' failed: $@");
+ last SKIP;
+ }
+ if (! defined $symbol) {
+ fail("'$eval_string' returned undef");
+ last SKIP;
+ }
+ if ($symbol =~ /\P{ASCII}/) {
+ ok(utf8::is_utf8($symbol), "The name for 'CRNCYSTR' in $utf8_locale is a UTF8 string");
+ $found_monetary = 1;
+ }
+ }
+
+ last if $found_monetary && $found_time;
+
+ # Remove this locale from the list, and loop to find another utf8
+ # locale
+ @locales = grep { $_ ne $utf8_locale } @locales;
+ }
+
+ if ($found_time + $found_monetary < 2) {
+ my $message = "";
+ $message .= "time name" unless $found_time;
+ if (! $found_monetary) {
+ $message .= " nor" if $message;
+ "monetary name";
+ }
+ skip("Couldn't find a locale with a non-ascii $message", 2 - $found_time - $found_monetary);
+ }
+}