PATCH: [perl #127288] I18N::Langinfo sets UTF-8 bit

This commit will turn UTF-8 on in the returned SV if its string is legal UTF-8 containing something besides ASCII, and the locale is a UTF-8 one. It is based on the patch included in the ticket, but is generalized to handle edge cases.
author: Karl Williamson <khw@cpan.org> 2018-03-07 22:48:55 -0700
committer: Karl Williamson <khw@cpan.org> 2018-03-12 10:17:14 -0600
commit: 472be41b7bb1dab634c9b2b1655a206eea17f7d6 (patch)
tree: 7e43a322e9acb791facc32c9cc457683c28be1c6 /ext/I18N-Langinfo
parent: 13a5f6feb6a027d1f26e17b55ba95120cacaf024 (diff)
download: perl-472be41b7bb1dab634c9b2b1655a206eea17f7d6.tar.gz
2 files changed, 145 insertions, 9 deletions
diff --git a/ext/I18N-Langinfo/Langinfo.xs b/ext/I18N-Langinfo/Langinfo.xs
index 663cb2a665..904b424b19 100644
--- a/ext/I18N-Langinfo/Langinfo.xs
+++ b/ext/I18N-Langinfo/Langinfo.xs
@@ -1,4 +1,6 @@
 #define PERL_NO_GET_CONTEXT
+#define PERL_EXT
+#define PERL_EXT_LANGINFO
 
 #include "EXTERN.h"
 #include "perl.h"
@@ -22,17 +24,77 @@ INCLUDE: const-xs.inc
 SV*
 langinfo(code)
 	int	code
+  PREINIT:
+        const   char * value;
+        STRLEN  len;
   PROTOTYPE: _
   CODE:
 #ifdef HAS_NL_LANGINFO
 	if (code < 0) {
 	    SETERRNO(EINVAL, LIB_INVARG);
 	    RETVAL = &PL_sv_undef;
-	} else {
-            RETVAL = newSVpv(Perl_langinfo(code), 0);
-        }
-#else
-        RETVAL = newSVpv(Perl_langinfo(code), 0);
+	} else
 #endif
+        {
+            value = Perl_langinfo(code);
+            len = strlen(value);
+            RETVAL = newSVpvn(Perl_langinfo(code), len);
+
+            /* Now see if the UTF-8 flag should be turned on */
+#ifdef USE_LOCALE_CTYPE     /* No utf8 strings if not using LC_CTYPE */
+
+            /* If 'value' is ASCII or not legal UTF-8, the flag doesn't get
+             * turned on, so skip the followin code */
+            if (is_utf8_non_invariant_string((U8 *) value, len)) {
+                int category;
+
+                /* Check if the locale is a UTF-8 one.  The returns from
+                 * Perl_langinfo() are in different locale categories, so check the
+                 * category corresponding to this item */
+                switch (code) {
+
+                    /* This should always return ASCII, so we could instead
+                     * legitimately panic here, but soldier on */
+                    case CODESET:
+                        category = LC_CTYPE;
+                        break;
+
+                    case RADIXCHAR:
+                    case THOUSEP:
+#  ifdef USE_LOCALE_NUMERIC
+                        category = LC_NUMERIC;
+#  else
+                        /* Not ideal, but the best we can do on such a platform */
+                        category = LC_CTYPE;
+#  endif
+                        break;
+
+                    case CRNCYSTR:
+#  ifdef USE_LOCALE_MONETARY
+                        category = LC_MONETARY;
+#  else
+                        category = LC_CTYPE;
+#  endif
+                        break;
+
+                    default:
+#  ifdef USE_LOCALE_TIME
+                        category = LC_TIME;
+#  else
+                        category = LC_CTYPE;
+#  endif
+                        break;
+                }
+
+                /* Here the return is legal UTF-8.  Turn on that flag if the
+                 * locale is UTF-8.  (Otherwise, could just be a coincidence.)
+                 * */
+                if (_is_cur_LC_category_utf8(category)) {
+                    SvUTF8_on(RETVAL);
+                }
+            }
+#endif /* USE_LOCALE_CTYPE */
+        }
+
   OUTPUT:
-	RETVAL
+        RETVAL
diff --git a/ext/I18N-Langinfo/t/Langinfo.t b/ext/I18N-Langinfo/t/Langinfo.t
index 10a660e6d6..a26abb5ac6 100644
--- a/ext/I18N-Langinfo/t/Langinfo.t
+++ b/ext/I18N-Langinfo/t/Langinfo.t
@@ -7,7 +7,12 @@ require "../../t/loc_tools.pl";
 plan skip_all => "I18N::Langinfo or POSIX unavailable" 
     if $Config{'extensions'} !~ m!\bI18N/Langinfo\b!;
 
-my @constants = qw(ABDAY_1 DAY_1 ABMON_1 MON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT D_FMT T_FMT);
+my @times  = qw( MON_1 MON_2 MON_3 MON_4 MON_5 MON_6 MON_7
+                 MON_8 MON_9 MON_10 MON_11 MON_12
+                 DAY_1 DAY_2 DAY_3 DAY_4 DAY_5 DAY_6 DAY_7);
+my @constants = qw(ABDAY_1 DAY_1 ABMON_1 RADIXCHAR AM_STR THOUSEP D_T_FMT
+                   D_FMT T_FMT);
+push @constants, @times;
 
 my %want =
     (
@@ -21,9 +26,9 @@ my %want =
 
 my @want = sort keys %want;
 
-plan tests => 1 + 3 * @constants + keys(@want) + 1;
+plan tests => 1 + 3 * @constants + keys(@want) + 1 + 2;
 
-use_ok('I18N::Langinfo', 'langinfo', @constants);
+use_ok('I18N::Langinfo', 'langinfo', @constants, 'CRNCYSTR');
 
 use POSIX;
 setlocale(LC_ALL, "C");
@@ -69,3 +74,72 @@ SKIP: {
     is (langinfo(&RADIXCHAR), ",",
         "Returns ',' for decimal pt for locale '$comma_locale'");
 }
+
+SKIP: {
+
+    my $found_time = 0;
+    my $found_monetary = 0;
+    my @locales = find_locales( [ 'LC_TIME', 'LC_CTYPE', 'LC_MONETARY' ]);
+
+    while (defined (my $utf8_locale = find_utf8_ctype_locale(\@locales))) {
+        if (! $found_time) {
+            setlocale(&LC_TIME, $utf8_locale);
+            foreach my $time_item (@times) {
+                my $eval_string = "langinfo(&$time_item)";
+                my $time_name = eval $eval_string;
+                if ($@) {
+                    fail("'$eval_string' failed: $@");
+                    last SKIP;
+                }
+                if (! defined $time_name) {
+                    fail("'$eval_string' returned undef");
+                    last SKIP;
+                }
+                if ($time_name eq "") {
+                    fail("'$eval_string' returned an empty name");
+                    last SKIP;
+                }
+
+                if ($time_name =~ /\P{ASCII}/) {
+                    ok(utf8::is_utf8($time_name), "The name for '$time_item' in $utf8_locale is a UTF8 string");
+                    $found_time = 1;
+                    last;
+                }
+            }
+        }
+
+        if (! $found_monetary) {
+            setlocale(&LC_MONETARY, $utf8_locale);
+            my $eval_string = "langinfo(&CRNCYSTR)";
+            my $symbol = eval $eval_string;
+            if ($@) {
+                fail("'$eval_string' failed: $@");
+                last SKIP;
+            }
+            if (! defined $symbol) {
+                fail("'$eval_string' returned undef");
+                last SKIP;
+            }
+            if ($symbol =~ /\P{ASCII}/) {
+                ok(utf8::is_utf8($symbol), "The name for 'CRNCYSTR' in $utf8_locale is a UTF8 string");
+                $found_monetary = 1;
+            }
+        }
+
+        last if $found_monetary && $found_time;
+
+        # Remove this locale from the list, and loop to find another utf8
+        # locale
+        @locales = grep { $_ ne $utf8_locale } @locales;
+    }
+
+    if ($found_time + $found_monetary < 2) {
+        my $message = "";
+        $message .= "time name" unless $found_time;
+        if (! $found_monetary) {
+            $message .= " nor" if $message;
+            "monetary name";
+        }
+        skip("Couldn't find a locale with a non-ascii $message", 2 - $found_time - $found_monetary);
+    }
+}
author	Karl Williamson <khw@cpan.org>	2018-03-07 22:48:55 -0700
committer	Karl Williamson <khw@cpan.org>	2018-03-12 10:17:14 -0600
commit	472be41b7bb1dab634c9b2b1655a206eea17f7d6 (patch)
tree	7e43a322e9acb791facc32c9cc457683c28be1c6 /ext/I18N-Langinfo
parent	13a5f6feb6a027d1f26e17b55ba95120cacaf024 (diff)
download	perl-472be41b7bb1dab634c9b2b1655a206eea17f7d6.tar.gz