Change core to use is_invariant_string()

is_ascii_string's name has misled me in the past; the new name is clearer.
author: Karl Williamson <khw@cpan.org> 2014-11-25 12:18:42 -0700
committer: Karl Williamson <khw@cpan.org> 2014-11-26 09:18:33 -0700
commit: 9f10db87f54f2a85594e3661927a9b6128c1f425 (patch)
tree: fbb32d9c0629ccea6b57538b401f577288b4b35f
parent: 09902b1fc2af1bca16bf40b6f6dd1e420562d9e2 (diff)
download: perl-9f10db87f54f2a85594e3661927a9b6128c1f425.tar.gz
5 files changed, 22 insertions, 21 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index 9d41bf04e7..756e027c24 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -1975,9 +1975,9 @@ localeconv()
                                       strlen(value),
 
                                       /* We mark it as UTF-8 if a utf8 locale
-                                       * and is valid, non-ascii UTF-8 */
+                                       * and is valid and variant under UTF-8 */
                                       is_utf8_locale
-                                        && ! is_ascii_string((U8 *) value, 0)
+                                        && ! is_invariant_string((U8 *) value, 0)
                                         && is_utf8_string((U8 *) value, 0)),
                         0);
                   }
@@ -3317,7 +3317,7 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1)
                 STRLEN len = strlen(buf);
 		sv_usepvn_flags(sv, buf, len, SV_HAS_TRAILING_NUL);
 		if (SvUTF8(fmt)
-                    || (! is_ascii_string((U8*) buf, len)
+                    || (! is_invariant_string((U8*) buf, len)
                         && is_utf8_string((U8*) buf, len)
 #ifdef USE_LOCALE_TIME
                         && _is_cur_LC_category_utf8(LC_TIME)
diff --git a/locale.c b/locale.c
index c8460462a1..a5a2cb3a83 100644
--- a/locale.c
+++ b/locale.c
@@ -106,7 +106,7 @@ Perl_set_numeric_radix(pTHX)
 		sv_setpv(PL_numeric_radix_sv, lc->decimal_point);
 	    else
 		PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0);
-            if (! is_ascii_string((U8 *) lc->decimal_point, 0)
+            if (! is_invariant_string((U8 *) lc->decimal_point, 0)
                 && is_utf8_string((U8 *) lc->decimal_point, 0)
                 && _is_cur_LC_category_utf8(LC_NUMERIC))
             {
@@ -1300,7 +1300,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
         lc = localeconv();
         if (! lc
             || ! lc->currency_symbol
-            || is_ascii_string((U8 *) lc->currency_symbol, 0))
+            || is_invariant_string((U8 *) lc->currency_symbol, 0))
         {
             DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
             only_ascii = TRUE;
@@ -1373,14 +1373,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
 
         /* Here the current LC_TIME is set to the locale of the category
          * whose information is desired.  Look at all the days of the week and
-         * month names, and the timezone and am/pm indicator for non-ASCII
+         * month names, and the timezone and am/pm indicator for UTF-8 variant
          * characters.  The first such a one found will tell us if the locale
          * is UTF-8 or not */
 
         for (i = 0; i < 7 + 12; i++) {  /* 7 days; 12 months */
             formatted_time = my_strftime("%A %B %Z %p",
                                     0, 0, hour, dom, month, 112, 0, 0, is_dst);
-            if (! formatted_time || is_ascii_string((U8 *) formatted_time, 0)) {
+            if (! formatted_time || is_invariant_string((U8 *) formatted_time, 0)) {
 
                 /* Here, we didn't find a non-ASCII.  Try the next time through
                  * with the complemented dst and am/pm, and try with the next
@@ -1481,7 +1481,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
                 break;
             }
             errmsg = savepv(errmsg);
-            if (! is_ascii_string((U8 *) errmsg, 0)) {
+            if (! is_invariant_string((U8 *) errmsg, 0)) {
                 non_ascii = TRUE;
                 is_utf8 = is_utf8_string((U8 *) errmsg, 0);
                 break;
diff --git a/mg.c b/mg.c
index 653800347a..27c276249a 100644
--- a/mg.c
+++ b/mg.c
@@ -751,15 +751,16 @@ S_fixup_errno_string(pTHX_ SV* sv)
          * case we should turn on that flag.  This didn't use to happen, and to
          * avoid as many possible backward compatibility issues as possible, we
          * don't turn on the flag unless we have to.  So the flag stays off for
-         * an entirely ASCII string.  We assume that if the string looks like
-         * UTF-8, it really is UTF-8:  "text in any other encoding that uses
-         * bytes with the high bit set is extremely unlikely to pass a UTF-8
-         * validity test" (http://en.wikipedia.org/wiki/Charset_detection).
-         * There is a potential that we will get it wrong however, especially
-         * on short error message text.  (If it turns out to be necessary, we
-         * could also keep track if the current LC_MESSAGES locale is UTF-8) */
+         * an entirely invariant string.  We assume that if the string looks
+         * like UTF-8, it really is UTF-8:  "text in any other encoding that
+         * uses bytes with the high bit set is extremely unlikely to pass a
+         * UTF-8 validity test"
+         * (http://en.wikipedia.org/wiki/Charset_detection).  There is a
+         * potential that we will get it wrong however, especially on short
+         * error message text.  (If it turns out to be necessary, we could also
+         * keep track if the current LC_MESSAGES locale is UTF-8) */
         if (! IN_BYTES  /* respect 'use bytes' */
-            && ! is_ascii_string((U8*) SvPVX_const(sv), SvCUR(sv))
+            && ! is_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv))
             && is_utf8_string((U8*) SvPVX_const(sv), SvCUR(sv)))
         {
             SvUTF8_on(sv);
diff --git a/pp_sys.c b/pp_sys.c
index b3714696b0..0bc1aa179f 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -3486,12 +3486,12 @@ PP(pp_fttext)
 #endif
 
     assert(len);
-    if (! is_ascii_string((U8 *) s, len)) {
+    if (! is_invariant_string((U8 *) s, len)) {
         const U8 *ep;
 
-        /* Here contains a non-ASCII.  See if the entire string is UTF-8.  But
-         * the buffer may end in a partial character, so consider it UTF-8 if
-         * the first non-UTF8 char is an ending partial */
+        /* Here contains a variant under UTF-8 .  See if the entire string is
+         * UTF-8.  But the buffer may end in a partial character, so consider
+         * it UTF-8 if the first non-UTF8 char is an ending partial */
         if (is_utf8_string_loc((U8 *) s, len, &ep)
             || ep + UTF8SKIP(ep)  > (U8 *) (s + len))
         {
diff --git a/toke.c b/toke.c
index 9a01103439..236acd5adc 100644
--- a/toke.c
+++ b/toke.c
@@ -1968,7 +1968,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
     SV * const sv = newSVpvn_utf8(start, len,
 				  !IN_BYTES
 				  && UTF
-				  && !is_ascii_string((const U8*)start, len)
+				  && !is_invariant_string((const U8*)start, len)
 				  && is_utf8_string((const U8*)start, len));
     return sv;
 }
author	Karl Williamson <khw@cpan.org>	2014-11-25 12:18:42 -0700
committer	Karl Williamson <khw@cpan.org>	2014-11-26 09:18:33 -0700
commit	9f10db87f54f2a85594e3661927a9b6128c1f425 (patch)
tree	fbb32d9c0629ccea6b57538b401f577288b4b35f
parent	09902b1fc2af1bca16bf40b6f6dd1e420562d9e2 (diff)
download	perl-9f10db87f54f2a85594e3661927a9b6128c1f425.tar.gz