diff options
author | Karl Williamson <khw@cpan.org> | 2014-11-25 12:18:42 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-11-26 09:18:33 -0700 |
commit | 9f10db87f54f2a85594e3661927a9b6128c1f425 (patch) | |
tree | fbb32d9c0629ccea6b57538b401f577288b4b35f | |
parent | 09902b1fc2af1bca16bf40b6f6dd1e420562d9e2 (diff) | |
download | perl-9f10db87f54f2a85594e3661927a9b6128c1f425.tar.gz |
Change core to use is_invariant_string()
is_ascii_string's name has misled me in the past; the new name is
clearer.
mode | file | lines changed |
---|---|---|
-rw-r--r-- | ext/POSIX/POSIX.xs | 6 |
-rw-r--r-- | locale.c | 10 |
-rw-r--r-- | mg.c | 17 |
-rw-r--r-- | pp_sys.c | 8 |
-rw-r--r-- | toke.c | 2 |

5 files changed, 22 insertions, 21 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs index 9d41bf04e7..756e027c24 100644 --- a/ext/POSIX/POSIX.xs +++ b/ext/POSIX/POSIX.xs @@ -1975,9 +1975,9 @@ localeconv() strlen(value), /* We mark it as UTF-8 if a utf8 locale - * and is valid, non-ascii UTF-8 */ + * and is valid and variant under UTF-8 */ is_utf8_locale - && ! is_ascii_string((U8 *) value, 0) + && ! is_invariant_string((U8 *) value, 0) && is_utf8_string((U8 *) value, 0)), 0); } @@ -3317,7 +3317,7 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1) STRLEN len = strlen(buf); sv_usepvn_flags(sv, buf, len, SV_HAS_TRAILING_NUL); if (SvUTF8(fmt) - || (! is_ascii_string((U8*) buf, len) + || (! is_invariant_string((U8*) buf, len) && is_utf8_string((U8*) buf, len) #ifdef USE_LOCALE_TIME && _is_cur_LC_category_utf8(LC_TIME) @@ -106,7 +106,7 @@ Perl_set_numeric_radix(pTHX) sv_setpv(PL_numeric_radix_sv, lc->decimal_point); else PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); - if (! is_ascii_string((U8 *) lc->decimal_point, 0) + if (! is_invariant_string((U8 *) lc->decimal_point, 0) && is_utf8_string((U8 *) lc->decimal_point, 0) && _is_cur_LC_category_utf8(LC_NUMERIC)) { @@ -1300,7 +1300,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) lc = localeconv(); if (! lc || ! lc->currency_symbol - || is_ascii_string((U8 *) lc->currency_symbol, 0)) + || is_invariant_string((U8 *) lc->currency_symbol, 0)) { DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale)); only_ascii = TRUE; @@ -1373,14 +1373,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) /* Here the current LC_TIME is set to the locale of the category * whose information is desired. Look at all the days of the week and - * month names, and the timezone and am/pm indicator for non-ASCII + * month names, and the timezone and am/pm indicator for UTF-8 variant * characters. 
The first such a one found will tell us if the locale * is UTF-8 or not */ for (i = 0; i < 7 + 12; i++) { /* 7 days; 12 months */ formatted_time = my_strftime("%A %B %Z %p", 0, 0, hour, dom, month, 112, 0, 0, is_dst); - if (! formatted_time || is_ascii_string((U8 *) formatted_time, 0)) { + if (! formatted_time || is_invariant_string((U8 *) formatted_time, 0)) { /* Here, we didn't find a non-ASCII. Try the next time through * with the complemented dst and am/pm, and try with the next @@ -1481,7 +1481,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category) break; } errmsg = savepv(errmsg); - if (! is_ascii_string((U8 *) errmsg, 0)) { + if (! is_invariant_string((U8 *) errmsg, 0)) { non_ascii = TRUE; is_utf8 = is_utf8_string((U8 *) errmsg, 0); break; @@ -751,15 +751,16 @@ S_fixup_errno_string(pTHX_ SV* sv) * case we should turn on that flag. This didn't use to happen, and to * avoid as many possible backward compatibility issues as possible, we * don't turn on the flag unless we have to. So the flag stays off for - * an entirely ASCII string. We assume that if the string looks like - * UTF-8, it really is UTF-8: "text in any other encoding that uses - * bytes with the high bit set is extremely unlikely to pass a UTF-8 - * validity test" (http://en.wikipedia.org/wiki/Charset_detection). - * There is a potential that we will get it wrong however, especially - * on short error message text. (If it turns out to be necessary, we - * could also keep track if the current LC_MESSAGES locale is UTF-8) */ + * an entirely invariant string. We assume that if the string looks + * like UTF-8, it really is UTF-8: "text in any other encoding that + * uses bytes with the high bit set is extremely unlikely to pass a + * UTF-8 validity test" + * (http://en.wikipedia.org/wiki/Charset_detection). There is a + * potential that we will get it wrong however, especially on short + * error message text. 
(If it turns out to be necessary, we could also + * keep track if the current LC_MESSAGES locale is UTF-8) */ if (! IN_BYTES /* respect 'use bytes' */ - && ! is_ascii_string((U8*) SvPVX_const(sv), SvCUR(sv)) + && ! is_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv)) && is_utf8_string((U8*) SvPVX_const(sv), SvCUR(sv))) { SvUTF8_on(sv); @@ -3486,12 +3486,12 @@ PP(pp_fttext) #endif assert(len); - if (! is_ascii_string((U8 *) s, len)) { + if (! is_invariant_string((U8 *) s, len)) { const U8 *ep; - /* Here contains a non-ASCII. See if the entire string is UTF-8. But - * the buffer may end in a partial character, so consider it UTF-8 if - * the first non-UTF8 char is an ending partial */ + /* Here contains a variant under UTF-8 . See if the entire string is + * UTF-8. But the buffer may end in a partial character, so consider + * it UTF-8 if the first non-UTF8 char is an ending partial */ if (is_utf8_string_loc((U8 *) s, len, &ep) || ep + UTF8SKIP(ep) > (U8 *) (s + len)) { @@ -1968,7 +1968,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len) SV * const sv = newSVpvn_utf8(start, len, !IN_BYTES && UTF - && !is_ascii_string((const U8*)start, len) + && !is_invariant_string((const U8*)start, len) && is_utf8_string((const U8*)start, len)); return sv; } |