summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2014-11-25 12:18:42 -0700
committer Karl Williamson <khw@cpan.org> 2014-11-26 09:18:33 -0700
commit 9f10db87f54f2a85594e3661927a9b6128c1f425 (patch)
tree fbb32d9c0629ccea6b57538b401f577288b4b35f
parent 09902b1fc2af1bca16bf40b6f6dd1e420562d9e2 (diff)
download perl-9f10db87f54f2a85594e3661927a9b6128c1f425.tar.gz
Change core to use is_invariant_string()
is_ascii_string's name has misled me in the past; the new name is clearer.
-rw-r--r-- ext/POSIX/POSIX.xs 6
-rw-r--r-- locale.c 10
-rw-r--r-- mg.c 17
-rw-r--r-- pp_sys.c 8
-rw-r--r-- toke.c 2
5 files changed, 22 insertions, 21 deletions
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index 9d41bf04e7..756e027c24 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -1975,9 +1975,9 @@ localeconv()
strlen(value),
/* We mark it as UTF-8 if a utf8 locale
- * and is valid, non-ascii UTF-8 */
+ * and is valid and variant under UTF-8 */
is_utf8_locale
- && ! is_ascii_string((U8 *) value, 0)
+ && ! is_invariant_string((U8 *) value, 0)
&& is_utf8_string((U8 *) value, 0)),
0);
}
@@ -3317,7 +3317,7 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1)
STRLEN len = strlen(buf);
sv_usepvn_flags(sv, buf, len, SV_HAS_TRAILING_NUL);
if (SvUTF8(fmt)
- || (! is_ascii_string((U8*) buf, len)
+ || (! is_invariant_string((U8*) buf, len)
&& is_utf8_string((U8*) buf, len)
#ifdef USE_LOCALE_TIME
&& _is_cur_LC_category_utf8(LC_TIME)
diff --git a/locale.c b/locale.c
index c8460462a1..a5a2cb3a83 100644
--- a/locale.c
+++ b/locale.c
@@ -106,7 +106,7 @@ Perl_set_numeric_radix(pTHX)
sv_setpv(PL_numeric_radix_sv, lc->decimal_point);
else
PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0);
- if (! is_ascii_string((U8 *) lc->decimal_point, 0)
+ if (! is_invariant_string((U8 *) lc->decimal_point, 0)
&& is_utf8_string((U8 *) lc->decimal_point, 0)
&& _is_cur_LC_category_utf8(LC_NUMERIC))
{
@@ -1300,7 +1300,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
lc = localeconv();
if (! lc
|| ! lc->currency_symbol
- || is_ascii_string((U8 *) lc->currency_symbol, 0))
+ || is_invariant_string((U8 *) lc->currency_symbol, 0))
{
DEBUG_L(PerlIO_printf(Perl_debug_log, "Couldn't get currency symbol for %s, or contains only ASCII; can't use for determining if UTF-8 locale\n", save_input_locale));
only_ascii = TRUE;
@@ -1373,14 +1373,14 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
/* Here the current LC_TIME is set to the locale of the category
* whose information is desired. Look at all the days of the week and
- * month names, and the timezone and am/pm indicator for non-ASCII
+ * month names, and the timezone and am/pm indicator for UTF-8 variant
* characters. The first such a one found will tell us if the locale
* is UTF-8 or not */
for (i = 0; i < 7 + 12; i++) { /* 7 days; 12 months */
formatted_time = my_strftime("%A %B %Z %p",
0, 0, hour, dom, month, 112, 0, 0, is_dst);
- if (! formatted_time || is_ascii_string((U8 *) formatted_time, 0)) {
+ if (! formatted_time || is_invariant_string((U8 *) formatted_time, 0)) {
/* Here, we didn't find a non-ASCII. Try the next time through
* with the complemented dst and am/pm, and try with the next
@@ -1481,7 +1481,7 @@ Perl__is_cur_LC_category_utf8(pTHX_ int category)
break;
}
errmsg = savepv(errmsg);
- if (! is_ascii_string((U8 *) errmsg, 0)) {
+ if (! is_invariant_string((U8 *) errmsg, 0)) {
non_ascii = TRUE;
is_utf8 = is_utf8_string((U8 *) errmsg, 0);
break;
diff --git a/mg.c b/mg.c
index 653800347a..27c276249a 100644
--- a/mg.c
+++ b/mg.c
@@ -751,15 +751,16 @@ S_fixup_errno_string(pTHX_ SV* sv)
* case we should turn on that flag. This didn't use to happen, and to
* avoid as many possible backward compatibility issues as possible, we
* don't turn on the flag unless we have to. So the flag stays off for
- * an entirely ASCII string. We assume that if the string looks like
- * UTF-8, it really is UTF-8: "text in any other encoding that uses
- * bytes with the high bit set is extremely unlikely to pass a UTF-8
- * validity test" (http://en.wikipedia.org/wiki/Charset_detection).
- * There is a potential that we will get it wrong however, especially
- * on short error message text. (If it turns out to be necessary, we
- * could also keep track if the current LC_MESSAGES locale is UTF-8) */
+ * an entirely invariant string. We assume that if the string looks
+ * like UTF-8, it really is UTF-8: "text in any other encoding that
+ * uses bytes with the high bit set is extremely unlikely to pass a
+ * UTF-8 validity test"
+ * (http://en.wikipedia.org/wiki/Charset_detection). There is a
+ * potential that we will get it wrong however, especially on short
+ * error message text. (If it turns out to be necessary, we could also
+ * keep track if the current LC_MESSAGES locale is UTF-8) */
if (! IN_BYTES /* respect 'use bytes' */
- && ! is_ascii_string((U8*) SvPVX_const(sv), SvCUR(sv))
+ && ! is_invariant_string((U8*) SvPVX_const(sv), SvCUR(sv))
&& is_utf8_string((U8*) SvPVX_const(sv), SvCUR(sv)))
{
SvUTF8_on(sv);
diff --git a/pp_sys.c b/pp_sys.c
index b3714696b0..0bc1aa179f 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -3486,12 +3486,12 @@ PP(pp_fttext)
#endif
assert(len);
- if (! is_ascii_string((U8 *) s, len)) {
+ if (! is_invariant_string((U8 *) s, len)) {
const U8 *ep;
- /* Here contains a non-ASCII. See if the entire string is UTF-8. But
- * the buffer may end in a partial character, so consider it UTF-8 if
- * the first non-UTF8 char is an ending partial */
+ /* Here contains a variant under UTF-8 . See if the entire string is
+ * UTF-8. But the buffer may end in a partial character, so consider
+ * it UTF-8 if the first non-UTF8 char is an ending partial */
if (is_utf8_string_loc((U8 *) s, len, &ep)
|| ep + UTF8SKIP(ep) > (U8 *) (s + len))
{
diff --git a/toke.c b/toke.c
index 9a01103439..236acd5adc 100644
--- a/toke.c
+++ b/toke.c
@@ -1968,7 +1968,7 @@ S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
SV * const sv = newSVpvn_utf8(start, len,
!IN_BYTES
&& UTF
- && !is_ascii_string((const U8*)start, len)
+ && !is_invariant_string((const U8*)start, len)
&& is_utf8_string((const U8*)start, len));
return sv;
}