summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-11-10 20:30:02 -0700
committerKarl Williamson <public@khwilliamson.com>2011-11-10 21:03:37 -0700
commita410ec234e5c265a69e9f1843e00c227098cffe5 (patch)
treed04196447a299ba476c00c9b843d0e70ad2c52b5 /utf8.c
parent04e9cbbbbda4dbde7526e50b8e55c816341004db (diff)
downloadperl-a410ec234e5c265a69e9f1843e00c227098cffe5.tar.gz
utf8.c: Don't warn on \p{user-defined} for above-Unicode
Perl has allowed user-defined properties to match above-Unicode code points, while falsely warning that it doesn't. This removes that warning.
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c31
1 files changed, 18 insertions, 13 deletions
diff --git a/utf8.c b/utf8.c
index d3c3e02d9e..84e75c5787 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2363,19 +2363,6 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
/* If char is encoded then swatch is for the prefix */
needents = (1 << UTF_ACCUMULATION_SHIFT);
off = NATIVE_TO_UTF(ptr[klen]) & UTF_CONTINUATION_MASK;
- if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_NON_UNICODE)) {
- const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, 0);
-
- /* This outputs warnings for binary properties only, assuming that
- * to_utf8_case() will output any for non-binary. Also, surrogates
- * aren't checked for, as that would warn on things like
- * /\p{Gc=Cs}/ */
- SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE);
- if (SvUV(*bitssvp) == 1) {
- Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
- "Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", code_point);
- }
- }
}
/*
@@ -2432,6 +2419,24 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
Copy(ptr, PL_last_swash_key, klen, U8);
}
+ if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_NON_UNICODE)) {
+ SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE);
+
+ /* This outputs warnings for binary properties only, assuming that
+ * to_utf8_case() will output any for non-binary. Also, surrogates
+ * aren't checked for, as that would warn on things like /\p{Gc=Cs}/ */
+
+ if (SvUV(*bitssvp) == 1) {
+ /* User-defined properties can silently match above-Unicode */
+ SV** const user_defined_svp = hv_fetchs(hv, "USER_DEFINED", FALSE);
+ if (! user_defined_svp || ! SvUV(*user_defined_svp)) {
+ const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, 0);
+ Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
+ "Code point 0x%04"UVXf" is not Unicode, all \\p{} matches fail; all \\P{} matches succeed", code_point);
+ }
+ }
+ }
+
switch ((int)((slen << 3) / needents)) {
case 1:
bit = 1 << (off & 7);