summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- regen/unicode_constants.pl 16
-rw-r--r-- unicode_constants.h 3
-rw-r--r-- utf8.c 10
3 files changed, 29 insertions, 0 deletions
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index acd1f91e73..baf25f1258 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -158,6 +158,22 @@ printf $out_fh "\n/* The number of code points not matching \\pC */\n"
. "#define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C %d\n",
0x110000 - $count;
+# If this release has both the CWCM and CWCF properties, find the highest code
+# point which changes under any case change. Code in utf8.c can use this to
+# short-circuit case-change lookups for any code point above it.
+my @cwcm = prop_invlist('CWCM');
+if (@cwcm) {
+ my @cwcf = prop_invlist('CWCF');
+ if (@cwcf) {
+ my $max = ($cwcm[-1] < $cwcf[-1])
+ ? $cwcf[-1]
+ : $cwcm[-1];
+ printf $out_fh "\n/* The highest code point that has any type of case change */\n"
+ . "#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x%X\n",
+ $max - 1;
+ }
+}
+
print $out_fh "\n#endif /* H_UNICODE_CONSTANTS */\n";
read_only_bottom_close_and_rename($out_fh);
diff --git a/unicode_constants.h b/unicode_constants.h
index 71755de7f6..1384873f19 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -182,6 +182,9 @@
/* The number of code points not matching \pC */
#define NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C 120522
+/* The highest code point that has any type of case change */
+#define HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C 0x118DF
+
#endif /* H_UNICODE_CONSTANTS */
/* ex: set ro: */
diff --git a/utf8.c b/utf8.c
index 4c43bdec2f..fa1439bdfc 100644
--- a/utf8.c
+++ b/utf8.c
@@ -1997,6 +1997,16 @@ S__to_utf8_case(pTHX_ const UV uv1, const U8 *p, U8* ustrp, STRLEN *lenp,
}
goto cases_to_self;
}
+#ifdef HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C
+ if (UNLIKELY(uv1
+ > HIGHEST_CASE_CHANGING_CP_FOR_USE_ONLY_BY_UTF8_DOT_C))
+ {
+
+ /* As of this writing, this means we avoid swash creation
+ * for anything beyond low Plane 1 */
+ goto cases_to_self;
+ }
+#endif
}
}