summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- embed.fnc 1
-rw-r--r-- embed.h 1
-rw-r--r-- proto.h 6
-rw-r--r-- regcomp.c 42
4 files changed, 40 insertions, 10 deletions
diff --git a/embed.fnc b/embed.fnc
index f2e13901e8..262428b9d8 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1400,6 +1400,7 @@ EiMR |UV* |get_invlist_version_id_addr |NN SV* invlist
EiM |void |invlist_iterinit|NN SV* invlist
EsMR |bool |invlist_iternext|NN SV* invlist|NN UV* start|NN UV* end
EsMR |IV |invlist_search |NN SV* const invlist|const UV cp
+EiMR |UV |invlist_highest|NN SV* const invlist
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i
diff --git a/embed.h b/embed.h
index b0cfe9f28c..2078f9482a 100644
--- a/embed.h
+++ b/embed.h
@@ -920,6 +920,7 @@
#define invlist_array(a) S_invlist_array(aTHX_ a)
#define invlist_clone(a) S_invlist_clone(aTHX_ a)
#define invlist_extend(a,b) S_invlist_extend(aTHX_ a,b)
+#define invlist_highest(a) S_invlist_highest(aTHX_ a)
#define invlist_iterinit(a) S_invlist_iterinit(aTHX_ a)
#define invlist_iternext(a,b,c) S_invlist_iternext(aTHX_ a,b,c)
#define invlist_len(a) S_invlist_len(aTHX_ a)
diff --git a/proto.h b/proto.h
index b45aa806fe..77d891fab4 100644
--- a/proto.h
+++ b/proto.h
@@ -6479,6 +6479,12 @@ STATIC void S_invlist_extend(pTHX_ SV* const invlist, const UV len)
#define PERL_ARGS_ASSERT_INVLIST_EXTEND \
assert(invlist)
+PERL_STATIC_INLINE UV S_invlist_highest(pTHX_ SV* const invlist)
+ __attribute__warn_unused_result__
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_INVLIST_HIGHEST \
+ assert(invlist)
+
PERL_STATIC_INLINE void S_invlist_iterinit(pTHX_ SV* invlist)
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_INVLIST_ITERINIT \
diff --git a/regcomp.c b/regcomp.c
index 89147d3873..6aca8e3047 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -8095,6 +8095,36 @@ S_invlist_iternext(pTHX_ SV* invlist, UV* start, UV* end)
return TRUE;
}
+PERL_STATIC_INLINE UV
+S_invlist_highest(pTHX_ SV* const invlist)
+{
+ /* Returns the highest code point that matches an inversion list.
+ *
+ * This API has an ambiguity: it returns 0 both when the highest matching
+ * code point actually is 0 and when the list is empty (its length is 0),
+ * so the return value alone cannot tell the two cases apart. If this
+ * distinction matters to you, check for emptiness before calling this
+ * function */
+
+ UV len = invlist_len(invlist);
+ UV *array;
+
+ PERL_ARGS_ASSERT_INVLIST_HIGHEST;
+
+ if (len == 0) {
+ return 0;
+ }
+
+ array = invlist_array(invlist);
+
+ /* The last element in the array in the inversion list always starts a
+ * range that goes to infinity. That range may be for code points that are
+ * matched in the inversion list, or it may be for ones that aren't
+ * matched. If the range is matched, the highest code point in the set is
+ * the final element of this range, infinity, which is reported as UV_MAX.
+ * If the range is not matched, the highest code point in the set is one
+ * less than the element that begins this range */
+ return (ELEMENT_RANGE_MATCHES_INVLIST(len - 1))
+ ? UV_MAX
+ : array[len - 1] - 1;
+}
+
#ifndef PERL_IN_XSUB_RE
SV *
Perl__invlist_contents(pTHX_ SV* const invlist)
@@ -11822,19 +11852,11 @@ parseit:
SV* fold_intersection = NULL;
- const UV highest_index = invlist_len(cp_list) - 1;
-
/* In the Latin1 range, the characters that can be folded-to or -from
* are precisely the alphabetic characters. If the highest code point
* is within Latin1, we can use the compiled-in list, and not have to
- * go out to disk. If the last element in the array is in the
- * inversion list set, it starts a range that goes to infinity, so the
- * maximum of the inversion list is definitely above Latin1.
- * Otherwise, it starts a range that isn't in the set, so the max is
- * one less than it */
- if (! ELEMENT_RANGE_MATCHES_INVLIST(highest_index)
- && invlist_array(cp_list)[highest_index] <= 256)
- {
+ * go out to disk. */
+ if (invlist_highest(cp_list) < 256) {
_invlist_intersection(PL_L1PosixAlpha, cp_list, &fold_intersection);
}
else {