summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc1
-rw-r--r--embed.h1
-rw-r--r--proto.h6
-rw-r--r--regcomp.c83
4 files changed, 91 insertions, 0 deletions
diff --git a/embed.fnc b/embed.fnc
index 97b9d26b50..9d2f239ccc 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1384,6 +1384,7 @@ EXMpR |HV* |_swash_inversion_hash |NN SV* const swash
EXMpR |SV* |_new_invlist |IV initial_size
EXMpR |SV* |_swash_to_invlist |NN SV* const swash
EXMp |void |_append_range_to_invlist |NN SV* const invlist|const UV start|const UV end
+EXMp |void |_invlist_populate_swatch |NN SV* const invlist|const UV start|const UV end|NN U8* swatch
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || defined(PERL_IN_UTF8_C)
EXp |SV* |_core_swash_init|NN const char* pkg|NN const char* name|NN SV* listsv|I32 minbits|I32 none
diff --git a/embed.h b/embed.h
index e245da59e6..8f0b74e640 100644
--- a/embed.h
+++ b/embed.h
@@ -951,6 +951,7 @@
#define _invlist_intersection(a,b,c) Perl__invlist_intersection(aTHX_ a,b,c)
#define _invlist_invert(a) Perl__invlist_invert(aTHX_ a)
#define _invlist_invert_prop(a) Perl__invlist_invert_prop(aTHX_ a)
+#define _invlist_populate_swatch(a,b,c,d) Perl__invlist_populate_swatch(aTHX_ a,b,c,d)
#define _invlist_subtract(a,b,c) Perl__invlist_subtract(aTHX_ a,b,c)
#define _invlist_union(a,b,c) Perl__invlist_union(aTHX_ a,b,c)
#define _new_invlist(a) Perl__new_invlist(aTHX_ a)
diff --git a/proto.h b/proto.h
index e7ec1548de..c4dc4b326c 100644
--- a/proto.h
+++ b/proto.h
@@ -6561,6 +6561,12 @@ PERL_CALLCONV void Perl__invlist_invert_prop(pTHX_ SV* const invlist)
#define PERL_ARGS_ASSERT__INVLIST_INVERT_PROP \
assert(invlist)
+PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV end, U8* swatch)
+ __attribute__nonnull__(pTHX_1)
+ __attribute__nonnull__(pTHX_4);
+#define PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH \
+ assert(invlist); assert(swatch)
+
PERL_CALLCONV void Perl__invlist_subtract(pTHX_ SV* const a, SV* const b, SV** result)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
diff --git a/regcomp.c b/regcomp.c
index b8a43399d2..347f41908f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6185,6 +6185,89 @@ S_invlist_search(pTHX_ SV* const invlist, const UV cp)
}
void
+Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV end, U8* swatch)
+{
+ /* populates a swatch of a swash the same way swatch_get() does in utf8.c,
+ * but is used when the swash has an inversion list. This makes this much
+ * faster, as it uses a binary search instead of a linear one. This is
+ * intimately tied to that function, and perhaps should be in utf8.c,
+ * except it is intimately tied to inversion lists as well. It assumes
+ * that <swatch> is all 0's on input */
+
+ UV current = start;
+ const IV len = invlist_len(invlist);
+ IV i;
+ const UV * array;
+
+ PERL_ARGS_ASSERT__INVLIST_POPULATE_SWATCH;
+
+ if (len == 0) { /* Empty inversion list */
+ return;
+ }
+
+ array = invlist_array(invlist);
+
+ /* Find which element it is */
+ i = invlist_search(invlist, start);
+
+ /* We populate from <start> to <end> */
+ while (current < end) {
+ UV upper;
+
+ /* The inversion list gives the results for every possible code point
+ * after the first one in the list. Only those ranges whose index is
+ * even are ones that the inversion list matches. For the odd ones,
+ * and if the initial code point is not in the list, we have to skip
+ * forward to the next element */
+ if (i == -1 || ! ELEMENT_RANGE_MATCHES_INVLIST(i)) {
+ i++;
+ if (i >= len) { /* Finished if beyond the end of the array */
+ return;
+ }
+ current = array[i];
+ if (current >= end) { /* Finished if beyond the end of what we
+ are populating */
+ return;
+ }
+ }
+ assert(current >= start);
+
+ /* The current range ends one below the next one, except don't go past
+ * <end> */
+ i++;
+ upper = (i < len && array[i] < end) ? array[i] : end;
+
+ /* Here we are in a range that matches. Populate a bit in the 3-bit U8
+ * for each code point in it */
+ for (; current < upper; current++) {
+ const STRLEN offset = (STRLEN)(current - start);
+ swatch[offset >> 3] |= 1 << (offset & 7);
+ }
+
+ /* Quit if at the end of the list */
+ if (i >= len) {
+
+ /* But first, have to deal with the highest possible code point on
+ * the platform. The previous code assumes that <end> is one
+ * beyond where we want to populate, but that is impossible at the
+ * platform's infinity, so have to handle it specially */
+ if (UNLIKELY(end == UV_MAX && ELEMENT_RANGE_MATCHES_INVLIST(len-1)))
+ {
+ const STRLEN offset = (STRLEN)(end - start);
+ swatch[offset >> 3] |= 1 << (offset & 7);
+ }
+ return;
+ }
+
+ /* Advance to the next range, which will be for code points not in the
+ * inversion list */
+ current = array[i];
+ }
+
+ return;
+}
+
+void
Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
{
/* Take the union of two inversion lists and point 'result' to it. If