summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.fnc3
-rw-r--r--embed.h2
-rw-r--r--proto.h10
-rw-r--r--regcomp.c57
-rw-r--r--regexp.h1
5 files changed, 62 insertions, 11 deletions
diff --git a/embed.fnc b/embed.fnc
index 3ed6ac50dd..4f0206c462 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1378,7 +1378,8 @@ EsMR |IV |invlist_search |NN SV* const invlist|const UV cp
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i
EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i
-EXpM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
+EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output
+EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output
EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result
EXpM |void |_invlist_invert|NN SV* const invlist
EXpM |void |_invlist_invert_prop|NN SV* const invlist
diff --git a/embed.h b/embed.h
index 7d4313fa1d..e41be49e03 100644
--- a/embed.h
+++ b/embed.h
@@ -953,7 +953,7 @@
#define _invlist_invert(a) Perl__invlist_invert(aTHX_ a)
#define _invlist_invert_prop(a) Perl__invlist_invert_prop(aTHX_ a)
#define _invlist_populate_swatch(a,b,c,d) Perl__invlist_populate_swatch(aTHX_ a,b,c,d)
-#define _invlist_union(a,b,c) Perl__invlist_union(aTHX_ a,b,c)
+#define _invlist_union_maybe_complement_2nd(a,b,c,d) Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d)
#define _new_invlist(a) Perl__new_invlist(aTHX_ a)
#define _swash_inversion_hash(a) Perl__swash_inversion_hash(aTHX_ a)
#define _swash_to_invlist(a) Perl__swash_to_invlist(aTHX_ a)
diff --git a/proto.h b/proto.h
index c79be42aed..bae0f9dfb1 100644
--- a/proto.h
+++ b/proto.h
@@ -6582,10 +6582,14 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3); */
-PERL_CALLCONV void Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
+/* PERL_CALLCONV void _invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
__attribute__nonnull__(pTHX_2)
- __attribute__nonnull__(pTHX_3);
-#define PERL_ARGS_ASSERT__INVLIST_UNION \
+ __attribute__nonnull__(pTHX_3); */
+
+PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
+ __attribute__nonnull__(pTHX_2)
+ __attribute__nonnull__(pTHX_4);
+#define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \
assert(b); assert(output)
PERL_CALLCONV SV* Perl__new_invlist(pTHX_ IV initial_size)
diff --git a/regcomp.c b/regcomp.c
index 19ecee62d7..248272b4e0 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6536,12 +6536,14 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV
}
void
-Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
+Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output)
{
/* Take the union of two inversion lists and point <output> to it. *output
* should be defined upon input, and if it points to one of the two lists,
* the reference count to that list will be decremented. The first list,
* <a>, may be NULL, in which case a copy of the second list is returned.
+ * If <complement_b> is TRUE, the union is taken of the complement
+ * (inversion) of <b> instead of b itself.
*
* The basis for this comes from "Unicode Demystified" Chapter 13 by
* Richard Gillam, published by Addison-Wesley, and explained at some
@@ -6577,7 +6579,7 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
*/
UV count = 0;
- PERL_ARGS_ASSERT__INVLIST_UNION;
+ PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
assert(a != b);
/* If either one is empty, the union is the other one */
@@ -6589,6 +6591,9 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
}
if (*output != b) {
*output = invlist_clone(b);
+ if (complement_b) {
+ _invlist_invert(*output);
+ }
} /* else *output already = b; */
return;
}
@@ -6596,10 +6601,20 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
if (*output == b) {
SvREFCNT_dec(b);
}
- if (*output != a) {
- *output = invlist_clone(a);
- }
- /* else *output already = a; */
+
+ /* The complement of an empty list is a list that has everything in it,
+ * so the union with <a> includes everything too */
+ if (complement_b) {
+ if (a == *output) {
+ SvREFCNT_dec(a);
+ }
+ *output = _new_invlist(1);
+ _append_range_to_invlist(*output, 0, UV_MAX);
+ }
+ else if (*output != a) {
+ *output = invlist_clone(a);
+ }
+ /* else *output already = a; */
return;
}
@@ -6607,6 +6622,31 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
array_a = invlist_array(a);
array_b = invlist_array(b);
+ /* If are to take the union of 'a' with the complement of b, set it
+ * up so are looking at b's complement. */
+ if (complement_b) {
+
+ /* To complement, we invert: if the first element is 0, remove it. To
+ * do this, we just pretend the array starts one later, and clear the
+ * flag as we don't have to do anything else later */
+ if (array_b[0] == 0) {
+ array_b++;
+ len_b--;
+ complement_b = FALSE;
+ }
+ else {
+
+ /* But if the first element is not zero, we unshift a 0 before the
+ * array. The data structure reserves a space for that 0 (which
+ * should be a '1' right now), so physical shifting is unneeded,
+ * but temporarily change that element to 0. Before exiting the
+ * routine, we must restore the element to '1' */
+ array_b--;
+ len_b++;
+ array_b[0] = 0;
+ }
+ }
+
/* Size the union for the worst case: that the sets are completely
* disjoint */
u = _new_invlist(len_a + len_b);
@@ -6725,6 +6765,11 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output)
SvREFCNT_dec(*output);
}
+ /* If we've changed b, restore it */
+ if (complement_b) {
+ array_b[0] = 1;
+ }
+
*output = u;
return;
}
diff --git a/regexp.h b/regexp.h
index 68d5830ec7..78fec5781a 100644
--- a/regexp.h
+++ b/regexp.h
@@ -56,6 +56,7 @@ typedef struct regexp_paren_pair {
} regexp_paren_pair;
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C)
+#define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output)
#define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output)
/* Subtracting b from a leaves in a everything that was there that isn't in b,