diff options
-rw-r--r-- | embed.fnc | 3 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | proto.h | 10 | ||||
-rw-r--r-- | regcomp.c | 57 | ||||
-rw-r--r-- | regexp.h | 1 |
5 files changed, 62 insertions, 11 deletions
@@ -1378,7 +1378,8 @@ EsMR |IV |invlist_search |NN SV* const invlist|const UV cp #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) EXmM |void |_invlist_intersection |NN SV* const a|NN SV* const b|NN SV** i EXpM |void |_invlist_intersection_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** i -EXpM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output +EXmM |void |_invlist_union |NULLOK SV* const a|NN SV* const b|NN SV** output +EXpM |void |_invlist_union_maybe_complement_2nd|NULLOK SV* const a|NN SV* const b|bool complement_b|NN SV** output EXmM |void |_invlist_subtract|NN SV* const a|NN SV* const b|NN SV** result EXpM |void |_invlist_invert|NN SV* const invlist EXpM |void |_invlist_invert_prop|NN SV* const invlist @@ -953,7 +953,7 @@ #define _invlist_invert(a) Perl__invlist_invert(aTHX_ a) #define _invlist_invert_prop(a) Perl__invlist_invert_prop(aTHX_ a) #define _invlist_populate_swatch(a,b,c,d) Perl__invlist_populate_swatch(aTHX_ a,b,c,d) -#define _invlist_union(a,b,c) Perl__invlist_union(aTHX_ a,b,c) +#define _invlist_union_maybe_complement_2nd(a,b,c,d) Perl__invlist_union_maybe_complement_2nd(aTHX_ a,b,c,d) #define _new_invlist(a) Perl__new_invlist(aTHX_ a) #define _swash_inversion_hash(a) Perl__swash_inversion_hash(aTHX_ a) #define _swash_to_invlist(a) Perl__swash_to_invlist(aTHX_ a) @@ -6582,10 +6582,14 @@ PERL_CALLCONV void Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); */ -PERL_CALLCONV void Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) +/* PERL_CALLCONV void _invlist_union(pTHX_ SV* const a, SV* const b, SV** output) __attribute__nonnull__(pTHX_2) - __attribute__nonnull__(pTHX_3); -#define PERL_ARGS_ASSERT__INVLIST_UNION \ + __attribute__nonnull__(pTHX_3); */ + +PERL_CALLCONV void Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output) + __attribute__nonnull__(pTHX_2) + __attribute__nonnull__(pTHX_4); +#define PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND \ assert(b); assert(output) PERL_CALLCONV SV* Perl__new_invlist(pTHX_ IV initial_size) @@ -6536,12 +6536,14 @@ Perl__invlist_populate_swatch(pTHX_ SV* const invlist, const UV start, const UV } void -Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) +Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, bool complement_b, SV** output) { /* Take the union of two inversion lists and point <output> to it. *output * should be defined upon input, and if it points to one of the two lists, * the reference count to that list will be decremented. The first list, * <a>, may be NULL, in which case a copy of the second list is returned. + * If <complement_b> is TRUE, the union is taken of the complement + * (inversion) of <b> instead of b itself. * * The basis for this comes from "Unicode Demystified" Chapter 13 by * Richard Gillam, published by Addison-Wesley, and explained at some @@ -6577,7 +6579,7 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) */ UV count = 0; - PERL_ARGS_ASSERT__INVLIST_UNION; + PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND; assert(a != b); /* If either one is empty, the union is the other one */ @@ -6589,6 +6591,9 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) } if (*output != b) { *output = invlist_clone(b); + if (complement_b) { + _invlist_invert(*output); + } } /* else *output already = b; */ return; } @@ -6596,10 +6601,20 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) if (*output == b) { SvREFCNT_dec(b); } - if (*output != a) { - *output = invlist_clone(a); - } - /* else *output already = a; */ + + /* The complement of an empty list is a list that has everything in it, + * so the union with <a> includes everything too */ + if (complement_b) { + if (a == *output) { + SvREFCNT_dec(a); + } + *output = _new_invlist(1); + _append_range_to_invlist(*output, 0, UV_MAX); + } + else if (*output != a) { + *output = invlist_clone(a); + } + /* else *output already = a; */ return; } @@ -6607,6 +6622,31 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) array_a = invlist_array(a); array_b = invlist_array(b); + /* If are to take the union of 'a' with the complement of b, set it + * up so are looking at b's complement. */ + if (complement_b) { + + /* To complement, we invert: if the first element is 0, remove it. To + * do this, we just pretend the array starts one later, and clear the + * flag as we don't have to do anything else later */ + if (array_b[0] == 0) { + array_b++; + len_b--; + complement_b = FALSE; + } + else { + + /* But if the first element is not zero, we unshift a 0 before the + * array. The data structure reserves a space for that 0 (which + * should be a '1' right now), so physical shifting is unneeded, + * but temporarily change that element to 0. Before exiting the + * routine, we must restore the element to '1' */ + array_b--; + len_b++; + array_b[0] = 0; + } + } + /* Size the union for the worst case: that the sets are completely * disjoint */ u = _new_invlist(len_a + len_b); @@ -6725,6 +6765,11 @@ Perl__invlist_union(pTHX_ SV* const a, SV* const b, SV** output) SvREFCNT_dec(*output); } + /* If we've changed b, restore it */ + if (complement_b) { + array_b[0] = 1; + } + *output = u; return; } @@ -56,6 +56,7 @@ typedef struct regexp_paren_pair { } regexp_paren_pair; #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) +#define _invlist_union(a, b, output) _invlist_union_maybe_complement_2nd(a, b, FALSE, output) #define _invlist_intersection(a, b, output) _invlist_intersection_maybe_complement_2nd(a, b, FALSE, output) /* Subtracting b from a leaves in a everything that was there that isn't in b, |