summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-05-29 21:45:09 -0600
committerKarl Williamson <public@khwilliamson.com>2011-07-03 14:05:47 -0600
commit2fde50e118eac35eeed062c93ba08b1e5b2609a7 (patch)
tree6b483766066df9815d5077164344462ea70ebff8
parent25e94a65210bfb047577ed6d937202a078380533 (diff)
downloadperl-2fde50e118eac35eeed062c93ba08b1e5b2609a7.tar.gz
regcomp.c: Do some [^abc] inversion at compile time
The new facilities with inversion lists enable us to do some more compile-time inversions.
-rw-r--r--regcomp.c37
1 files changed, 32 insertions, 5 deletions
diff --git a/regcomp.c b/regcomp.c
index a23134cf41..4e359335ce 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -10555,18 +10555,45 @@ parseit:
* nothing like \w in it; some thought also would have to be given to the
* interaction with above 0x100 chars */
if (! LOC
- && (ANYOF_FLAGS(ret) & ANYOF_FLAGS_ALL) == ANYOF_INVERT
+ && (ANYOF_FLAGS(ret) & ANYOF_INVERT)
&& ! unicode_alternate
- && ! nonbitmap
+ /* In case of /d, there are some things that should match only when
+ * not in the bitmap, i.e., they require UTF8 to match. These are
+ * listed in nonbitmap. */
+ && (! nonbitmap
+ || ! DEPENDS_SEMANTICS
+ || (ANYOF_FLAGS(ret) & ANYOF_NONBITMAP_NON_UTF8))
&& SvCUR(listsv) == initial_listsv_len)
{
+ if (! nonbitmap) {
for (value = 0; value < ANYOF_BITMAP_SIZE; ++value)
ANYOF_BITMAP(ret)[value] ^= 0xFF;
+ /* The inversion means that everything above 255 is matched */
+ ANYOF_FLAGS(ret) |= ANYOF_UNICODE_ALL;
+ }
+ else {
+ /* Here, there are also things outside the bitmap. Go through each bit
+ * individually, adding each set bit to the list of code points to be
+ * removed from those things not in the bitmap */
+ SV *remove_list = _new_invlist(2);
+ invlist_invert(nonbitmap);
+ for (value = 0; value < 256; ++value) {
+ if (ANYOF_BITMAP_TEST(ret, value)) {
+ ANYOF_BITMAP_CLEAR(ret, value);
+ remove_list = add_cp_to_invlist(remove_list, value);
+ }
+ else {
+ ANYOF_BITMAP_SET(ret, value);
+ }
+ }
+ invlist_subtract(nonbitmap, remove_list, &nonbitmap);
+ SvREFCNT_dec(remove_list);
+ }
+
stored = 256 - stored;
- /* The inversion means that everything above 255 is matched; and at the
- * same time we clear the invert flag */
- ANYOF_FLAGS(ret) = ANYOF_UNICODE_ALL;
+ /* Clear the invert flag since the inversion has just been done here */
+ ANYOF_FLAGS(ret) &= ~ANYOF_INVERT;
}
/* Folding in the bitmap is taken care of above, but not for locale (for