summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2011-03-19 18:33:17 -0600
committerKarl Williamson <public@khwilliamson.com>2011-03-19 19:01:37 -0600
commitd94b1d13d0aa079f8a6d2ee1aac4ed342a28901f (patch)
tree48f44f8a568e8422922c059fbaf5d79bb0a9a1fd
parent0b9668eea8042721d06405ae3efbd6d8f34cd410 (diff)
downloadperl-d94b1d13d0aa079f8a6d2ee1aac4ed342a28901f.tar.gz
regcomp.c: Optimizer could lose some info
When ORing two nodes together for the synthetic start class, and one matches outside the 256-char bitmap, we currently don't know what it matches. In some cases it could be some or all of those 256 characters. If so, we have to assume it's all of them.
-rw-r--r--regcomp.c11
1 files changed, 9 insertions, 2 deletions
diff --git a/regcomp.c b/regcomp.c
index d6701ce588..b247ee2c07 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -945,12 +945,19 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
* outside the bitmap, but what they match outside is not the same
* pointer, and hence not easily compared until XXX we extend
* inversion lists this far), give up and allow the start class to
- * match everything outside the bitmap */
+ * match everything outside the bitmap. If that stuff is all above
+ * 255, can just set UNICODE_ALL, otherwise could be anything. */
if (! ANYOF_NONBITMAP(cl)) {
ARG_SET(cl, ARG(or_with));
}
else if (ARG(cl) != ARG(or_with)) {
- cl->flags |= ANYOF_UNICODE_ALL;
+
+ if ((or_with->flags & ANYOF_NONBITMAP_NON_UTF8)) {
+ cl_anything(pRExC_state, cl);
+ }
+ else {
+ cl->flags |= ANYOF_UNICODE_ALL;
+ }
}
/* Take the union */