summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Karl Williamson <public@khwilliamson.com> 2014-02-17 15:39:12 -0700
committer: Karl Williamson <public@khwilliamson.com> 2014-02-19 08:32:59 -0700
commit: 3b04b210101efbbbdf1d8095e181c4218cdf59c2 (patch)
tree: 0ab4999ede9e0c313c5d3b2500d6cd81d6446656 /regexec.c
parent: 4afbae25415a29a2ea66f300c95436267450769c (diff)
download: perl-3b04b210101efbbbdf1d8095e181c4218cdf59c2.tar.gz
Change method of passing some info from regcomp to regexec
For the last several releases, the fact that an ANYOF node could match something outside its bitmap has been passed to regexec.c by having its ARG field not be -1 (appropriately cast). A bit was set if the match could occur even if the target string was not UTF-8 encoded. This design was used to save a bit, as previously there was also a bit for it matching UTF-8 strings. That design is no longer tenable, as a future commit will have a third (independent) reason for something to match outside the bitmap. This commit uses the current spare bit flag to indicate if the match can only occur if the target string is UTF-8.
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c21
1 files changed, 5 insertions, 16 deletions
diff --git a/regexec.c b/regexec.c
index c31ae762eb..28b0bb9a43 100644
--- a/regexec.c
+++ b/regexec.c
@@ -7532,7 +7532,7 @@ S_core_regclass_swash(pTHX_ const regexp *prog, const regnode* node, bool doinit
PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH;
- assert(ANYOF_NONBITMAP(node));
+ assert(ANYOF_FLAGS(node) & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8));
if (data && data->count) {
const U32 n = ARG(node);
@@ -7720,25 +7720,14 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
}
/* If the bitmap didn't (or couldn't) match, and something outside the
- * bitmap could match, try that. Locale nodes specify completely the
- * behavior of code points in the bit map (otherwise, a utf8 target would
- * cause them to be treated as Unicode and not locale), except in
- * the very unlikely event when this node is a synthetic start class, which
- * could be a combination of locale and non-locale nodes. So allow locale
- * to match for the synthetic start class, which will give a false
- * positive that will be resolved when the match is done again as not part
- * of the synthetic start class */
+ * bitmap could match, try that. */
if (!match) {
if (c >= 256 && (flags & ANYOF_ABOVE_LATIN1_ALL)) {
match = TRUE; /* Everything above 255 matches */
}
- else if (ANYOF_NONBITMAP(n)
- && ((flags & ANYOF_NONBITMAP_NON_UTF8)
- || (utf8_target
- && (c >=256
- || (! (flags & ANYOF_LOCALE_FLAGS))
- || is_ANYOF_SYNTHETIC(n)))))
- {
+ else if ((flags & ANYOF_NONBITMAP_NON_UTF8)
+ || (utf8_target && (flags & ANYOF_UTF8)))
+ {
SV * const sw = core_regclass_swash(prog, n, TRUE, 0);
if (sw) {
U8 * utf8_p;