summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorKarl Williamson <public@khwilliamson.com>2014-02-17 15:39:12 -0700
committerKarl Williamson <public@khwilliamson.com>2014-02-19 08:32:59 -0700
commit3b04b210101efbbbdf1d8095e181c4218cdf59c2 (patch)
tree0ab4999ede9e0c313c5d3b2500d6cd81d6446656 /regcomp.c
parent4afbae25415a29a2ea66f300c95436267450769c (diff)
downloadperl-3b04b210101efbbbdf1d8095e181c4218cdf59c2.tar.gz
Change method of passing some info from regcomp to regexec
For the last several releases, the fact that an ANYOF node could match something outside its bitmap has been passed to regexec.c by having its ARG field not be -1 (appropriately cast). A bit was set if the match could occur even if the target string was not UTF-8 encoded. This design was used to save a bit, as previously there was a bit also for it matching UTF-8 strings. That design is no longer tenable, as a future commit will have a third (independent) reason for something to match outside the bitmap. This commit uses the current spare bit flag to indicate if the match can only occur if the target string is UTF-8.
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c13
1 files changed, 10 insertions, 3 deletions
diff --git a/regcomp.c b/regcomp.c
index 6382c78c12..f7bac3d6e7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -12351,6 +12351,9 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
if (end == UV_MAX && start <= 256) {
ANYOF_FLAGS(node) |= ANYOF_ABOVE_LATIN1_ALL;
}
+ else if (end >= 256) {
+ ANYOF_FLAGS(node) |= ANYOF_UTF8;
+ }
/* Quit if are above what we should change */
if (start > 255) {
@@ -14792,6 +14795,7 @@ parseit:
else {
cp_list = depends_list;
}
+ ANYOF_FLAGS(ret) |= ANYOF_UTF8;
}
/* If there is a swash and more than one element, we can't use the swash in
@@ -14845,12 +14849,15 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
PERL_ARGS_ASSERT_SET_ANYOF_ARG;
if (! cp_list && ! runtime_defns) {
+ assert(! (ANYOF_FLAGS(node) & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8)));
ARG_SET(node, ANYOF_NONBITMAP_EMPTY);
}
else {
AV * const av = newAV();
SV *rv;
+ assert(ANYOF_FLAGS(node) & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8));
+
av_store(av, 0, (runtime_defns)
? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
if (swash) {
@@ -15665,8 +15672,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
}
}
- if ((flags & ANYOF_ABOVE_LATIN1_ALL)
- || ANYOF_UTF8_LOCALE_INVLIST(o) || ANYOF_NONBITMAP(o))
+ if ((flags & (ANYOF_ABOVE_LATIN1_ALL|ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8))
+ || ANYOF_UTF8_LOCALE_INVLIST(o))
{
if (do_sep) {
Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
@@ -15682,7 +15689,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
/* output information about the unicode matching */
if (flags & ANYOF_ABOVE_LATIN1_ALL)
sv_catpvs(sv, "{unicode_all}");
- else if (ANYOF_NONBITMAP(o)) {
+ else if (FLAGS(o) & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8)) {
SV *lv; /* Set if there is something outside the bit map. */
bool byte_output = FALSE; /* If something in the bitmap has
been output */