summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-09-20 22:24:59 -0600
committerKarl Williamson <khw@cpan.org>2018-10-20 00:09:54 -0600
commitd156f5cb3264950617984f631f4023cd2482df47 (patch)
tree1ef0da32372c18350707728b1386b7bc1ef77ddc
parent3edce4f5816b41e2829e5e8b9fb794c46d8a1b95 (diff)
downloadperl-d156f5cb3264950617984f631f4023cd2482df47.tar.gz
regcomp.c: Generate new regnode for /[[:posix:]]/l
This follows on to the previous commit and generates code to use the new regnode. This allows some simplifications. The determination of the regnode is moved later in the function that does this; the code that backed this out if we guessed wrong is excised.
-rw-r--r--regcomp.c82
-rw-r--r--t/re/anyof.t4
2 files changed, 38 insertions, 48 deletions
diff --git a/regcomp.c b/regcomp.c
index 92571d9b6f..b0c8db6c5a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2045,9 +2045,9 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
+ OP(ssc) = ANYOFPOSIXL;
}
-
- if (RExC_contains_locale) {
+ else if (RExC_contains_locale) {
OP(ssc) = ANYOFL;
}
@@ -5566,6 +5566,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
case ANYOFD:
case ANYOFL:
+ case ANYOFPOSIXL:
case ANYOF:
if (flags & SCF_DO_STCLASS_AND)
ssc_and(pRExC_state, data->start_class,
@@ -16493,6 +16494,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
AV* posix_warnings = NULL;
const bool do_posix_warnings = return_posix_warnings
|| (PASS2 && ckWARN(WARN_REGEXP));
+ U8 op = END; /* The returned node-type, initialized to an impossible
+ one. */
+ U8 anyof_flags = 0; /* flag bits if the node is an ANYOF-type */
+ U32 posixl = 0; /* bit field of posix classes matched under /l */
GET_RE_DEBUG_FLAGS_DECL;
@@ -16509,21 +16514,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
allow_multi_folds = FALSE;
#endif
- /* Assume we are going to generate an ANYOF node. */
- ret = reganode(pRExC_state,
- (LOC)
- ? ANYOFL
- : ANYOF,
- 0);
-
if (SIZE_ONLY) {
- RExC_size += ANYOF_SKIP;
listsv = &PL_sv_undef; /* For code scanners: listsv always non-NULL. */
}
else {
- ANYOF_FLAGS(ret) = 0;
-
- RExC_emit += ANYOF_SKIP;
listsv = newSVpvs_flags("# comment\n", SVs_TEMP);
initial_listsv_len = SvCUR(listsv);
SvTEMP_off(listsv); /* Grr, TEMPs and mortals are conflated. */
@@ -16974,7 +16968,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* We don't know yet what this matches, so have to flag
* it */
- ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
+ anyof_flags |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
}
else {
@@ -17179,14 +17173,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* by locale, and hence are dealt with separately */
if (! need_class) {
need_class = 1;
- if (SIZE_ONLY) {
- RExC_size += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
- }
- else {
- RExC_emit += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
- }
- ANYOF_FLAGS(ret) |= ANYOF_MATCHES_POSIXL;
- ANYOF_POSIXL_ZERO(ret);
+ anyof_flags |= ANYOF_MATCHES_POSIXL;
/* We can't change this into some other type of node
* (unless this is the only element, in which case there
@@ -17197,15 +17184,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* Coverity thinks it is possible for this to be negative; both
* jhi and khw think it's not, but be safer */
- assert(! (ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
+ assert(! (anyof_flags & ANYOF_MATCHES_POSIXL)
|| (namedclass + ((namedclass % 2) ? -1 : 1)) >= 0);
/* See if it already matches the complement of this POSIX
* class */
- if ((ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
- && ANYOF_POSIXL_TEST(ret, namedclass + ((namedclass % 2)
- ? -1
- : 1)))
+ if ( (anyof_flags & ANYOF_MATCHES_POSIXL)
+ && POSIXL_TEST(posixl, namedclass + ((namedclass % 2)
+ ? -1
+ : 1)))
{
posixl_matches_all = TRUE;
break; /* No need to continue. Since it matches both
@@ -17214,7 +17201,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
/* Add this class to those that should be checked at runtime */
- ANYOF_POSIXL_SET(ret, namedclass);
+ POSIXL_SET(posixl, namedclass);
/* The above-Latin1 characters are not subject to locale rules.
* Just add them, in the second pass, to the
@@ -17734,7 +17721,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len;
RExC_end = RExC_parse + len;
RExC_in_multi_char_class = 1;
- RExC_emit = (regnode *)orig_emit;
ret = reg(pRExC_state, 1, &reg_flags, depth+1);
@@ -17910,23 +17896,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* an optimization */
if (op != END) {
- /* Throw away this ANYOF regnode, and emit the calculated one,
+ /* Emit the calculated regnode,
* which should correspond to the beginning, not current, state of
* the parse */
const char * cur_parse = RExC_parse;
RExC_parse = (char *)orig_parse;
- if ( SIZE_ONLY) {
- if (! LOC) {
-
- /* To get locale nodes to not use the full ANYOF size would
- * require moving the code above that writes the portions
- * of it that aren't in other nodes to after this point.
- * e.g. ANYOF_POSIXL_SET */
- RExC_size = orig_size;
- }
- }
- else {
- RExC_emit = (regnode *)orig_emit;
if (PL_regkind[op] == POSIXD) {
if (op == POSIXL) {
RExC_contains_locale = 1;
@@ -17935,7 +17909,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
op += NPOSIXD - POSIXD;
}
}
- }
ret = reg_node(pRExC_state, op);
@@ -17965,9 +17938,26 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
}
- if (SIZE_ONLY)
+ /* Assume we are going to generate an ANYOF-type node. */
+ op = (posixl)
+ ? ANYOFPOSIXL
+ : (LOC)
+ ? ANYOFL
+ : ANYOF;
+ ret = reganode(pRExC_state, op, 0);
+
+ if (SIZE_ONLY) {
+ RExC_size += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP + 1;
return ret;
+ }
+
/****** !SIZE_ONLY (Pass 2) AFTER HERE *********/
+ RExC_emit += (op == ANYOFPOSIXL) ? ANYOF_POSIXL_SKIP : ANYOF_SKIP;
+
+ ANYOF_FLAGS(ret) = anyof_flags;
+ if (posixl) {
+ ANYOF_POSIXL_SET_TO_BITMAP(ret, posixl);
+ }
/* If folding, we calculate all characters that could fold to or from the
* ones already on the list */
@@ -19925,7 +19915,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
const bool inverted = flags & ANYOF_INVERT;
- if (OP(o) == ANYOFL) {
+ if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) {
if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
sv_catpvs(sv, "{utf8-locale-reqd}");
}
@@ -21195,7 +21185,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
not_utf8 = invlist_clone(PL_UpperLatin1, NULL);
}
}
- else if (OP(node) == ANYOFL) {
+ else if (OP(node) == ANYOFL || OP(node) == ANYOFPOSIXL) {
/* If either of these flags are set, what matches isn't
* determinable except during execution, so don't know enough here
diff --git a/t/re/anyof.t b/t/re/anyof.t
index 12fb9b3a8c..6970a40059 100644
--- a/t/re/anyof.t
+++ b/t/re/anyof.t
@@ -50,8 +50,8 @@ my @tests = (
'ebcdic_ok_below_this_marker',
'(?l:[\x{212A}])' => 'ANYOFL[212A]',
- '(?l:[\s\x{212A}])' => 'ANYOFL[\s][1680 2000-200A 2028-2029 202F 205F 212A 3000]',
- '(?l:[^\S\x{202F}])' => 'ANYOFL[^\\S][1680 2000-200A 2028-2029 205F 3000]',
+ '(?l:[\s\x{212A}])' => 'ANYOFPOSIXL[\s][1680 2000-200A 2028-2029 202F 205F 212A 3000]',
+ '(?l:[^\S\x{202F}])' => 'ANYOFPOSIXL[^\\S][1680 2000-200A 2028-2029 205F 3000]',
'(?i:[^:])' => 'ANYOF[^:][0100-INFINITY]',
'[\p{Any}]' => 'ANYOF[\x00-\xFF][0100-10FFFF]',
'[\p{IsMyRuntimeProperty}]' => 'ANYOF[+utf8::IsMyRuntimeProperty]',