summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2006-10-26 16:59:11 +0200
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2006-10-26 15:58:18 +0000
commit7f69552c33ff9bd1eb6665f732d0f22956ac2f30 (patch)
tree347e535bd40a59d18a31921fa858fa5f56e0145b /regcomp.c
parent258133d1989d727199a2ae29d4f498d5d7e9a2f9 (diff)
downloadperl-7f69552c33ff9bd1eb6665f732d0f22956ac2f30.tar.gz
Fix a problem with jump-tries, add (?FAIL) pattern.
Message-ID: <9b18b3110610260559k3efa98barc28987e88c581a8a@mail.gmail.com> p4raw-id: //depot/perl@29118
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c78
1 files changed, 53 insertions, 25 deletions
diff --git a/regcomp.c b/regcomp.c
index 25dc17f620..be8be1ba7d 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1174,7 +1174,7 @@ is the recommended Unicode-aware way of saying
if ( noper_next < tail ) { \
if (!trie->jump) \
Newxz( trie->jump, word_count + 1, U16); \
- trie->jump[curword] = (U16)(tail - noper_next); \
+ trie->jump[curword] = (U16)(noper_next - convert); \
if (!jumper) \
jumper = noper_next; \
if (!nextbranch) \
@@ -1225,6 +1225,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
U32 next_alloc = 0;
regnode *jumper = NULL;
regnode *nextbranch = NULL;
+ regnode *convert = NULL;
/* we just use folder as a flag in utf8 */
const U8 * const folder = ( flags == EXACTF
? PL_fold
@@ -1273,6 +1274,16 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
REG_NODE_NUM(last), REG_NODE_NUM(tail),
(int)depth);
});
+
+ /* Find the node we are going to overwrite */
+ if ( first == startbranch && OP( last ) != BRANCH ) {
+ /* whole branch chain */
+ convert = first;
+ } else {
+ /* branch sub-chain */
+ convert = NEXTOPER( first );
+ }
+
/* -- First loop and Setup --
We first traverse the branches and scan each word to determine if it
@@ -1770,7 +1781,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
);
{ /* Modify the program and insert the new TRIE node*/
- regnode *convert;
U8 nodetype =(U8)(flags & 0xFF);
char *str=NULL;
@@ -1788,23 +1798,22 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
the whole branch sequence, including the first.
*/
/* Find the node we are going to overwrite */
- if ( first == startbranch && OP( last ) != BRANCH ) {
- /* whole branch chain */
- convert = first;
- DEBUG_r({
- const regnode *nop = NEXTOPER( convert );
- mjd_offset= Node_Offset((nop));
- mjd_nodelen= Node_Length((nop));
- });
- } else {
+ if ( first != startbranch || OP( last ) == BRANCH ) {
/* branch sub-chain */
- convert = NEXTOPER( first );
NEXT_OFF( first ) = (U16)(last - first);
DEBUG_r({
mjd_offset= Node_Offset((convert));
mjd_nodelen= Node_Length((convert));
});
+ /* whole branch chain */
+ } else {
+ DEBUG_r({
+ const regnode *nop = NEXTOPER( convert );
+ mjd_offset= Node_Offset((nop));
+ mjd_nodelen= Node_Length((nop));
+ });
}
+
DEBUG_OPTIMISE_r(
PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n",
(int)depth * 2 + 2, "",
@@ -1917,7 +1926,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs
jump[0], which is otherwise unused by the jump logic.
We use this when dumping a trie and during optimisation. */
if (trie->jump)
- trie->jump[0] = (U16)(tail - nextbranch);
+ trie->jump[0] = (U16)(nextbranch - convert);
/* XXXX */
if ( !trie->states[trie->startstate].wordnum && trie->bitmap &&
@@ -2091,7 +2100,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode
SV * const mysv=sv_newmortal(); \
regnode *Next = regnext(scan); \
regprop(RExC_rx, mysv, scan); \
- PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s [%d]\n", \
+ PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
(int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
Next ? (REG_NODE_NUM(Next)) : 0 ); \
});
@@ -3483,6 +3492,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* NOTE - There is similar code to this block above for handling
BRANCH nodes on the initial study. If you change stuff here
check there too. */
+ regnode *trie_node= scan;
regnode *tail= regnext(scan);
reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ];
I32 max1 = 0, min1 = I32_MAX;
@@ -3523,8 +3533,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (trie->jump[word]) {
if (!nextbranch)
- nextbranch = tail - trie->jump[0];
- scan= tail - trie->jump[word];
+ nextbranch = trie_node + trie->jump[0];
+ scan= trie_node + trie->jump[word];
/* We go from the jump point to the branch that follows
it. Note this means we need the vestigal unused branches
even though they arent otherwise used.
@@ -3855,7 +3865,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
r->paren_names = 0;
if (RExC_seen & REG_SEEN_RECURSE) {
- Newx(RExC_parens, RExC_npar,regnode *);
+ Newxz(RExC_parens, RExC_npar,regnode *);
SAVEFREEPV(RExC_parens);
}
@@ -4568,10 +4578,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
RExC_parse++;
case '=': /* (?=...) */
case '!': /* (?!...) */
+ if (*RExC_parse == ')')
+ goto do_op_fail;
RExC_seen_zerolen++;
case ':': /* (?:...) */
case '>': /* (?>...) */
break;
+ case 'F':
+ if (RExC_parse[0] == 'A' &&
+ RExC_parse[1] == 'I' &&
+ RExC_parse[2] == 'L')
+ RExC_parse+=3;
+ if (*RExC_parse != ')')
+ vFAIL("Sequence (?FAIL) or (?F) not terminated");
+ do_op_fail:
+ ret = reg_node(pRExC_state, OPFAIL);
+ nextchar(pRExC_state);
+ return ret;
+ break;
case '$': /* (?$...) */
case '@': /* (?@...) */
vFAIL2("Sequence (?%c...) not implemented", (int)paren);
@@ -4588,8 +4612,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
case 'R' : /* (?R) */
if (*RExC_parse != ')')
FAIL("Sequence (?R) not terminated");
- reg_node(pRExC_state, SRECURSE);
- break; /* (?PARNO) */
+ ret = reg_node(pRExC_state, SRECURSE);
+ nextchar(pRExC_state);
+ return ret;
+ /*notreached*/
{ /* named and numeric backreferences */
I32 num;
char * parse_start;
@@ -8442,6 +8468,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
DUMPUNTIL(NEXTOPER(node), next);
}
else if ( PL_regkind[(U8)op] == TRIE ) {
+ const regnode *this_trie = node;
const char op = OP(node);
const I32 n = ARG(node);
const reg_ac_data * const ac = op>=AHOCORASICK ?
@@ -8462,18 +8489,19 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
PL_colors[0], PL_colors[1],
(SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) |
PERL_PV_PRETTY_ELIPSES |
- PERL_PV_PRETTY_LTGT
+ PERL_PV_PRETTY_LTGT
)
: "???"
);
if (trie->jump) {
- U16 dist= trie->jump[word_idx+1];
- PerlIO_printf(Perl_debug_log, "(%u)\n",(next - dist) - start);
+ U16 dist = trie->jump[word_idx+1];
+ PerlIO_printf(Perl_debug_log, "(%u)\n",
+ (dist ? this_trie + dist : next) - start);
if (dist) {
if (!nextbranch)
- nextbranch= next - trie->jump[0];
- DUMPUNTIL(next - dist, nextbranch);
- }
+ nextbranch = this_trie + trie->jump[0];
+ DUMPUNTIL(this_trie + dist, nextbranch);
+ }
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode *)nextbranch);
} else {