diff options
-rw-r--r-- | regcomp.c | 78 | ||||
-rw-r--r-- | regcomp.sym | 7 | ||||
-rw-r--r-- | regexec.c | 23 | ||||
-rw-r--r-- | regexp.h | 4 | ||||
-rw-r--r-- | regnodes.h | 137 | ||||
-rw-r--r-- | t/op/re_tests | 33 |
6 files changed, 175 insertions, 107 deletions
@@ -1174,7 +1174,7 @@ is the recommended Unicode-aware way of saying if ( noper_next < tail ) { \ if (!trie->jump) \ Newxz( trie->jump, word_count + 1, U16); \ - trie->jump[curword] = (U16)(tail - noper_next); \ + trie->jump[curword] = (U16)(noper_next - convert); \ if (!jumper) \ jumper = noper_next; \ if (!nextbranch) \ @@ -1225,6 +1225,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs U32 next_alloc = 0; regnode *jumper = NULL; regnode *nextbranch = NULL; + regnode *convert = NULL; /* we just use folder as a flag in utf8 */ const U8 * const folder = ( flags == EXACTF ? PL_fold @@ -1273,6 +1274,16 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs REG_NODE_NUM(last), REG_NODE_NUM(tail), (int)depth); }); + + /* Find the node we are going to overwrite */ + if ( first == startbranch && OP( last ) != BRANCH ) { + /* whole branch chain */ + convert = first; + } else { + /* branch sub-chain */ + convert = NEXTOPER( first ); + } + /* -- First loop and Setup -- We first traverse the branches and scan each word to determine if it @@ -1770,7 +1781,6 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs ); { /* Modify the program and insert the new TRIE node*/ - regnode *convert; U8 nodetype =(U8)(flags & 0xFF); char *str=NULL; @@ -1788,23 +1798,22 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs the whole branch sequence, including the first. */ /* Find the node we are going to overwrite */ - if ( first == startbranch && OP( last ) != BRANCH ) { - /* whole branch chain */ - convert = first; - DEBUG_r({ - const regnode *nop = NEXTOPER( convert ); - mjd_offset= Node_Offset((nop)); - mjd_nodelen= Node_Length((nop)); - }); - } else { + if ( first != startbranch || OP( last ) == BRANCH ) { /* branch sub-chain */ - convert = NEXTOPER( first ); NEXT_OFF( first ) = (U16)(last - first); DEBUG_r({ mjd_offset= Node_Offset((convert)); mjd_nodelen= Node_Length((convert)); }); + /* whole branch chain */ + } else { + DEBUG_r({ + const regnode *nop = NEXTOPER( convert ); + mjd_offset= Node_Offset((nop)); + mjd_nodelen= Node_Length((nop)); + }); } + DEBUG_OPTIMISE_r( PerlIO_printf(Perl_debug_log, "%*sMJD offset:%"UVuf" MJD length:%"UVuf"\n", (int)depth * 2 + 2, "", @@ -1917,7 +1926,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, regnode *firs jump[0], which is otherwise unused by the jump logic. We use this when dumping a trie and during optimisation. */ if (trie->jump) - trie->jump[0] = (U16)(tail - nextbranch); + trie->jump[0] = (U16)(nextbranch - convert); /* XXXX */ if ( !trie->states[trie->startstate].wordnum && trie->bitmap && @@ -2091,7 +2100,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode SV * const mysv=sv_newmortal(); \ regnode *Next = regnext(scan); \ regprop(RExC_rx, mysv, scan); \ - PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s [%d]\n", \ + PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \ (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\ Next ? (REG_NODE_NUM(Next)) : 0 ); \ }); @@ -3483,6 +3492,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, /* NOTE - There is similar code to this block above for handling BRANCH nodes on the initial study. If you change stuff here check there too. */ + regnode *trie_node= scan; regnode *tail= regnext(scan); reg_trie_data *trie = (reg_trie_data*)RExC_rx->data->data[ ARG(scan) ]; I32 max1 = 0, min1 = I32_MAX; @@ -3523,8 +3533,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (trie->jump[word]) { if (!nextbranch) - nextbranch = tail - trie->jump[0]; - scan= tail - trie->jump[word]; + nextbranch = trie_node + trie->jump[0]; + scan= trie_node + trie->jump[word]; /* We go from the jump point to the branch that follows it. Note this means we need the vestigal unused branches even though they arent otherwise used. @@ -3855,7 +3865,7 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->paren_names = 0; if (RExC_seen & REG_SEEN_RECURSE) { - Newx(RExC_parens, RExC_npar,regnode *); + Newxz(RExC_parens, RExC_npar,regnode *); SAVEFREEPV(RExC_parens); } @@ -4568,10 +4578,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) RExC_parse++; case '=': /* (?=...) */ case '!': /* (?!...) */ + if (*RExC_parse == ')') + goto do_op_fail; RExC_seen_zerolen++; case ':': /* (?:...) */ case '>': /* (?>...) */ break; + case 'F': + if (RExC_parse[0] == 'A' && + RExC_parse[1] == 'I' && + RExC_parse[2] == 'L') + RExC_parse+=3; + if (*RExC_parse != ')') + vFAIL("Sequence (?FAIL) or (?F) not terminated"); + do_op_fail: + ret = reg_node(pRExC_state, OPFAIL); + nextchar(pRExC_state); + return ret; + break; case '$': /* (?$...) */ case '@': /* (?@...) */ vFAIL2("Sequence (?%c...) not implemented", (int)paren); @@ -4588,8 +4612,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) case 'R' : /* (?R) */ if (*RExC_parse != ')') FAIL("Sequence (?R) not terminated"); - reg_node(pRExC_state, SRECURSE); - break; /* (?PARNO) */ + ret = reg_node(pRExC_state, SRECURSE); + nextchar(pRExC_state); + return ret; + /*notreached*/ { /* named and numeric backreferences */ I32 num; char * parse_start; @@ -8442,6 +8468,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, DUMPUNTIL(NEXTOPER(node), next); } else if ( PL_regkind[(U8)op] == TRIE ) { + const regnode *this_trie = node; const char op = OP(node); const I32 n = ARG(node); const reg_ac_data * const ac = op>=AHOCORASICK ? @@ -8462,18 +8489,19 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, PL_colors[0], PL_colors[1], (SvUTF8(*elem_ptr) ? PERL_PV_ESCAPE_UNI : 0) | PERL_PV_PRETTY_ELIPSES | - PERL_PV_PRETTY_LTGT + PERL_PV_PRETTY_LTGT ) : "???" ); if (trie->jump) { - U16 dist= trie->jump[word_idx+1]; - PerlIO_printf(Perl_debug_log, "(%u)\n",(next - dist) - start); + U16 dist = trie->jump[word_idx+1]; + PerlIO_printf(Perl_debug_log, "(%u)\n", + (dist ? this_trie + dist : next) - start); if (dist) { if (!nextbranch) - nextbranch= next - trie->jump[0]; - DUMPUNTIL(next - dist, nextbranch); - } + nextbranch = this_trie + trie->jump[0]; + DUMPUNTIL(this_trie + dist, nextbranch); + } if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode *)nextbranch); } else { diff --git a/regcomp.sym b/regcomp.sym index 561b25d101..73e27a80ed 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -164,16 +164,19 @@ NREFF NREF, no-sv 1 Match already matched string, folded NREFFL NREF, no-sv 1 Match already matched string, folded in loc. -#*Special conditionals +#*Special conditionals (70..72) NGROUPP NGROUPP, no-sv 1 Whether the group matched. RECURSEP RECURSEP, num 1 Whether we are in a specific recurse. DEFINEP DEFINEP, none 1 Never execute directly. +#*Bactracking +OPFAIL OPFAIL, none Same as (?!) + # NEW STUFF ABOVE THIS LINE -- Please update counts below. ################################################################################ -#*SPECIAL REGOPS (70, 71) +#*SPECIAL REGOPS # This is not really a node, but an optimized away piece of a "long" node. # To simplify debugging output, we mark it as if it were a node @@ -168,6 +168,7 @@ S_regcppush(pTHX_ I32 parenfloor) #define REGCP_OTHER_ELEMS 8 SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS); + for (p = PL_regsize; p > parenfloor; p--) { /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ SSPUSHINT(PL_regendp[p]); @@ -2763,13 +2764,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) ST.accepted = 0; /* how many accepting states we have seen */ ST.B = next; ST.jump = trie->jump; - -#ifdef DEBUGGING ST.me = scan; -#endif - - /* traverse the TRIE keeping track of all accepting states we transition through until we get to a failing node. @@ -2894,10 +2890,10 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) locinput = PL_reginput; nextchr = UCHARAT(locinput); - if ( !ST.jump ) + if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum]) scan = ST.B; else - scan = ST.B - ST.jump[ST.accept_buff[0].wordnum]; + scan = ST.me + ST.jump[ST.accept_buff[0].wordnum]; continue; /* execute rest of RE */ } @@ -2943,9 +2939,9 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) SV ** const tmp = RX_DEBUG(reginfo->prog) ? av_fetch( trie->words, ST.accept_buff[ best ].wordnum - 1, 0 ) : NULL; - regnode *nextop=!ST.jump ? + regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ? ST.B : - ST.B - ST.jump[ST.accept_buff[best].wordnum]; + ST.me + ST.jump[ST.accept_buff[best].wordnum]; PerlIO_printf( Perl_debug_log, "%*s %strying alternation #%d <%s> at node #%d %s\n", REPORT_CODE_OFF+depth*2, "", PL_colors[4], @@ -2962,11 +2958,11 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) best = ST.accepted; } PL_reginput = (char *)ST.accept_buff[ best ].endpos; - if ( !ST.jump ) { + if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) { PUSH_STATE_GOTO(TRIE_next, ST.B); /* NOTREACHED */ } else { - PUSH_STATE_GOTO(TRIE_next, ST.B - ST.jump[ST.accept_buff[best].wordnum]); + PUSH_STATE_GOTO(TRIE_next, ST.me + ST.jump[ST.accept_buff[best].wordnum]); /* NOTREACHED */ } /* NOTREACHED */ @@ -3601,6 +3597,8 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog) n = ARG(scan); /* which paren pair */ PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr; PL_regendp[n] = locinput - PL_bostr; + /*if (n > PL_regsize) + PL_regsize = n;*/ if (n > (I32)*PL_reglastparen) *PL_reglastparen = n; *PL_reglastcloseparen = n; @@ -4484,7 +4482,6 @@ NULL #undef ST - case END: fake_end: if (cur_eval) { @@ -4611,6 +4608,8 @@ NULL if (next == scan) next = NULL; break; + case OPFAIL: + sayNO; default: PerlIO_printf(Perl_error_log, "%"UVxf" %d\n", PTR2UV(scan), OP(scan)); @@ -227,9 +227,9 @@ typedef struct regmatch_state { struct { reg_trie_accepted *accept_buff; U32 accepted; /* how many accepting states we have seen */ - U16 *jump; /* negative offsets from B */ + U16 *jump; /* positive offsets from me */ regnode *B; /* node following the trie */ - regnode *me; /* only needed for debugging */ + regnode *me; /* Which node am I - needed for jump tries*/ } trie; struct { diff --git a/regnodes.h b/regnodes.h index f7ebda17df..d6842b52c3 100644 --- a/regnodes.h +++ b/regnodes.h @@ -6,8 +6,8 @@ /* Regops and State definitions */ -#define REGNODE_MAX 74 -#define REGMATCH_STATE_MAX 104 +#define REGNODE_MAX 75 +#define REGMATCH_STATE_MAX 105 #define END 0 /* 0000 End of program. */ #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ @@ -82,41 +82,42 @@ #define NGROUPP 70 /* 0x46 Whether the group matched. */ #define RECURSEP 71 /* 0x47 Whether we are in a specific recurse. */ #define DEFINEP 72 /* 0x48 Never execute directly. */ -#define OPTIMIZED 73 /* 0x49 Placeholder for dump. */ -#define PSEUDO 74 /* 0x4a Pseudo opcode for internal use. */ +#define OPFAIL 73 /* 0x49 Same as (?!) */ +#define OPTIMIZED 74 /* 0x4a Placeholder for dump. */ +#define PSEUDO 75 /* 0x4b Pseudo opcode for internal use. */ /* ------------ States ------------- */ -#define TRIE_next 75 /* 0x4b Regmatch state for TRIE */ -#define TRIE_next_fail 76 /* 0x4c Regmatch state for TRIE */ -#define EVAL_AB 77 /* 0x4d Regmatch state for EVAL */ -#define EVAL_AB_fail 78 /* 0x4e Regmatch state for EVAL */ -#define CURLYX_end 79 /* 0x4f Regmatch state for CURLYX */ -#define CURLYX_end_fail 80 /* 0x50 Regmatch state for CURLYX */ -#define WHILEM_A_pre 81 /* 0x51 Regmatch state for WHILEM */ -#define WHILEM_A_pre_fail 82 /* 0x52 Regmatch state for WHILEM */ -#define WHILEM_A_min 83 /* 0x53 Regmatch state for WHILEM */ -#define WHILEM_A_min_fail 84 /* 0x54 Regmatch state for WHILEM */ -#define WHILEM_A_max 85 /* 0x55 Regmatch state for WHILEM */ -#define WHILEM_A_max_fail 86 /* 0x56 Regmatch state for WHILEM */ -#define WHILEM_B_min 87 /* 0x57 Regmatch state for WHILEM */ -#define WHILEM_B_min_fail 88 /* 0x58 Regmatch state for WHILEM */ -#define WHILEM_B_max 89 /* 0x59 Regmatch state for WHILEM */ -#define WHILEM_B_max_fail 90 /* 0x5a Regmatch state for WHILEM */ -#define BRANCH_next 91 /* 0x5b Regmatch state for BRANCH */ -#define BRANCH_next_fail 92 /* 0x5c Regmatch state for BRANCH */ -#define CURLYM_A 93 /* 0x5d Regmatch state for CURLYM */ -#define CURLYM_A_fail 94 /* 0x5e Regmatch state for CURLYM */ -#define CURLYM_B 95 /* 0x5f Regmatch state for CURLYM */ -#define CURLYM_B_fail 96 /* 0x60 Regmatch state for CURLYM */ -#define IFMATCH_A 97 /* 0x61 Regmatch state for IFMATCH */ -#define IFMATCH_A_fail 98 /* 0x62 Regmatch state for IFMATCH */ -#define CURLY_B_min_known 99 /* 0x63 Regmatch state for CURLY */ -#define CURLY_B_min_known_fail 100 /* 0x64 Regmatch state for CURLY */ -#define CURLY_B_min 101 /* 0x65 Regmatch state for CURLY */ -#define CURLY_B_min_fail 102 /* 0x66 Regmatch state for CURLY */ -#define CURLY_B_max 103 /* 0x67 Regmatch state for CURLY */ -#define CURLY_B_max_fail 104 /* 0x68 Regmatch state for CURLY */ +#define TRIE_next 76 /* 0x4c Regmatch state for TRIE */ +#define TRIE_next_fail 77 /* 0x4d Regmatch state for TRIE */ +#define EVAL_AB 78 /* 0x4e Regmatch state for EVAL */ +#define EVAL_AB_fail 79 /* 0x4f Regmatch state for EVAL */ +#define CURLYX_end 80 /* 0x50 Regmatch state for CURLYX */ +#define CURLYX_end_fail 81 /* 0x51 Regmatch state for CURLYX */ +#define WHILEM_A_pre 82 /* 0x52 Regmatch state for WHILEM */ +#define WHILEM_A_pre_fail 83 /* 0x53 Regmatch state for WHILEM */ +#define WHILEM_A_min 84 /* 0x54 Regmatch state for WHILEM */ +#define WHILEM_A_min_fail 85 /* 0x55 Regmatch state for WHILEM */ +#define WHILEM_A_max 86 /* 0x56 Regmatch state for WHILEM */ +#define WHILEM_A_max_fail 87 /* 0x57 Regmatch state for WHILEM */ +#define WHILEM_B_min 88 /* 0x58 Regmatch state for WHILEM */ +#define WHILEM_B_min_fail 89 /* 0x59 Regmatch state for WHILEM */ +#define WHILEM_B_max 90 /* 0x5a Regmatch state for WHILEM */ +#define WHILEM_B_max_fail 91 /* 0x5b Regmatch state for WHILEM */ +#define BRANCH_next 92 /* 0x5c Regmatch state for BRANCH */ +#define BRANCH_next_fail 93 /* 0x5d Regmatch state for BRANCH */ +#define CURLYM_A 94 /* 0x5e Regmatch state for CURLYM */ +#define CURLYM_A_fail 95 /* 0x5f Regmatch state for CURLYM */ +#define CURLYM_B 96 /* 0x60 Regmatch state for CURLYM */ +#define CURLYM_B_fail 97 /* 0x61 Regmatch state for CURLYM */ +#define IFMATCH_A 98 /* 0x62 Regmatch state for IFMATCH */ +#define IFMATCH_A_fail 99 /* 0x63 Regmatch state for IFMATCH */ +#define CURLY_B_min_known 100 /* 0x64 Regmatch state for CURLY */ +#define CURLY_B_min_known_fail 101 /* 0x65 Regmatch state for CURLY */ +#define CURLY_B_min 102 /* 0x66 Regmatch state for CURLY */ +#define CURLY_B_min_fail 103 /* 0x67 Regmatch state for CURLY */ +#define CURLY_B_max 104 /* 0x68 Regmatch state for CURLY */ +#define CURLY_B_max_fail 105 /* 0x69 Regmatch state for CURLY */ /* PL_regkind[] What type of regop or state is this. */ @@ -197,6 +198,7 @@ EXTCONST U8 PL_regkind[] = { NGROUPP, /* NGROUPP */ RECURSEP, /* RECURSEP */ DEFINEP, /* DEFINEP */ + OPFAIL, /* OPFAIL */ NOTHING, /* OPTIMIZED */ PSEUDO, /* PSEUDO */ /* ------------ States ------------- */ @@ -310,6 +312,7 @@ static const U8 regarglen[] = { EXTRA_SIZE(struct regnode_1), /* NGROUPP */ EXTRA_SIZE(struct regnode_1), /* RECURSEP */ EXTRA_SIZE(struct regnode_1), /* DEFINEP */ + 0, /* OPFAIL */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ }; @@ -390,6 +393,7 @@ static const char reg_off_by_arg[] = { 0, /* NGROUPP */ 0, /* RECURSEP */ 0, /* DEFINEP */ + 0, /* OPFAIL */ 0, /* OPTIMIZED */ 0, /* PSEUDO */ }; @@ -471,39 +475,40 @@ const char * reg_name[] = { "NGROUPP", /* 0x46 */ "RECURSEP", /* 0x47 */ "DEFINEP", /* 0x48 */ - "OPTIMIZED", /* 0x49 */ - "PSEUDO", /* 0x4a */ + "OPFAIL", /* 0x49 */ + "OPTIMIZED", /* 0x4a */ + "PSEUDO", /* 0x4b */ /* ------------ States ------------- */ - "TRIE_next", /* 0x4b */ - "TRIE_next_fail", /* 0x4c */ - "EVAL_AB", /* 0x4d */ - "EVAL_AB_fail", /* 0x4e */ - "CURLYX_end", /* 0x4f */ - "CURLYX_end_fail", /* 0x50 */ - "WHILEM_A_pre", /* 0x51 */ - "WHILEM_A_pre_fail", /* 0x52 */ - "WHILEM_A_min", /* 0x53 */ - "WHILEM_A_min_fail", /* 0x54 */ - "WHILEM_A_max", /* 0x55 */ - "WHILEM_A_max_fail", /* 0x56 */ - "WHILEM_B_min", /* 0x57 */ - "WHILEM_B_min_fail", /* 0x58 */ - "WHILEM_B_max", /* 0x59 */ - "WHILEM_B_max_fail", /* 0x5a */ - "BRANCH_next", /* 0x5b */ - "BRANCH_next_fail", /* 0x5c */ - "CURLYM_A", /* 0x5d */ - "CURLYM_A_fail", /* 0x5e */ - "CURLYM_B", /* 0x5f */ - "CURLYM_B_fail", /* 0x60 */ - "IFMATCH_A", /* 0x61 */ - "IFMATCH_A_fail", /* 0x62 */ - "CURLY_B_min_known", /* 0x63 */ - "CURLY_B_min_known_fail", /* 0x64 */ - "CURLY_B_min", /* 0x65 */ - "CURLY_B_min_fail", /* 0x66 */ - "CURLY_B_max", /* 0x67 */ - "CURLY_B_max_fail", /* 0x68 */ + "TRIE_next", /* 0x4c */ + "TRIE_next_fail", /* 0x4d */ + "EVAL_AB", /* 0x4e */ + "EVAL_AB_fail", /* 0x4f */ + "CURLYX_end", /* 0x50 */ + "CURLYX_end_fail", /* 0x51 */ + "WHILEM_A_pre", /* 0x52 */ + "WHILEM_A_pre_fail", /* 0x53 */ + "WHILEM_A_min", /* 0x54 */ + "WHILEM_A_min_fail", /* 0x55 */ + "WHILEM_A_max", /* 0x56 */ + "WHILEM_A_max_fail", /* 0x57 */ + "WHILEM_B_min", /* 0x58 */ + "WHILEM_B_min_fail", /* 0x59 */ + "WHILEM_B_max", /* 0x5a */ + "WHILEM_B_max_fail", /* 0x5b */ + "BRANCH_next", /* 0x5c */ + "BRANCH_next_fail", /* 0x5d */ + "CURLYM_A", /* 0x5e */ + "CURLYM_A_fail", /* 0x5f */ + "CURLYM_B", /* 0x60 */ + "CURLYM_B_fail", /* 0x61 */ + "IFMATCH_A", /* 0x62 */ + "IFMATCH_A_fail", /* 0x63 */ + "CURLY_B_min_known", /* 0x64 */ + "CURLY_B_min_known_fail", /* 0x65 */ + "CURLY_B_min", /* 0x66 */ + "CURLY_B_min_fail", /* 0x67 */ + "CURLY_B_max", /* 0x68 */ + "CURLY_B_max_fail", /* 0x69 */ }; #endif /* DEBUGGING */ #else diff --git a/t/op/re_tests b/t/op/re_tests index dbbe993073..9b9e5f8056 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -1147,3 +1147,36 @@ foo(?:aA|bB)?+b foobBb y $& foobBb ([^()]++|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x round\(([^()]++)\) _I(round(xs * sz),1) y $1 xs * sz +(foo[1x]|bar[2x]|baz[3x])+y foo1bar2baz3y y $1 baz3 +(foo[1x]|bar[2x]|baz[3x])+y foo1bar2baz3y y $& foo1bar2baz3y +(foo[1x]|bar[2x]|baz[3x])*y foo1bar2baz3y y $1 baz3 +(foo[1x]|bar[2x]|baz[3x])*y foo1bar2baz3y y $& foo1bar2baz3y + +([yX].|WORDS|[yX].|WORD)S WORDS y $1 WORD +(WORDS|WORLD|WORD)S WORDS y $1 WORD +([yX].|WORDS|WORD|[xY].)S WORDS y $1 WORD +(foo|fool|[zx].|money|parted)$ fool y $1 fool +([zx].|foo|fool|[zq].|money|parted|[yx].)$ fool y $1 fool +(foo|fool|[zx].|money|parted)$ fools n - - +([zx].|foo|fool|[qx].|money|parted|[py].)$ fools n - - + +([yX].|WORDS|[yX].|WORD)+S WORDS y $1 WORD +(WORDS|WORLD|WORD)+S WORDS y $1 WORD +([yX].|WORDS|WORD|[xY].)+S WORDS y $1 WORD +(foo|fool|[zx].|money|parted)+$ fool y $1 fool +([zx].|foo|fool|[zq].|money|parted|[yx].)+$ fool y $1 fool +(foo|fool|[zx].|money|parted)+$ fools n - - +([zx].|foo|fool|[qx].|money|parted|[py].)+$ fools n - - + +(x|y|z[QW])+(longish|loquatious|excessive|overblown[QW])+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW])*(longish|loquatious|excessive|overblown[QW])* xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW]){1,5}(longish|loquatious|excessive|overblown[QW]){1,5} xyzQzWlongishoverblownW y $1-$2 zW-overblownW + +(x|y|z[QW])++(longish|loquatious|excessive|overblown[QW])++ xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW])*+(longish|loquatious|excessive|overblown[QW])*+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW +(x|y|z[QW]){1,5}+(longish|loquatious|excessive|overblown[QW]){1,5}+ xyzQzWlongishoverblownW y $1-$2 zW-overblownW + + +a*(?!) aaaab n - - +a*(?FAIL) aaaab n - - +a*(?F) aaaab n - - |