diff options
author | Karl Williamson <khw@cpan.org> | 2020-11-15 21:06:11 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-01-20 06:51:49 -0700 |
commit | a7b8d88a7db0f93e2ec0bef63f0460d0d3247b10 (patch) | |
tree | a3d531e8f5d6476f3bbe556398a2c76c34d30f32 /regcomp.c | |
parent | 012ac233b0f87e11d3ffed84dbca75e927e854aa (diff) | |
download | perl-a7b8d88a7db0f93e2ec0bef63f0460d0d3247b10.tar.gz |
regcomp.c: Change names of 2 macros for mnemonics
The new names are more understandable to me: ISMULT1 becomes
isNON_BRACE_QUANTIFIER and ISMULT2 becomes isQUANTIFIER. This also adds a
second parameter to isQUANTIFIER, which is unused until the next commit in
the series.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 5253 |
1 files changed, 2627 insertions, 2626 deletions
@@ -224,11 +224,11 @@ struct RExC_state_t { regnode *end_op; /* END node in program */ I32 utf8; /* whether the pattern is utf8 or not */ I32 orig_utf8; /* whether the pattern was originally in utf8 */ - /* XXX use this for future optimisation of case - * where pattern must be upgraded to utf8. */ + /* XXX use this for future optimisation of case + * where pattern must be upgraded to utf8. */ I32 uni_semantics; /* If a d charset modifier should use unicode - rules, even if the pattern is not in - utf8 */ + rules, even if the pattern is not in + utf8 */ I32 recurse_count; /* Number of recurse regops we have generated */ regnode **recurse; /* Recurse regops */ @@ -242,7 +242,7 @@ struct RExC_state_t { I32 in_multi_char_class; int code_index; /* next code_blocks[] slot */ struct reg_code_blocks *code_blocks;/* positions of literal (?{}) - within pattern */ + within pattern */ SSize_t maxlen; /* mininum possible number of chars in string to match */ scan_frame *frame_head; scan_frame *frame_last; @@ -360,8 +360,9 @@ struct RExC_state_t { if (RExC_naughty < TOO_NAUGHTY) \ RExC_naughty += RExC_naughty / (exp) + (add) -#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?') -#define ISMULT2(s) (ISMULT1(*s) || ((*s) == '{' && regcurly(s))) +#define isNON_BRACE_QUANTIFIER(c) ((c) == '*' || (c) == '+' || (c) == '?') +#define isQUANTIFIER(s,e) ( isNON_BRACE_QUANTIFIER(*s) \ + || ((*s) == '{' && regcurly(s))) /* * Flags to be passed up and down. @@ -801,23 +802,23 @@ static const scan_data_t zero_scan_data = { #define _FAIL(code) STMT_START { \ const char *ellipses = ""; \ IV len = RExC_precomp_end - RExC_precomp; \ - \ + \ PREPARE_TO_DIE; \ if (len > RegexLengthToShowInErrorMessages) { \ - /* chop 10 shorter than the max, to ensure meaning of "..." */ \ - len = RegexLengthToShowInErrorMessages - 10; \ - ellipses = "..."; \ + /* chop 10 shorter than the max, to ensure meaning of "..." 
*/ \ + len = RegexLengthToShowInErrorMessages - 10; \ + ellipses = "..."; \ } \ code; \ } STMT_END #define FAIL(msg) _FAIL( \ Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/", \ - msg, UTF8fARG(UTF, len, RExC_precomp), ellipses)) + msg, UTF8fARG(UTF, len, RExC_precomp), ellipses)) #define FAIL2(msg,arg) _FAIL( \ Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/", \ - arg, UTF8fARG(UTF, len, RExC_precomp), ellipses)) + arg, UTF8fARG(UTF, len, RExC_precomp), ellipses)) #define FAIL3(msg,arg1,arg2) _FAIL( \ Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/", \ @@ -828,7 +829,7 @@ static const scan_data_t zero_scan_data = { */ #define Simple_vFAIL(m) STMT_START { \ Perl_croak(aTHX_ "%s" REPORT_LOCATION, \ - m, REPORT_LOCATION_ARGS(RExC_parse)); \ + m, REPORT_LOCATION_ARGS(RExC_parse)); \ } STMT_END /* @@ -861,7 +862,7 @@ static const scan_data_t zero_scan_data = { */ #define Simple_vFAIL3(m, a1, a2) STMT_START { \ S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, \ - REPORT_LOCATION_ARGS(RExC_parse)); \ + REPORT_LOCATION_ARGS(RExC_parse)); \ } STMT_END /* @@ -877,7 +878,7 @@ static const scan_data_t zero_scan_data = { */ #define Simple_vFAIL4(m, a1, a2, a3) STMT_START { \ S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, a3, \ - REPORT_LOCATION_ARGS(RExC_parse)); \ + REPORT_LOCATION_ARGS(RExC_parse)); \ } STMT_END #define vFAIL4(m,a1,a2,a3) STMT_START { \ @@ -968,7 +969,7 @@ static const scan_data_t zero_scan_data = { _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \ m REPORT_LOCATION, \ - REPORT_LOCATION_ARGS(loc))) + REPORT_LOCATION_ARGS(loc))) #define vWARN(loc, m) \ _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ @@ -980,26 +981,26 @@ static const scan_data_t zero_scan_data = { _WARN_HELPER(loc, packWARN(WARN_DEPRECATED), \ Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), \ m REPORT_LOCATION, \ - REPORT_LOCATION_ARGS(loc))) + REPORT_LOCATION_ARGS(loc))) #define ckWARNdep(loc,m) \ _WARN_HELPER(loc, packWARN(WARN_DEPRECATED), \ 
Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \ - m REPORT_LOCATION, \ - REPORT_LOCATION_ARGS(loc))) + m REPORT_LOCATION, \ + REPORT_LOCATION_ARGS(loc))) #define ckWARNregdep(loc,m) \ _WARN_HELPER(loc, packWARN2(WARN_DEPRECATED, WARN_REGEXP), \ Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, \ WARN_REGEXP), \ - m REPORT_LOCATION, \ - REPORT_LOCATION_ARGS(loc))) + m REPORT_LOCATION, \ + REPORT_LOCATION_ARGS(loc))) #define ckWARN2reg_d(loc,m, a1) \ _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP), \ - m REPORT_LOCATION, \ - a1, REPORT_LOCATION_ARGS(loc))) + m REPORT_LOCATION, \ + a1, REPORT_LOCATION_ARGS(loc))) #define ckWARN2reg(loc, m, a1) \ _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ @@ -1011,34 +1012,34 @@ static const scan_data_t zero_scan_data = { _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ Perl_warner(aTHX_ packWARN(WARN_REGEXP), \ m REPORT_LOCATION, \ - a1, a2, REPORT_LOCATION_ARGS(loc))) + a1, a2, REPORT_LOCATION_ARGS(loc))) #define ckWARN3reg(loc, m, a1, a2) \ _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \ m REPORT_LOCATION, \ - a1, a2, \ + a1, a2, \ REPORT_LOCATION_ARGS(loc))) #define vWARN4(loc, m, a1, a2, a3) \ _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ Perl_warner(aTHX_ packWARN(WARN_REGEXP), \ m REPORT_LOCATION, \ - a1, a2, a3, \ + a1, a2, a3, \ REPORT_LOCATION_ARGS(loc))) #define ckWARN4reg(loc, m, a1, a2, a3) \ _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \ m REPORT_LOCATION, \ - a1, a2, a3, \ + a1, a2, a3, \ REPORT_LOCATION_ARGS(loc))) #define vWARN5(loc, m, a1, a2, a3, a4) \ _WARN_HELPER(loc, packWARN(WARN_REGEXP), \ Perl_warner(aTHX_ packWARN(WARN_REGEXP), \ m REPORT_LOCATION, \ - a1, a2, a3, a4, \ + a1, a2, a3, a4, \ REPORT_LOCATION_ARGS(loc))) #define ckWARNexperimental(loc, class, m) \ @@ -1080,14 +1081,14 @@ static const scan_data_t zero_scan_data = { #define ProgLen(ri) ri->u.offsets[0] #define SetProgLen(ri,x) 
ri->u.offsets[0] = x #define Set_Node_Offset_To_R(offset,byte) STMT_START { \ - MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \ - __LINE__, (int)(offset), (int)(byte))); \ - if((offset) < 0) { \ - Perl_croak(aTHX_ "value of node is %d in Offset macro", \ + MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \ + __LINE__, (int)(offset), (int)(byte))); \ + if((offset) < 0) { \ + Perl_croak(aTHX_ "value of node is %d in Offset macro", \ (int)(offset)); \ - } else { \ + } else { \ RExC_offsets[2*(offset)-1] = (byte); \ - } \ + } \ } STMT_END #define Set_Node_Offset(node,byte) \ @@ -1095,14 +1096,14 @@ static const scan_data_t zero_scan_data = { #define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse) #define Set_Node_Length_To_R(node,len) STMT_START { \ - MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \ - __LINE__, (int)(node), (int)(len))); \ - if((node) < 0) { \ - Perl_croak(aTHX_ "value of node is %d in Length macro", \ + MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \ + __LINE__, (int)(node), (int)(len))); \ + if((node) < 0) { \ + Perl_croak(aTHX_ "value of node is %d in Length macro", \ (int)(node)); \ - } else { \ - RExC_offsets[2*(node)] = (len); \ - } \ + } else { \ + RExC_offsets[2*(node)] = (len); \ + } \ } STMT_END #define Set_Node_Length(node,len) \ @@ -1476,13 +1477,13 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) { const U8 i = data->cur_is_floating; - SvSetMagicSV(longest_sv, data->last_found); + SvSetMagicSV(longest_sv, data->last_found); data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min; - if (!i) /* fixed */ - data->substrs[0].max_offset = data->substrs[0].min_offset; - else { /* float */ - data->substrs[1].max_offset = + if (!i) /* fixed */ + data->substrs[0].max_offset = data->substrs[0].min_offset; + else { /* float */ + data->substrs[1].max_offset = (is_inf) ? 
OPTIMIZE_INFTY : (l @@ -1490,8 +1491,8 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, /* temporary underflow guard for 5.32 */ : data->pos_delta < 0 ? OPTIMIZE_INFTY : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min - ? OPTIMIZE_INFTY - : data->pos_min + data->pos_delta)); + ? OPTIMIZE_INFTY + : data->pos_min + data->pos_delta)); } data->substrs[i].flags &= ~SF_BEFORE_EOL; @@ -1502,12 +1503,12 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data, SvCUR_set(data->last_found, 0); { - SV * const sv = data->last_found; - if (SvUTF8(sv) && SvMAGICAL(sv)) { - MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8); - if (mg) - mg->mg_len = 0; - } + SV * const sv = data->last_found; + if (SvUTF8(sv) && SvMAGICAL(sv)) { + MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8); + if (mg) + mg->mg_len = 0; + } } data->last_end = -1; data->flags &= ~SF_BEFORE_EOL; @@ -1596,10 +1597,10 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc) * test cases for locale, many parts of it may not work properly, it is * safest to avoid locale unless necessary. */ if (RExC_contains_locale) { - ANYOF_POSIXL_SETALL(ssc); + ANYOF_POSIXL_SETALL(ssc); } else { - ANYOF_POSIXL_ZERO(ssc); + ANYOF_POSIXL_ZERO(ssc); } } @@ -2254,7 +2255,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc) STATIC void S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, - AV *revcharmap, U32 depth) + AV *revcharmap, U32 depth) { U32 state; SV *sv=sv_newmortal(); @@ -2268,14 +2269,14 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, depth+1, "Match","Base","Ofs" ); for( state = 0 ; state < trie->uniquecharcount ; state++ ) { - SV ** const tmp = av_fetch( revcharmap, state, 0); + SV ** const tmp = av_fetch( revcharmap, state, 0); if ( tmp ) { Perl_re_printf( aTHX_ "%*s", colwidth, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth, - PL_colors[0], PL_colors[1], - (SvUTF8(*tmp) ? 
PERL_PV_ESCAPE_UNI : 0) | - PERL_PV_ESCAPE_FIRSTCHAR + PL_colors[0], PL_colors[1], + (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) | + PERL_PV_ESCAPE_FIRSTCHAR ) ); } @@ -2288,7 +2289,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, Perl_re_printf( aTHX_ "\n"); for( state = 1 ; state < trie->statecount ; state++ ) { - const U32 base = trie->states[ state ].trans.base; + const U32 base = trie->states[ state ].trans.base; Perl_re_indentf( aTHX_ "#%4" UVXf "|", depth+1, (UV)state); @@ -2335,8 +2336,8 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, depth); for (word=1; word <= trie->wordcount; word++) { Perl_re_printf( aTHX_ " %d:(%d,%d)", - (int)word, (int)(trie->wordinfo[word].prev), - (int)(trie->wordinfo[word].len)); + (int)word, (int)(trie->wordinfo[word].prev), + (int)(trie->wordinfo[word].len)); } Perl_re_printf( aTHX_ "\n" ); } @@ -2348,8 +2349,8 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap, */ STATIC void S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, - HV *widecharmap, AV *revcharmap, U32 next_alloc, - U32 depth) + HV *widecharmap, AV *revcharmap, U32 next_alloc, + U32 depth) { U32 state; SV *sv=sv_newmortal(); @@ -2377,9 +2378,9 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, ); } for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) { - SV ** const tmp = av_fetch( revcharmap, + SV ** const tmp = av_fetch( revcharmap, TRIE_LIST_ITEM(state, charid).forid, 0); - if ( tmp ) { + if ( tmp ) { Perl_re_printf( aTHX_ "%*s:%3X=%4" UVXf " | ", colwidth, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), @@ -2408,8 +2409,8 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie, */ STATIC void S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, - HV *widecharmap, AV *revcharmap, U32 next_alloc, - U32 depth) + HV *widecharmap, AV *revcharmap, U32 next_alloc, + U32 depth) { U32 state; U16 charid; @@ -2427,14 +2428,14 @@ 
S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, Perl_re_indentf( aTHX_ "Char : ", depth+1 ); for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) { - SV ** const tmp = av_fetch( revcharmap, charid, 0); + SV ** const tmp = av_fetch( revcharmap, charid, 0); if ( tmp ) { Perl_re_printf( aTHX_ "%*s", colwidth, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth, - PL_colors[0], PL_colors[1], - (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) | - PERL_PV_ESCAPE_FIRSTCHAR + PL_colors[0], PL_colors[1], + (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) | + PERL_PV_ESCAPE_FIRSTCHAR ) ); } @@ -2479,9 +2480,9 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie, /* make_trie(startbranch,first,last,tail,word_count,flags,depth) startbranch: the first branch in the whole branch sequence first : start branch of sequence of branch-exact nodes. - May be the same as startbranch + May be the same as startbranch last : Thing following the last branch. - May be the same as tail. + May be the same as tail. 
tail : item following the branch sequence count : words in the sequence flags : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS|L|FLU8)/ @@ -2557,10 +2558,10 @@ and should turn into: 1: CURLYM[1] {1,32767}(18) 5: TRIE(16) - [Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1] - <ac> - <ad> - <ab> + [Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1] + <ac> + <ad> + <ab> 16: SUCCEED(0) 17: NOTHING(18) 18: END(0) @@ -2580,8 +2581,8 @@ and would end up looking like: 1: TRIE(8) [Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1] - <foo> - <bar> + <foo> + <bar> 7: TAIL(8) 8: EXACT <baz>(10) 10: END(0) @@ -2595,19 +2596,19 @@ is the recommended Unicode-aware way of saying #define TRIE_STORE_REVCHAR(val) \ STMT_START { \ - if (UTF) { \ + if (UTF) { \ SV *zlopp = newSV(UTF8_MAXBYTES); \ - unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp); \ + unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp); \ unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val); \ *kapow = '\0'; \ - SvCUR_set(zlopp, kapow - flrbbbbb); \ - SvPOK_on(zlopp); \ - SvUTF8_on(zlopp); \ - av_push(revcharmap, zlopp); \ - } else { \ + SvCUR_set(zlopp, kapow - flrbbbbb); \ + SvPOK_on(zlopp); \ + SvUTF8_on(zlopp); \ + av_push(revcharmap, zlopp); \ + } else { \ char ooooff = (char)val; \ - av_push(revcharmap, newSVpvn(&ooooff, 1)); \ - } \ + av_push(revcharmap, newSVpvn(&ooooff, 1)); \ + } \ } STMT_END /* This gets the next character from the input, folding it if not already @@ -2638,8 +2639,8 @@ is the recommended Unicode-aware way of saying #define TRIE_LIST_PUSH(state,fid,ns) STMT_START { \ if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) { \ - U32 ging = TRIE_LIST_LEN( state ) * 2; \ - Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \ + U32 ging = TRIE_LIST_LEN( state ) * 2; \ + Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \ TRIE_LIST_LEN( state ) = ging; \ } \ TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid 
= fid; \ @@ -2649,7 +2650,7 @@ is the recommended Unicode-aware way of saying #define TRIE_LIST_NEW(state) STMT_START { \ Newx( trie->states[ state ].trans.list, \ - 4, reg_trie_trans_le ); \ + 4, reg_trie_trans_le ); \ TRIE_LIST_CUR( state ) = 1; \ TRIE_LIST_LEN( state ) = 4; \ } STMT_END @@ -2688,8 +2689,8 @@ is the recommended Unicode-aware way of saying /* It's a dupe. Pre-insert into the wordinfo[].prev */\ /* chain, so that when the bits of chain are later */\ /* linked together, the dups appear in the chain */\ - trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \ - trie->wordinfo[dupe].prev = curword; \ + trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \ + trie->wordinfo[dupe].prev = curword; \ } else { \ /* we haven't inserted this word yet. */ \ trie->states[ state ].wordnum = curword; \ @@ -2769,11 +2770,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, switch (flags) { case EXACT: case EXACT_REQ8: case EXACTL: break; - case EXACTFAA: + case EXACTFAA: case EXACTFUP: - case EXACTFU: - case EXACTFLU8: folder = PL_fold_latin1; break; - case EXACTF: folder = PL_fold; break; + case EXACTFU: + case EXACTFLU8: folder = PL_fold_latin1; break; + case EXACTF: folder = PL_fold; break; default: Perl_croak( aTHX_ "panic! 
In trie construction, unknown node type %u %s", (unsigned) flags, PL_reg_name[flags] ); } @@ -2784,7 +2785,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, RExC_rxi->data->data[ data_slot ] = (void*)trie; trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) ); if (flags == EXACT || flags == EXACT_REQ8 || flags == EXACTL) - trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 ); + trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 ); trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc( trie->wordcount+1, sizeof(reg_trie_wordinfo)); @@ -2964,8 +2965,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, TRIE_STORE_REVCHAR( uvc ); } if ( set_bit ) { - /* store the codepoint in the bitmap, and its folded - * equivalent. */ + /* store the codepoint in the bitmap, and its folded + * equivalent. */ TRIE_BITMAP_SET_FOLDED(trie, uvc, folder); set_bit = 0; /* We've done our bit :-) */ } @@ -3010,8 +3011,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, "TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n", depth+1, ( widecharmap ? 
"UTF8" : "NATIVE" ), (int)word_count, - (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount, - (int)trie->minlen, (int)trie->maxlen ) + (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount, + (int)trie->minlen, (int)trie->maxlen ) ); /* @@ -3059,17 +3060,17 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_ "Compiling trie using list compiler\n", depth+1)); - trie->states = (reg_trie_state *) - PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2, - sizeof(reg_trie_state) ); + trie->states = (reg_trie_state *) + PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2, + sizeof(reg_trie_state) ); TRIE_LIST_NEW(1); next_alloc = 2; for ( cur = first ; cur < last ; cur = regnext( cur ) ) { regnode *noper = NEXTOPER( cur ); - U32 state = 1; /* required init */ - U16 charid = 0; /* sanity init */ + U32 state = 1; /* required init */ + U16 charid = 0; /* sanity init */ U32 wordlen = 0; /* required init */ if (OP(noper) == NOTHING) { @@ -3096,7 +3097,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if ( uvc < 256 ) { charid = trie->charmap[ uvc ]; - } else { + } else { SV** const svpp = hv_fetch( widecharmap, (char*)&uvc, sizeof( UV ), @@ -3106,7 +3107,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } else { charid=(U16)SvIV( *svpp ); } - } + } /* charid is now 0 if we dont know the char read, or * nonzero if we do */ if ( charid ) { @@ -3117,7 +3118,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, charid--; if ( !trie->states[ state ].trans.list ) { TRIE_LIST_NEW( state ); - } + } for ( check = 1; check <= TRIE_LIST_USED( state ); check++ ) @@ -3131,15 +3132,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } if ( ! newstate ) { newstate = next_alloc++; - prev_states[newstate] = state; + prev_states[newstate] = state; TRIE_LIST_PUSH( state, charid, newstate ); transcount++; } state = newstate; } else { Perl_croak( aTHX_ "panic! 
In trie construction, no char mapping for %" IVdf, uvc ); - } - } + } + } } else { /* If we end up here it is because we skipped past a NOTHING, but did not end up * on a trieable type. So we need to reset noper back to point at the first regop @@ -3154,18 +3155,18 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, /* next alloc is the NEXT state to be allocated */ trie->statecount = next_alloc; trie->states = (reg_trie_state *) - PerlMemShared_realloc( trie->states, - next_alloc - * sizeof(reg_trie_state) ); + PerlMemShared_realloc( trie->states, + next_alloc + * sizeof(reg_trie_state) ); /* and now dump it out before we compress it */ DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap, - revcharmap, next_alloc, - depth+1) + revcharmap, next_alloc, + depth+1) ); trie->trans = (reg_trie_trans *) - PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) ); + PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) ); { U32 state; U32 tp = 0; @@ -3184,22 +3185,22 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if (trie->states[state].trans.list) { U16 minid=TRIE_LIST_ITEM( state, 1).forid; U16 maxid=minid; - U16 idx; + U16 idx; for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) { - const U16 forid = TRIE_LIST_ITEM( state, idx).forid; - if ( forid < minid ) { - minid=forid; - } else if ( forid > maxid ) { - maxid=forid; - } + const U16 forid = TRIE_LIST_ITEM( state, idx).forid; + if ( forid < minid ) { + minid=forid; + } else if ( forid > maxid ) { + maxid=forid; + } } if ( transcount < tp + maxid - minid + 1) { transcount *= 2; - trie->trans = (reg_trie_trans *) - PerlMemShared_realloc( trie->trans, - transcount - * sizeof(reg_trie_trans) ); + trie->trans = (reg_trie_trans *) + PerlMemShared_realloc( trie->trans, + transcount + * sizeof(reg_trie_trans) ); Zero( trie->trans + (transcount / 2), transcount / 2, reg_trie_trans ); @@ -3285,13 +3286,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode 
*startbranch, DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_ "Compiling trie using table compiler\n", depth+1)); - trie->trans = (reg_trie_trans *) - PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 ) - * trie->uniquecharcount + 1, - sizeof(reg_trie_trans) ); + trie->trans = (reg_trie_trans *) + PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 ) + * trie->uniquecharcount + 1, + sizeof(reg_trie_trans) ); trie->states = (reg_trie_state *) - PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2, - sizeof(reg_trie_state) ); + PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2, + sizeof(reg_trie_state) ); next_alloc = trie->uniquecharcount + 1; @@ -3342,8 +3343,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if ( !trie->trans[ state + charid ].next ) { trie->trans[ state + charid ].next = next_alloc; trie->trans[ state ].check++; - prev_states[TRIE_NODENUM(next_alloc)] - = TRIE_NODENUM(state); + prev_states[TRIE_NODENUM(next_alloc)] + = TRIE_NODENUM(state); next_alloc += trie->uniquecharcount; } state = trie->trans[ state + charid ].next; @@ -3367,8 +3368,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, /* and now dump it out before we compress it */ DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap, - revcharmap, - next_alloc, depth+1)); + revcharmap, + next_alloc, depth+1)); { /* @@ -3433,15 +3434,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, demq */ const U32 laststate = TRIE_NODENUM( next_alloc ); - U32 state, charid; + U32 state, charid; U32 pos = 0, zp=0; trie->statecount = laststate; for ( state = 1 ; state < laststate ; state++ ) { U8 flag = 0; - const U32 stateidx = TRIE_NODEIDX( state ); - const U32 o_used = trie->trans[ stateidx ].check; - U32 used = trie->trans[ stateidx ].check; + const U32 stateidx = TRIE_NODEIDX( state ); + const U32 o_used = trie->trans[ stateidx ].check; + U32 used = trie->trans[ stateidx ].check; trie->trans[ stateidx ].check = 0; for ( charid = 0; @@ 
-3484,8 +3485,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, } trie->lasttrans = pos + 1; trie->states = (reg_trie_state *) - PerlMemShared_realloc( trie->states, laststate - * sizeof(reg_trie_state) ); + PerlMemShared_realloc( trie->states, laststate + * sizeof(reg_trie_state) ); DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_ "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n", depth+1, @@ -3506,8 +3507,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, ); /* resize the trans array to remove unused space */ trie->trans = (reg_trie_trans *) - PerlMemShared_realloc( trie->trans, trie->lasttrans - * sizeof(reg_trie_trans) ); + PerlMemShared_realloc( trie->trans, trie->lasttrans + * sizeof(reg_trie_trans) ); { /* Modify the program and insert the new TRIE node */ U8 nodetype =(U8)(flags & 0xFF); @@ -3602,20 +3603,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, (UV)state)); if (first_ofs >= 0) { SV ** const tmp = av_fetch( revcharmap, first_ofs, 0); - const U8 * const ch = (U8*)SvPV_nolen_const( *tmp ); + const U8 * const ch = (U8*)SvPV_nolen_const( *tmp ); TRIE_BITMAP_SET_FOLDED(trie,*ch, folder); DEBUG_OPTIMISE_r( Perl_re_printf( aTHX_ "%s", (char*)ch) ); - } - } + } + } /* store the current firstchar in the bitmap */ TRIE_BITMAP_SET_FOLDED(trie,*ch, folder); DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch)); - } + } first_ofs = ofs; - } + } } if ( count == 1 ) { /* This state has only one transition, its transition is part @@ -3630,9 +3631,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, depth+1, (UV)state, (UV)first_ofs, pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6, - PL_colors[0], PL_colors[1], - (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) | - PERL_PV_ESCAPE_FIRSTCHAR + PL_colors[0], PL_colors[1], + (SvUTF8(*tmp) ? 
PERL_PV_ESCAPE_UNI : 0) | + PERL_PV_ESCAPE_FIRSTCHAR ) ); }); @@ -3645,15 +3646,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, setSTR_LEN(convert, (U8)(STR_LEN(convert) + len)); while (len--) *str++ = *ch++; - } else { + } else { #ifdef DEBUGGING - if (state>1) + if (state>1) DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "]\n")); #endif - break; - } - } - trie->prefixlen = (state-1); + break; + } + } + trie->prefixlen = (state-1); if (str) { regnode *n = convert+NODE_SZ_STR(convert); assert( NODE_SZ_STR(convert) <= U16_MAX ); @@ -3694,7 +3695,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, #endif if (trie->maxlen) { convert = n; - } else { + } else { NEXT_OFF(convert) = (U16)(tail - convert); DEBUG_r(optimize= n); } @@ -3703,23 +3704,23 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, if (!jumper) jumper = last; if ( trie->maxlen ) { - NEXT_OFF( convert ) = (U16)(tail - convert); - ARG_SET( convert, data_slot ); - /* Store the offset to the first unabsorbed branch in - jump[0], which is otherwise unused by the jump logic. - We use this when dumping a trie and during optimisation. */ - if (trie->jump) - trie->jump[0] = (U16)(nextbranch - convert); + NEXT_OFF( convert ) = (U16)(tail - convert); + ARG_SET( convert, data_slot ); + /* Store the offset to the first unabsorbed branch in + jump[0], which is otherwise unused by the jump logic. + We use this when dumping a trie and during optimisation. */ + if (trie->jump) + trie->jump[0] = (U16)(nextbranch - convert); /* If the start state is not accepting (meaning there is no empty string/NOTHING) - * and there is a bitmap - * and the first "jump target" node we found leaves enough room - * then convert the TRIE node into a TRIEC node, with the bitmap - * embedded inline in the opcode - this is hypothetically faster. 
- */ + * and there is a bitmap + * and the first "jump target" node we found leaves enough room + * then convert the TRIE node into a TRIEC node, with the bitmap + * embedded inline in the opcode - this is hypothetically faster. + */ if ( !trie->states[trie->startstate].wordnum - && trie->bitmap - && ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) ) + && trie->bitmap + && ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) ) { OP( convert ) = TRIEC; Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char); @@ -3768,26 +3769,26 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, * already linked up earlier. */ { - U16 word; - U32 state; - U16 prev; - - for (word=1; word <= trie->wordcount; word++) { - prev = 0; - if (trie->wordinfo[word].prev) - continue; - state = trie->wordinfo[word].accept; - while (state) { - state = prev_states[state]; - if (!state) - break; - prev = trie->states[state].wordnum; - if (prev) - break; - } - trie->wordinfo[word].prev = prev; - } - Safefree(prev_states); + U16 word; + U32 state; + U16 prev; + + for (word=1; word <= trie->wordcount; word++) { + prev = 0; + if (trie->wordinfo[word].prev) + continue; + state = trie->wordinfo[word].accept; + while (state) { + state = prev_states[state]; + if (!state) + break; + prev = trie->states[state].wordnum; + if (prev) + break; + } + trie->wordinfo[word].prev = prev; + } + Safefree(prev_states); } @@ -3884,20 +3885,20 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour fail[ 0 ] = fail[ 1 ] = 1; for ( charid = 0; charid < ucharcount ; charid++ ) { - const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 ); - if ( newstate ) { + const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 ); + if ( newstate ) { q[ q_write ] = newstate; /* set to point at the root */ fail[ q[ q_write++ ] ]=1; } } while ( q_read < q_write) { - const U32 cur 
= q[ q_read++ % numstates ]; + const U32 cur = q[ q_read++ % numstates ]; base = trie->states[ cur ].trans.base; for ( charid = 0 ; charid < ucharcount ; charid++ ) { - const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 ); - if (ch_state) { + const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 ); + if (ch_state) { U32 fail_state = cur; U32 fail_base; do { @@ -4259,16 +4260,16 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, } #ifdef EXPERIMENTAL_INPLACESCAN - if (flags && !NEXT_OFF(n)) { - DEBUG_PEEP("atch", val, depth, 0); - if (reg_off_by_arg[OP(n)]) { - ARG_SET(n, val - n); - } - else { - NEXT_OFF(n) = val - n; - } - stopnow = 1; - } + if (flags && !NEXT_OFF(n)) { + DEBUG_PEEP("atch", val, depth, 0); + if (reg_off_by_arg[OP(n)]) { + ARG_SET(n, val - n); + } + else { + NEXT_OFF(n) = val - n; + } + stopnow = 1; + } #endif } @@ -4294,11 +4295,11 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, int total_count_delta = 0; /* Total delta number of characters that multi-char folds expand to */ - /* One pass is made over the node's string looking for all the - * possibilities. To avoid some tests in the loop, there are two main - * cases, for UTF-8 patterns (which can't have EXACTF nodes) and - * non-UTF-8 */ - if (UTF) { + /* One pass is made over the node's string looking for all the + * possibilities. To avoid some tests in the loop, there are two main + * cases, for UTF-8 patterns (which can't have EXACTF nodes) and + * non-UTF-8 */ + if (UTF) { U8* folded = NULL; if (OP(scan) == EXACTFL) { @@ -4355,7 +4356,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, * executed */ while (s < s_end - 1) /* Can stop 1 before the end, as minimum length sequence we are looking for is 2 */ - { + { int count = 0; /* How many characters in a multi-char fold */ int len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end); if (! 
len) { /* Not a multi-char fold: get next char */ @@ -4391,7 +4392,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, * the character that folds to the sequence is) */ total_count_delta += count - 1; next_iteration: ; - } + } /* We created a temporary folded copy of the string in EXACTFL * nodes. Therefore we need to be sure it doesn't go below zero, @@ -4406,8 +4407,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, *min_subtract += total_count_delta; Safefree(folded); - } - else if (OP(scan) == EXACTFAA) { + } + else if (OP(scan) == EXACTFAA) { /* Non-UTF-8 pattern, EXACTFAA node. There can't be a multi-char * fold to the ASCII range (and there are no existing ones in the @@ -4418,7 +4419,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \ || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \ || UNICODE_DOT_DOT_VERSION > 0) - while (s < s_end) { + while (s < s_end) { if (*s == LATIN_SMALL_LETTER_SHARP_S) { OP(scan) = EXACTFAA_NO_TRIE; *unfolded_multi_char = TRUE; @@ -4427,7 +4428,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, s++; } } - else if (OP(scan) != EXACTFAA_NO_TRIE) { + else if (OP(scan) != EXACTFAA_NO_TRIE) { /* Non-UTF-8 pattern, not EXACTFAA node. Look for the multi-char * folds that are all Latin1. As explained in the comments @@ -4435,11 +4436,11 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, * and EXACTFL nodes; it can be in the final position. Otherwise * we can stop looking 1 byte earlier because have to find at least * two characters for a multi-fold */ - const U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL) + const U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL) ? s_end : s_end -1; - while (s < upper) { + while (s < upper) { int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end); if (! len) { /* Not a multi-char fold. 
*/ if (*s == LATIN_SMALL_LETTER_SHARP_S @@ -4465,13 +4466,13 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, if (OP(scan) != EXACTF && OP(scan) != EXACTFL) { OP(scan) = EXACTFUP; } - } + } *min_subtract += len - 1; s += len; - } + } #endif - } + } } #ifdef DEBUGGING @@ -4479,9 +4480,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, * ops and/or strings with fake optimized ops */ n = scan + NODE_SZ_STR(scan); while (n <= stop) { - OP(n) = OPTIMIZED; - FLAGS(n) = 0; - NEXT_OFF(n) = 0; + OP(n) = OPTIMIZED; + FLAGS(n) = 0; + NEXT_OFF(n) = 0; n++; } #endif @@ -4552,19 +4553,19 @@ S_rck_elide_nothing(pTHX_ regnode *node) STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, - regnode *last, - scan_data_t *data, - I32 stopparen, + regnode *last, + scan_data_t *data, + I32 stopparen, U32 recursed_depth, - regnode_ssc *and_withp, - U32 flags, U32 depth, bool was_mutate_ok) - /* scanp: Start here (read-write). */ - /* deltap: Write maxlen-minlen here. */ - /* last: Stop before this one. */ - /* data: string data about the pattern */ - /* stopparen: treat close N as END */ - /* recursed: which subroutines have we recursed into */ - /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */ + regnode_ssc *and_withp, + U32 flags, U32 depth, bool was_mutate_ok) + /* scanp: Start here (read-write). */ + /* deltap: Write maxlen-minlen here. */ + /* last: Stop before this one. 
*/ + /* data: string data about the pattern */ + /* stopparen: treat close N as END */ + /* recursed: which subroutines have we recursed into */ + /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */ { SSize_t final_minlen; /* There must be at least this number of characters to match */ @@ -4627,12 +4628,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, UV min_subtract = 0; /* How mmany chars to subtract from the minimum node length to get a real minimum (because the folded version may be shorter) */ - bool unfolded_multi_char = FALSE; + bool unfolded_multi_char = FALSE; /* avoid mutating ops if we are anywhere within the recursed or * enframed handling for a GOSUB: the outermost level will handle it. */ bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub); - /* Peephole optimizer: */ + /* Peephole optimizer: */ DEBUG_STUDYDATA("Peep", data, depth, is_inf); DEBUG_PEEP("Peep", scan, depth, flags); @@ -4690,21 +4691,21 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, OP(scan) == BRANCHJ || OP(scan) == IFTHEN ) { - next = regnext(scan); - code = OP(scan); + next = regnext(scan); + code = OP(scan); /* The op(next)==code check below is to see if we * have "BRANCH-BRANCH", "BRANCHJ-BRANCHJ", "IFTHEN-IFTHEN" * IFTHEN is special as it might not appear in pairs. * Not sure whether BRANCH-BRANCHJ is possible, regardless * we dont handle it cleanly. */ - if (OP(next) == code || code == IFTHEN) { + if (OP(next) == code || code == IFTHEN) { /* NOTE - There is similar code to this block below for * handling TRIE nodes on a re-study. If you change stuff here * check there too. */ - SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0; - regnode_ssc accum; - regnode * const startbranch=scan; + SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0; + regnode_ssc accum; + regnode * const startbranch=scan; if (flags & SCF_DO_SUBSTR) { /* Cannot merge strings after this. 
*/ @@ -4712,164 +4713,164 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } if (flags & SCF_DO_STCLASS) - ssc_init_zero(pRExC_state, &accum); + ssc_init_zero(pRExC_state, &accum); - while (OP(scan) == code) { - SSize_t deltanext, minnext, fake; - I32 f = 0; - regnode_ssc this_class; + while (OP(scan) == code) { + SSize_t deltanext, minnext, fake; + I32 f = 0; + regnode_ssc this_class; DEBUG_PEEP("Branch", scan, depth, flags); - num++; + num++; StructCopy(&zero_scan_data, &data_fake, scan_data_t); - if (data) { - data_fake.whilem_c = data->whilem_c; - data_fake.last_closep = data->last_closep; - } - else - data_fake.last_closep = &fake; + if (data) { + data_fake.whilem_c = data->whilem_c; + data_fake.last_closep = data->last_closep; + } + else + data_fake.last_closep = &fake; - data_fake.pos_delta = delta; - next = regnext(scan); + data_fake.pos_delta = delta; + next = regnext(scan); scan = NEXTOPER(scan); /* everything */ if (code != BRANCH) /* everything but BRANCH */ - scan = NEXTOPER(scan); + scan = NEXTOPER(scan); - if (flags & SCF_DO_STCLASS) { - ssc_init(pRExC_state, &this_class); - data_fake.start_class = &this_class; - f = SCF_DO_STCLASS_AND; - } - if (flags & SCF_WHILEM_VISITED_POS) - f |= SCF_WHILEM_VISITED_POS; + if (flags & SCF_DO_STCLASS) { + ssc_init(pRExC_state, &this_class); + data_fake.start_class = &this_class; + f = SCF_DO_STCLASS_AND; + } + if (flags & SCF_WHILEM_VISITED_POS) + f |= SCF_WHILEM_VISITED_POS; - /* we suppose the run is continuous, last=next...*/ + /* we suppose the run is continuous, last=next...*/ /* recurse study_chunk() for each BRANCH in an alternation */ - minnext = study_chunk(pRExC_state, &scan, minlenp, + minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, next, &data_fake, stopparen, recursed_depth, NULL, f, depth+1, mutate_ok); - if (min1 > minnext) - min1 = minnext; - if (deltanext == OPTIMIZE_INFTY) { - is_inf = is_inf_internal = 1; - max1 = OPTIMIZE_INFTY; - } else if (max1 < minnext + 
deltanext) - max1 = minnext + deltanext; - scan = next; - if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) - pars++; - if (data_fake.flags & SCF_SEEN_ACCEPT) { - if ( stopmin > minnext) - stopmin = min + min1; - flags &= ~SCF_DO_SUBSTR; - if (data) - data->flags |= SCF_SEEN_ACCEPT; - } - if (data) { - if (data_fake.flags & SF_HAS_EVAL) - data->flags |= SF_HAS_EVAL; - data->whilem_c = data_fake.whilem_c; - } - if (flags & SCF_DO_STCLASS) - ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class); - } - if (code == IFTHEN && num < 2) /* Empty ELSE branch */ - min1 = 0; - if (flags & SCF_DO_SUBSTR) { - data->pos_min += min1; - if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1)) - data->pos_delta = OPTIMIZE_INFTY; - else - data->pos_delta += max1 - min1; - if (max1 != min1 || is_inf) - data->cur_is_floating = 1; - } - min += min1; - if (delta == OPTIMIZE_INFTY - || OPTIMIZE_INFTY - delta - (max1 - min1) < 0) - delta = OPTIMIZE_INFTY; - else - delta += max1 - min1; - if (flags & SCF_DO_STCLASS_OR) { - ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum); - if (min1) { - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); - flags &= ~SCF_DO_STCLASS; - } - } - else if (flags & SCF_DO_STCLASS_AND) { - if (min1) { - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum); - flags &= ~SCF_DO_STCLASS; - } - else { - /* Switch to OR mode: cache the old value of - * data->start_class */ - INIT_AND_WITHP; - StructCopy(data->start_class, and_withp, regnode_ssc); - flags &= ~SCF_DO_STCLASS_AND; - StructCopy(&accum, data->start_class, regnode_ssc); - flags |= SCF_DO_STCLASS_OR; - } - } + if (min1 > minnext) + min1 = minnext; + if (deltanext == OPTIMIZE_INFTY) { + is_inf = is_inf_internal = 1; + max1 = OPTIMIZE_INFTY; + } else if (max1 < minnext + deltanext) + max1 = minnext + deltanext; + scan = next; + if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR)) + pars++; + if (data_fake.flags & SCF_SEEN_ACCEPT) { + if ( stopmin > minnext) + 
stopmin = min + min1; + flags &= ~SCF_DO_SUBSTR; + if (data) + data->flags |= SCF_SEEN_ACCEPT; + } + if (data) { + if (data_fake.flags & SF_HAS_EVAL) + data->flags |= SF_HAS_EVAL; + data->whilem_c = data_fake.whilem_c; + } + if (flags & SCF_DO_STCLASS) + ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class); + } + if (code == IFTHEN && num < 2) /* Empty ELSE branch */ + min1 = 0; + if (flags & SCF_DO_SUBSTR) { + data->pos_min += min1; + if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1)) + data->pos_delta = OPTIMIZE_INFTY; + else + data->pos_delta += max1 - min1; + if (max1 != min1 || is_inf) + data->cur_is_floating = 1; + } + min += min1; + if (delta == OPTIMIZE_INFTY + || OPTIMIZE_INFTY - delta - (max1 - min1) < 0) + delta = OPTIMIZE_INFTY; + else + delta += max1 - min1; + if (flags & SCF_DO_STCLASS_OR) { + ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum); + if (min1) { + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); + flags &= ~SCF_DO_STCLASS; + } + } + else if (flags & SCF_DO_STCLASS_AND) { + if (min1) { + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum); + flags &= ~SCF_DO_STCLASS; + } + else { + /* Switch to OR mode: cache the old value of + * data->start_class */ + INIT_AND_WITHP; + StructCopy(data->start_class, and_withp, regnode_ssc); + flags &= ~SCF_DO_STCLASS_AND; + StructCopy(&accum, data->start_class, regnode_ssc); + flags |= SCF_DO_STCLASS_OR; + } + } if (PERL_ENABLE_TRIE_OPTIMISATION && OP(startbranch) == BRANCH && mutate_ok ) { - /* demq. + /* demq. Assuming this was/is a branch we are dealing with: 'scan' now points at the item that follows the branch sequence, whatever it is. 
We now start at the beginning of the sequence and look for subsequences of - BRANCH->EXACT=>x1 - BRANCH->EXACT=>x2 - tail + BRANCH->EXACT=>x1 + BRANCH->EXACT=>x2 + tail which would be constructed from a pattern like /A|LIST|OF|WORDS/ - If we can find such a subsequence we need to turn the first - element into a trie and then add the subsequent branch exact - strings to the trie. + If we can find such a subsequence we need to turn the first + element into a trie and then add the subsequent branch exact + strings to the trie. - We have two cases + We have two cases 1. patterns where the whole set of branches can be converted. - 2. patterns where only a subset can be converted. + 2. patterns where only a subset can be converted. - In case 1 we can replace the whole set with a single regop - for the trie. In case 2 we need to keep the start and end - branches so + In case 1 we can replace the whole set with a single regop + for the trie. In case 2 we need to keep the start and end + branches so - 'BRANCH EXACT; BRANCH EXACT; BRANCH X' - becomes BRANCH TRIE; BRANCH X; + 'BRANCH EXACT; BRANCH EXACT; BRANCH X' + becomes BRANCH TRIE; BRANCH X; - There is an additional case, that being where there is a - common prefix, which gets split out into an EXACT like node - preceding the TRIE node. + There is an additional case, that being where there is a + common prefix, which gets split out into an EXACT like node + preceding the TRIE node. - If x(1..n)==tail then we can do a simple trie, if not we make - a "jump" trie, such that when we match the appropriate word - we "jump" to the appropriate tail node. Essentially we turn - a nested if into a case structure of sorts. + If x(1..n)==tail then we can do a simple trie, if not we make + a "jump" trie, such that when we match the appropriate word + we "jump" to the appropriate tail node. Essentially we turn + a nested if into a case structure of sorts. 
- */ + */ - int made=0; - if (!re_trie_maxbuff) { - re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1); - if (!SvIOK(re_trie_maxbuff)) - sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT); - } + int made=0; + if (!re_trie_maxbuff) { + re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1); + if (!SvIOK(re_trie_maxbuff)) + sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT); + } if ( SvIV(re_trie_maxbuff)>=0 ) { regnode *cur; regnode *first = (regnode *)NULL; @@ -5005,8 +5006,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } Perl_re_printf( aTHX_ "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n", REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur), - PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype] - ); + PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype] + ); }); /* Is noper a trieable nodetype that can be merged @@ -5029,15 +5030,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * otherwise we update the end pointer. */ if ( !first ) { first = cur; - if ( noper_trietype == NOTHING ) { + if ( noper_trietype == NOTHING ) { #if !defined(DEBUGGING) && !defined(NOJUMPTRIE) - regnode * const noper_next = regnext( noper ); + regnode * const noper_next = regnext( noper ); U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0; - U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0; + U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0; #endif if ( noper_next_trietype ) { - trietype = noper_next_trietype; + trietype = noper_next_trietype; } else if (noper_next_type) { /* a NOTHING regop is 1 regop wide. 
* We need at least two for a trie @@ -5052,8 +5053,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, trietype = noper_trietype; prev = cur; } - if (first) - count++; + if (first) + count++; } /* end handle mergable triable node */ else { /* handle unmergable node - @@ -5156,12 +5157,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } /* TRIE_MAXBUF is non zero */ } /* do trie */ - } - else if ( code == BRANCHJ ) { /* single branch is optimized. */ - scan = NEXTOPER(NEXTOPER(scan)); - } else /* single branch is optimized. */ - scan = NEXTOPER(scan); - continue; + } + else if ( code == BRANCHJ ) { /* single branch is optimized. */ + scan = NEXTOPER(NEXTOPER(scan)); + } else /* single branch is optimized. */ + scan = NEXTOPER(scan); + continue; } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) { I32 paren = 0; regnode *start = NULL; @@ -5249,12 +5250,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, flags &= ~SCF_DO_STCLASS; start= NULL; /* reset start so we dont recurse later on. */ - } + } } else { - paren = stopparen; + paren = stopparen; start = scan + 2; - end = regnext(scan); - } + end = regnext(scan); + } if (start) { scan_frame *newframe; assert(end); @@ -5285,73 +5286,73 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, DEBUG_STUDYDATA("frame-new", data, depth, is_inf); DEBUG_PEEP("fnew", scan, depth, flags); - frame = newframe; - scan = start; - stopparen = paren; - last = end; + frame = newframe; + scan = start; + stopparen = paren; + last = end; depth = depth + 1; recursed_depth= my_recursed_depth; - continue; - } - } - else if (PL_regkind[OP(scan)] == EXACT && ! isEXACTFish(OP(scan))) { - SSize_t bytelen = STR_LEN(scan), charlen; - UV uc; + continue; + } + } + else if (PL_regkind[OP(scan)] == EXACT && ! 
isEXACTFish(OP(scan))) { + SSize_t bytelen = STR_LEN(scan), charlen; + UV uc; assert(bytelen); - if (UTF) { - const U8 * const s = (U8*)STRING(scan); - uc = utf8_to_uvchr_buf(s, s + bytelen, NULL); - charlen = utf8_length(s, s + bytelen); - } else { - uc = *((U8*)STRING(scan)); + if (UTF) { + const U8 * const s = (U8*)STRING(scan); + uc = utf8_to_uvchr_buf(s, s + bytelen, NULL); + charlen = utf8_length(s, s + bytelen); + } else { + uc = *((U8*)STRING(scan)); charlen = bytelen; - } - min += charlen; - if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */ - /* The code below prefers earlier match for fixed - offset, later match for variable offset. */ - if (data->last_end == -1) { /* Update the start info. */ - data->last_start_min = data->pos_min; + } + min += charlen; + if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */ + /* The code below prefers earlier match for fixed + offset, later match for variable offset. */ + if (data->last_end == -1) { /* Update the start info. */ + data->last_start_min = data->pos_min; data->last_start_max = is_inf ? OPTIMIZE_INFTY : (data->pos_delta > OPTIMIZE_INFTY - data->pos_min) ? OPTIMIZE_INFTY : data->pos_min + data->pos_delta; - } - sv_catpvn(data->last_found, STRING(scan), bytelen); - if (UTF) - SvUTF8_on(data->last_found); - { - SV * const sv = data->last_found; - MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ? - mg_find(sv, PERL_MAGIC_utf8) : NULL; - if (mg && mg->mg_len >= 0) - mg->mg_len += charlen; - } - data->last_end = data->pos_min + charlen; - data->pos_min += charlen; /* As in the first entry. */ - data->flags &= ~SF_BEFORE_EOL; - } + } + sv_catpvn(data->last_found, STRING(scan), bytelen); + if (UTF) + SvUTF8_on(data->last_found); + { + SV * const sv = data->last_found; + MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ? 
+ mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len >= 0) + mg->mg_len += charlen; + } + data->last_end = data->pos_min + charlen; + data->pos_min += charlen; /* As in the first entry. */ + data->flags &= ~SF_BEFORE_EOL; + } /* ANDing the code point leaves at most it, and not in locale, and * can't match null string */ - if (flags & SCF_DO_STCLASS_AND) { + if (flags & SCF_DO_STCLASS_AND) { ssc_cp_and(data->start_class, uc); ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING; ssc_clear_locale(data->start_class); - } - else if (flags & SCF_DO_STCLASS_OR) { + } + else if (flags & SCF_DO_STCLASS_OR) { ssc_add_cp(data->start_class, uc); - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */ ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING; - } - flags &= ~SCF_DO_STCLASS; - } + } + flags &= ~SCF_DO_STCLASS; + } else if (PL_regkind[OP(scan)] == EXACT) { /* But OP != EXACT!, so is EXACTFish */ - SSize_t bytelen = STR_LEN(scan), charlen; + SSize_t bytelen = STR_LEN(scan), charlen; const U8 * s = (U8*)STRING(scan); /* Replace a length 1 ASCII fold pair node with an ANYOFM node, @@ -5374,28 +5375,28 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, continue; } - /* Search for fixed substrings supports EXACT only. */ - if (flags & SCF_DO_SUBSTR) { - assert(data); + /* Search for fixed substrings supports EXACT only. */ + if (flags & SCF_DO_SUBSTR) { + assert(data); scan_commit(pRExC_state, data, minlenp, is_inf); - } + } charlen = UTF ? 
(SSize_t) utf8_length(s, s + bytelen) : bytelen; - if (unfolded_multi_char) { + if (unfolded_multi_char) { RExC_seen |= REG_UNFOLDED_MULTI_SEEN; - } - min += charlen - min_subtract; + } + min += charlen - min_subtract; assert (min >= 0); delta += min_subtract; - if (flags & SCF_DO_SUBSTR) { - data->pos_min += charlen - min_subtract; - if (data->pos_min < 0) { + if (flags & SCF_DO_SUBSTR) { + data->pos_min += charlen - min_subtract; + if (data->pos_min < 0) { data->pos_min = 0; } data->pos_delta += min_subtract; - if (min_subtract) { - data->cur_is_floating = 1; /* float */ - } - } + if (min_subtract) { + data->cur_is_floating = 1; /* float */ + } + } if (flags & SCF_DO_STCLASS) { SV* EXACTF_invlist = make_exactf_invlist(pRExC_state, scan); @@ -5418,41 +5419,41 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, flags &= ~SCF_DO_STCLASS; SvREFCNT_dec(EXACTF_invlist); } - } - else if (REGNODE_VARIES(OP(scan))) { - SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0; - I32 fl = 0, f = flags; - regnode * const oscan = scan; - regnode_ssc this_class; - regnode_ssc *oclass = NULL; - I32 next_is_eval = 0; - - switch (PL_regkind[OP(scan)]) { - case WHILEM: /* End of (?:...)* . */ - scan = NEXTOPER(scan); - goto finish; - case PLUS: - if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) { - next = NEXTOPER(scan); - if ( ( PL_regkind[OP(next)] == EXACT + } + else if (REGNODE_VARIES(OP(scan))) { + SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0; + I32 fl = 0, f = flags; + regnode * const oscan = scan; + regnode_ssc this_class; + regnode_ssc *oclass = NULL; + I32 next_is_eval = 0; + + switch (PL_regkind[OP(scan)]) { + case WHILEM: /* End of (?:...)* . */ + scan = NEXTOPER(scan); + goto finish; + case PLUS: + if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) { + next = NEXTOPER(scan); + if ( ( PL_regkind[OP(next)] == EXACT && ! 
isEXACTFish(OP(next))) || (flags & SCF_DO_STCLASS)) { - mincount = 1; - maxcount = REG_INFTY; - next = regnext(scan); - scan = NEXTOPER(scan); - goto do_curly; - } - } - if (flags & SCF_DO_SUBSTR) - data->pos_min++; + mincount = 1; + maxcount = REG_INFTY; + next = regnext(scan); + scan = NEXTOPER(scan); + goto do_curly; + } + } + if (flags & SCF_DO_SUBSTR) + data->pos_min++; /* This will bypass the formal 'min += minnext * mincount' * calculation in the do_curly path, so assumes min width * of the PLUS payload is exactly one. */ - min++; - /* FALLTHROUGH */ - case STAR: + min++; + /* FALLTHROUGH */ + case STAR: next = NEXTOPER(scan); /* This temporary node can now be turned into EXACTFU, and @@ -5483,121 +5484,121 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, FLAGS(next) = mask; } - if (flags & SCF_DO_STCLASS) { - mincount = 0; - maxcount = REG_INFTY; - next = regnext(scan); - scan = NEXTOPER(scan); - goto do_curly; - } - if (flags & SCF_DO_SUBSTR) { + if (flags & SCF_DO_STCLASS) { + mincount = 0; + maxcount = REG_INFTY; + next = regnext(scan); + scan = NEXTOPER(scan); + goto do_curly; + } + if (flags & SCF_DO_SUBSTR) { scan_commit(pRExC_state, data, minlenp, is_inf); /* Cannot extend fixed substrings */ - data->cur_is_floating = 1; /* float */ - } + data->cur_is_floating = 1; /* float */ + } is_inf = is_inf_internal = 1; scan = regnext(scan); - goto optimize_curly_tail; - case CURLY: - if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM) - && (scan->flags == stopparen)) - { - mincount = 1; - maxcount = 1; - } else { - mincount = ARG1(scan); - maxcount = ARG2(scan); - } - next = regnext(scan); - if (OP(scan) == CURLYX) { - I32 lp = (data ? *(data->last_closep) : 0); - scan->flags = ((lp <= (I32)U8_MAX) ? 
(U8)lp : U8_MAX); - } - scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS; - next_is_eval = (OP(scan) == EVAL); - do_curly: - if (flags & SCF_DO_SUBSTR) { + goto optimize_curly_tail; + case CURLY: + if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM) + && (scan->flags == stopparen)) + { + mincount = 1; + maxcount = 1; + } else { + mincount = ARG1(scan); + maxcount = ARG2(scan); + } + next = regnext(scan); + if (OP(scan) == CURLYX) { + I32 lp = (data ? *(data->last_closep) : 0); + scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX); + } + scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS; + next_is_eval = (OP(scan) == EVAL); + do_curly: + if (flags & SCF_DO_SUBSTR) { if (mincount == 0) scan_commit(pRExC_state, data, minlenp, is_inf); /* Cannot extend fixed substrings */ - pos_before = data->pos_min; - } - if (data) { - fl = data->flags; - data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL); - if (is_inf) - data->flags |= SF_IS_INF; - } - if (flags & SCF_DO_STCLASS) { - ssc_init(pRExC_state, &this_class); - oclass = data->start_class; - data->start_class = &this_class; - f |= SCF_DO_STCLASS_AND; - f &= ~SCF_DO_STCLASS_OR; - } - /* Exclude from super-linear cache processing any {n,m} - regops for which the combination of input pos and regex - pos is not enough information to determine if a match - will be possible. - - For example, in the regex /foo(bar\s*){4,8}baz/ with the - regex pos at the \s*, the prospects for a match depend not - only on the input position but also on how many (bar\s*) - repeats into the {4,8} we are. 
*/ + pos_before = data->pos_min; + } + if (data) { + fl = data->flags; + data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL); + if (is_inf) + data->flags |= SF_IS_INF; + } + if (flags & SCF_DO_STCLASS) { + ssc_init(pRExC_state, &this_class); + oclass = data->start_class; + data->start_class = &this_class; + f |= SCF_DO_STCLASS_AND; + f &= ~SCF_DO_STCLASS_OR; + } + /* Exclude from super-linear cache processing any {n,m} + regops for which the combination of input pos and regex + pos is not enough information to determine if a match + will be possible. + + For example, in the regex /foo(bar\s*){4,8}baz/ with the + regex pos at the \s*, the prospects for a match depend not + only on the input position but also on how many (bar\s*) + repeats into the {4,8} we are. */ if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY)) - f &= ~SCF_WHILEM_VISITED_POS; + f &= ~SCF_WHILEM_VISITED_POS; - /* This will finish on WHILEM, setting scan, or on NULL: */ + /* This will finish on WHILEM, setting scan, or on NULL: */ /* recurse study_chunk() on loop bodies */ - minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, + minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext, last, data, stopparen, recursed_depth, NULL, (mincount == 0 ? 
(f & ~SCF_DO_SUBSTR) : f) , depth+1, mutate_ok); - if (flags & SCF_DO_STCLASS) - data->start_class = oclass; - if (mincount == 0 || minnext == 0) { - if (flags & SCF_DO_STCLASS_OR) { - ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class); - } - else if (flags & SCF_DO_STCLASS_AND) { - /* Switch to OR mode: cache the old value of - * data->start_class */ - INIT_AND_WITHP; - StructCopy(data->start_class, and_withp, regnode_ssc); - flags &= ~SCF_DO_STCLASS_AND; - StructCopy(&this_class, data->start_class, regnode_ssc); - flags |= SCF_DO_STCLASS_OR; + if (flags & SCF_DO_STCLASS) + data->start_class = oclass; + if (mincount == 0 || minnext == 0) { + if (flags & SCF_DO_STCLASS_OR) { + ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class); + } + else if (flags & SCF_DO_STCLASS_AND) { + /* Switch to OR mode: cache the old value of + * data->start_class */ + INIT_AND_WITHP; + StructCopy(data->start_class, and_withp, regnode_ssc); + flags &= ~SCF_DO_STCLASS_AND; + StructCopy(&this_class, data->start_class, regnode_ssc); + flags |= SCF_DO_STCLASS_OR; ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING; - } - } else { /* Non-zero len */ - if (flags & SCF_DO_STCLASS_OR) { - ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class); - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); - } - else if (flags & SCF_DO_STCLASS_AND) - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class); - flags &= ~SCF_DO_STCLASS; - } - if (!scan) /* It was not CURLYX, but CURLY. */ - scan = next; - if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR) - /* ? quantifier ok, except for (?{ ... 
}) */ - && (next_is_eval || !(mincount == 0 && maxcount == 1)) - && (minnext == 0) && (deltanext == 0) - && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) + } + } else { /* Non-zero len */ + if (flags & SCF_DO_STCLASS_OR) { + ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class); + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); + } + else if (flags & SCF_DO_STCLASS_AND) + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class); + flags &= ~SCF_DO_STCLASS; + } + if (!scan) /* It was not CURLYX, but CURLY. */ + scan = next; + if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR) + /* ? quantifier ok, except for (?{ ... }) */ + && (next_is_eval || !(mincount == 0 && maxcount == 1)) + && (minnext == 0) && (deltanext == 0) + && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR)) && maxcount <= REG_INFTY/3) /* Complement check for big count */ - { - _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP), + { + _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP), Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Quantifier unexpected on zero-length expression " "in regex m/%" UTF8f "/", - UTF8fARG(UTF, RExC_precomp_end - RExC_precomp, - RExC_precomp))); + UTF8fARG(UTF, RExC_precomp_end - RExC_precomp, + RExC_precomp))); } if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext ) @@ -5606,146 +5607,146 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, FAIL("Regexp out of space"); } - min += minnext * mincount; - is_inf_internal |= deltanext == OPTIMIZE_INFTY + min += minnext * mincount; + is_inf_internal |= deltanext == OPTIMIZE_INFTY || (maxcount == REG_INFTY && minnext + deltanext > 0); - is_inf |= is_inf_internal; + is_inf |= is_inf_internal; if (is_inf) { - delta = OPTIMIZE_INFTY; + delta = OPTIMIZE_INFTY; } else { - delta += (minnext + deltanext) * maxcount + delta += (minnext + deltanext) * maxcount - minnext * mincount; } - /* Try powerful optimization CURLYX => CURLYN. 
*/ - if ( OP(oscan) == CURLYX && data - && data->flags & SF_IN_PAR - && !(data->flags & SF_HAS_EVAL) - && !deltanext && minnext == 1 + /* Try powerful optimization CURLYX => CURLYN. */ + if ( OP(oscan) == CURLYX && data + && data->flags & SF_IN_PAR + && !(data->flags & SF_HAS_EVAL) + && !deltanext && minnext == 1 && mutate_ok ) { - /* Try to optimize to CURLYN. */ - regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; - regnode * const nxt1 = nxt; + /* Try to optimize to CURLYN. */ + regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; + regnode * const nxt1 = nxt; #ifdef DEBUGGING - regnode *nxt2; + regnode *nxt2; #endif - /* Skip open. */ - nxt = regnext(nxt); - if (!REGNODE_SIMPLE(OP(nxt)) - && !(PL_regkind[OP(nxt)] == EXACT - && STR_LEN(nxt) == 1)) - goto nogo; + /* Skip open. */ + nxt = regnext(nxt); + if (!REGNODE_SIMPLE(OP(nxt)) + && !(PL_regkind[OP(nxt)] == EXACT + && STR_LEN(nxt) == 1)) + goto nogo; #ifdef DEBUGGING - nxt2 = nxt; + nxt2 = nxt; #endif - nxt = regnext(nxt); - if (OP(nxt) != CLOSE) - goto nogo; - if (RExC_open_parens) { + nxt = regnext(nxt); + if (OP(nxt) != CLOSE) + goto nogo; + if (RExC_open_parens) { /*open->CURLYM*/ RExC_open_parens[ARG(nxt1)] = REGNODE_OFFSET(oscan); /*close->while*/ RExC_close_parens[ARG(nxt1)] = REGNODE_OFFSET(nxt) + 2; - } - /* Now we know that nxt2 is the only contents: */ - oscan->flags = (U8)ARG(nxt); - OP(oscan) = CURLYN; - OP(nxt1) = NOTHING; /* was OPEN. */ + } + /* Now we know that nxt2 is the only contents: */ + oscan->flags = (U8)ARG(nxt); + OP(oscan) = CURLYN; + OP(nxt1) = NOTHING; /* was OPEN. */ #ifdef DEBUGGING - OP(nxt1 + 1) = OPTIMIZED; /* was count. */ - NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. */ - NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */ - OP(nxt) = OPTIMIZED; /* was CLOSE. */ - OP(nxt + 1) = OPTIMIZED; /* was count. */ - NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */ + OP(nxt1 + 1) = OPTIMIZED; /* was count. */ + NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. 
*/ + NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */ + OP(nxt) = OPTIMIZED; /* was CLOSE. */ + OP(nxt + 1) = OPTIMIZED; /* was count. */ + NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */ #endif - } - nogo: - - /* Try optimization CURLYX => CURLYM. */ - if ( OP(oscan) == CURLYX && data - && !(data->flags & SF_HAS_PAR) - && !(data->flags & SF_HAS_EVAL) - && !deltanext /* atom is fixed width */ - && minnext != 0 /* CURLYM can't handle zero width */ + } + nogo: + + /* Try optimization CURLYX => CURLYM. */ + if ( OP(oscan) == CURLYX && data + && !(data->flags & SF_HAS_PAR) + && !(data->flags & SF_HAS_EVAL) + && !deltanext /* atom is fixed width */ + && minnext != 0 /* CURLYM can't handle zero width */ /* Nor characters whose fold at run-time may be * multi-character */ && ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN) && mutate_ok - ) { - /* XXXX How to optimize if data == 0? */ - /* Optimize to a simpler form. */ - regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */ - regnode *nxt2; - - OP(oscan) = CURLYM; - while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/ - && (OP(nxt2) != WHILEM)) - nxt = nxt2; - OP(nxt2) = SUCCEED; /* Whas WHILEM */ - /* Need to optimize away parenths. */ - if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) { - /* Set the parenth number. */ - regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/ - - oscan->flags = (U8)ARG(nxt); - if (RExC_open_parens) { + ) { + /* XXXX How to optimize if data == 0? */ + /* Optimize to a simpler form. */ + regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */ + regnode *nxt2; + + OP(oscan) = CURLYM; + while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/ + && (OP(nxt2) != WHILEM)) + nxt = nxt2; + OP(nxt2) = SUCCEED; /* Whas WHILEM */ + /* Need to optimize away parenths. */ + if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) { + /* Set the parenth number. 
*/ + regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/ + + oscan->flags = (U8)ARG(nxt); + if (RExC_open_parens) { /*open->CURLYM*/ RExC_open_parens[ARG(nxt1)] = REGNODE_OFFSET(oscan); /*close->NOTHING*/ RExC_close_parens[ARG(nxt1)] = REGNODE_OFFSET(nxt2) + 1; - } - OP(nxt1) = OPTIMIZED; /* was OPEN. */ - OP(nxt) = OPTIMIZED; /* was CLOSE. */ + } + OP(nxt1) = OPTIMIZED; /* was OPEN. */ + OP(nxt) = OPTIMIZED; /* was CLOSE. */ #ifdef DEBUGGING - OP(nxt1 + 1) = OPTIMIZED; /* was count. */ - OP(nxt + 1) = OPTIMIZED; /* was count. */ - NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */ - NEXT_OFF(nxt + 1) = 0; /* just for consistency. */ + OP(nxt1 + 1) = OPTIMIZED; /* was count. */ + OP(nxt + 1) = OPTIMIZED; /* was count. */ + NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */ + NEXT_OFF(nxt + 1) = 0; /* just for consistency. */ #endif #if 0 - while ( nxt1 && (OP(nxt1) != WHILEM)) { - regnode *nnxt = regnext(nxt1); - if (nnxt == nxt) { - if (reg_off_by_arg[OP(nxt1)]) - ARG_SET(nxt1, nxt2 - nxt1); - else if (nxt2 - nxt1 < U16_MAX) - NEXT_OFF(nxt1) = nxt2 - nxt1; - else - OP(nxt) = NOTHING; /* Cannot beautify */ - } - nxt1 = nnxt; - } + while ( nxt1 && (OP(nxt1) != WHILEM)) { + regnode *nnxt = regnext(nxt1); + if (nnxt == nxt) { + if (reg_off_by_arg[OP(nxt1)]) + ARG_SET(nxt1, nxt2 - nxt1); + else if (nxt2 - nxt1 < U16_MAX) + NEXT_OFF(nxt1) = nxt2 - nxt1; + else + OP(nxt) = NOTHING; /* Cannot beautify */ + } + nxt1 = nnxt; + } #endif - /* Optimize again: */ + /* Optimize again: */ /* recurse study_chunk() on optimised CURLYX => CURLYM */ - study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, + study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt, NULL, stopparen, recursed_depth, NULL, 0, depth+1, mutate_ok); - } - else - oscan->flags = 0; - } - else if ((OP(oscan) == CURLYX) - && (flags & SCF_WHILEM_VISITED_POS) - /* See the comment on a similar expression above. - However, this time it's not a subexpression - we care about, but the expression itself. 
*/ - && (maxcount == REG_INFTY) - && data) { - /* This stays as CURLYX, we can put the count/of pair. */ - /* Find WHILEM (as in regexec.c) */ - regnode *nxt = oscan + NEXT_OFF(oscan); - - if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */ - nxt += ARG(nxt); + } + else + oscan->flags = 0; + } + else if ((OP(oscan) == CURLYX) + && (flags & SCF_WHILEM_VISITED_POS) + /* See the comment on a similar expression above. + However, this time it's not a subexpression + we care about, but the expression itself. */ + && (maxcount == REG_INFTY) + && data) { + /* This stays as CURLYX, we can put the count/of pair. */ + /* Find WHILEM (as in regexec.c) */ + regnode *nxt = oscan + NEXT_OFF(oscan); + + if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */ + nxt += ARG(nxt); nxt = PREVOPER(nxt); if (nxt->flags & 0xf) { /* we've already set whilem count on this node */ @@ -5754,68 +5755,68 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, nxt->flags = (U8)(data->whilem_c | (RExC_whilem_seen << 4)); /* On WHILEM */ } - } - if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) - pars++; - if (flags & SCF_DO_SUBSTR) { - SV *last_str = NULL; + } + if (data && fl & (SF_HAS_PAR|SF_IN_PAR)) + pars++; + if (flags & SCF_DO_SUBSTR) { + SV *last_str = NULL; STRLEN last_chrs = 0; - int counted = mincount != 0; + int counted = mincount != 0; if (data->last_end > 0 && mincount != 0) { /* Ends with a string. */ - SSize_t b = pos_before >= data->last_start_min - ? pos_before : data->last_start_min; - STRLEN l; - const char * const s = SvPV_const(data->last_found, l); - SSize_t old = b - data->last_start_min; + SSize_t b = pos_before >= data->last_start_min + ? 
pos_before : data->last_start_min; + STRLEN l; + const char * const s = SvPV_const(data->last_found, l); + SSize_t old = b - data->last_start_min; assert(old >= 0); - if (UTF) - old = utf8_hop_forward((U8*)s, old, + if (UTF) + old = utf8_hop_forward((U8*)s, old, (U8 *) SvEND(data->last_found)) - (U8*)s; - l -= old; - /* Get the added string: */ - last_str = newSVpvn_utf8(s + old, l, UTF); + l -= old; + /* Get the added string: */ + last_str = newSVpvn_utf8(s + old, l, UTF); last_chrs = UTF ? utf8_length((U8*)(s + old), (U8*)(s + old + l)) : l; - if (deltanext == 0 && pos_before == b) { - /* What was added is a constant string */ - if (mincount > 1) { + if (deltanext == 0 && pos_before == b) { + /* What was added is a constant string */ + if (mincount > 1) { - SvGROW(last_str, (mincount * l) + 1); - repeatcpy(SvPVX(last_str) + l, - SvPVX_const(last_str), l, + SvGROW(last_str, (mincount * l) + 1); + repeatcpy(SvPVX(last_str) + l, + SvPVX_const(last_str), l, mincount - 1); - SvCUR_set(last_str, SvCUR(last_str) * mincount); - /* Add additional parts. */ - SvCUR_set(data->last_found, - SvCUR(data->last_found) - l); - sv_catsv(data->last_found, last_str); - { - SV * sv = data->last_found; - MAGIC *mg = - SvUTF8(sv) && SvMAGICAL(sv) ? - mg_find(sv, PERL_MAGIC_utf8) : NULL; - if (mg && mg->mg_len >= 0) - mg->mg_len += last_chrs * (mincount-1); - } + SvCUR_set(last_str, SvCUR(last_str) * mincount); + /* Add additional parts. */ + SvCUR_set(data->last_found, + SvCUR(data->last_found) - l); + sv_catsv(data->last_found, last_str); + { + SV * sv = data->last_found; + MAGIC *mg = + SvUTF8(sv) && SvMAGICAL(sv) ? 
+ mg_find(sv, PERL_MAGIC_utf8) : NULL; + if (mg && mg->mg_len >= 0) + mg->mg_len += last_chrs * (mincount-1); + } last_chrs *= mincount; - data->last_end += l * (mincount - 1); - } - } else { - /* start offset must point into the last copy */ - data->last_start_min += minnext * (mincount - 1); - data->last_start_max = + data->last_end += l * (mincount - 1); + } + } else { + /* start offset must point into the last copy */ + data->last_start_min += minnext * (mincount - 1); + data->last_start_max = is_inf ? OPTIMIZE_INFTY - : data->last_start_max + + : data->last_start_max + (maxcount - 1) * (minnext + data->pos_delta); - } - } - /* It is counted once already... */ - data->pos_min += minnext * (mincount - counted); + } + } + /* It is counted once already... */ + data->pos_min += minnext * (mincount - counted); #if 0 Perl_re_printf( aTHX_ "counted=%" UVuf " deltanext=%" UVuf " OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf @@ -5827,52 +5828,52 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", (UV)(-counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta)); #endif - if (deltanext == OPTIMIZE_INFTY + if (deltanext == OPTIMIZE_INFTY || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta) - data->pos_delta = OPTIMIZE_INFTY; - else - data->pos_delta += - counted * deltanext + - (minnext + deltanext) * maxcount - minnext * mincount; - if (mincount != maxcount) { - /* Cannot extend fixed substrings found inside - the group. */ + data->pos_delta = OPTIMIZE_INFTY; + else + data->pos_delta += - counted * deltanext + + (minnext + deltanext) * maxcount - minnext * mincount; + if (mincount != maxcount) { + /* Cannot extend fixed substrings found inside + the group. */ scan_commit(pRExC_state, data, minlenp, is_inf); - if (mincount && last_str) { - SV * const sv = data->last_found; - MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ? 
- mg_find(sv, PERL_MAGIC_utf8) : NULL; - - if (mg) - mg->mg_len = -1; - sv_setsv(sv, last_str); - data->last_end = data->pos_min; - data->last_start_min = data->pos_min - last_chrs; - data->last_start_max = is_inf - ? OPTIMIZE_INFTY - : data->pos_min + data->pos_delta - last_chrs; - } - data->cur_is_floating = 1; /* float */ - } - SvREFCNT_dec(last_str); - } - if (data && (fl & SF_HAS_EVAL)) - data->flags |= SF_HAS_EVAL; - optimize_curly_tail: - rck_elide_nothing(oscan); - continue; - - default: + if (mincount && last_str) { + SV * const sv = data->last_found; + MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ? + mg_find(sv, PERL_MAGIC_utf8) : NULL; + + if (mg) + mg->mg_len = -1; + sv_setsv(sv, last_str); + data->last_end = data->pos_min; + data->last_start_min = data->pos_min - last_chrs; + data->last_start_max = is_inf + ? OPTIMIZE_INFTY + : data->pos_min + data->pos_delta - last_chrs; + } + data->cur_is_floating = 1; /* float */ + } + SvREFCNT_dec(last_str); + } + if (data && (fl & SF_HAS_EVAL)) + data->flags |= SF_HAS_EVAL; + optimize_curly_tail: + rck_elide_nothing(oscan); + continue; + + default: Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d", OP(scan)); case REF: case CLUMP: - if (flags & SCF_DO_SUBSTR) { + if (flags & SCF_DO_SUBSTR) { /* Cannot expect anything... 
*/ scan_commit(pRExC_state, data, minlenp, is_inf); - data->cur_is_floating = 1; /* float */ - } - is_inf = is_inf_internal = 1; - if (flags & SCF_DO_STCLASS_OR) { + data->cur_is_floating = 1; /* float */ + } + is_inf = is_inf_internal = 1; + if (flags & SCF_DO_STCLASS_OR) { if (OP(scan) == CLUMP) { /* Actually is any start char, but very few code points * aren't start characters */ @@ -5882,12 +5883,12 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", ssc_anything(data->start_class); } } - flags &= ~SCF_DO_STCLASS; - break; - } - } - else if (OP(scan) == LNBREAK) { - if (flags & SCF_DO_STCLASS) { + flags &= ~SCF_DO_STCLASS; + break; + } + } + else if (OP(scan) == LNBREAK) { + if (flags & SCF_DO_STCLASS) { if (flags & SCF_DO_STCLASS_AND) { ssc_intersection(data->start_class, PL_XPosix_ptrs[_CC_VERTSPACE], FALSE); @@ -5899,16 +5900,16 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", ssc_union(data->start_class, PL_XPosix_ptrs[_CC_VERTSPACE], FALSE); - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); /* See commit msg for * 749e076fceedeb708a624933726e7989f2302f6a */ ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING; } - flags &= ~SCF_DO_STCLASS; + flags &= ~SCF_DO_STCLASS; } - min++; + min++; if (delta != OPTIMIZE_INFTY) delta++; /* Because of the 2 char string cr-lf */ if (flags & SCF_DO_SUBSTR) { @@ -5918,17 +5919,17 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", if (data->pos_delta != OPTIMIZE_INFTY) { data->pos_delta += 1; } - data->cur_is_floating = 1; /* float */ + data->cur_is_floating = 1; /* float */ } - } - else if (REGNODE_SIMPLE(OP(scan))) { + } + else if (REGNODE_SIMPLE(OP(scan))) { - if (flags & SCF_DO_SUBSTR) { + if (flags & SCF_DO_SUBSTR) { scan_commit(pRExC_state, data, minlenp, is_inf); - data->pos_min++; - } - min++; - if (flags & SCF_DO_STCLASS) { + data->pos_min++; + } + min++; + if (flags & SCF_DO_STCLASS) { 
bool invert = 0; SV* my_invlist = NULL; U8 namedclass; @@ -5936,21 +5937,21 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */ ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING; - /* Some of the logic below assumes that switching - locale on will only add false positives. */ - switch (OP(scan)) { + /* Some of the logic below assumes that switching + locale on will only add false positives. */ + switch (OP(scan)) { - default: + default: #ifdef DEBUGGING Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan)); #endif - case SANY: - if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ - ssc_match_all_cp(data->start_class); - break; + case SANY: + if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ + ssc_match_all_cp(data->start_class); + break; - case REG_ANY: + case REG_ANY: { SV* REG_ANY_invlist = _new_invlist(2); REG_ANY_invlist = add_cp_to_invlist(REG_ANY_invlist, @@ -5970,8 +5971,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", ssc_clear_locale(data->start_class); } SvREFCNT_dec_NN(REG_ANY_invlist); - } - break; + } + break; case ANYOFD: case ANYOFL: @@ -5981,13 +5982,13 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", case ANYOFHr: case ANYOFHs: case ANYOF: - if (flags & SCF_DO_STCLASS_AND) - ssc_and(pRExC_state, data->start_class, + if (flags & SCF_DO_STCLASS_AND) + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) scan); - else - ssc_or(pRExC_state, data->start_class, + else + ssc_or(pRExC_state, data->start_class, (regnode_charclass *) scan); - break; + break; case NANYOFM: /* NANYOFM already contains the inversion of the input ANYOF data, so, unlike things like @@ -6028,11 +6029,11 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", break; } - case NPOSIXL: + case NPOSIXL: invert = 1; /* FALLTHROUGH */ - case POSIXL: + case POSIXL: namedclass = classnum_to_namedclass(FLAGS(scan)) + invert; if (flags & SCF_DO_STCLASS_AND) { bool 
was_there = cBOOL( @@ -6072,16 +6073,16 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", what's matched */ invert = 1; /* FALLTHROUGH */ - case POSIXA: + case POSIXA: my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL); goto join_posix_and_ascii; - case NPOSIXD: - case NPOSIXU: + case NPOSIXD: + case NPOSIXU: invert = 1; /* FALLTHROUGH */ - case POSIXD: - case POSIXU: + case POSIXD: + case POSIXU: my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)], NULL); /* NPOSIXD matches all upper Latin1 code points unless the @@ -6105,23 +6106,23 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", ssc_union(data->start_class, my_invlist, invert); } SvREFCNT_dec(my_invlist); - } - if (flags & SCF_DO_STCLASS_OR) - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); - flags &= ~SCF_DO_STCLASS; - } - } - else if (PL_regkind[OP(scan)] == EOL && flags & SCF_DO_SUBSTR) { - data->flags |= (OP(scan) == MEOL - ? SF_BEFORE_MEOL - : SF_BEFORE_SEOL); + } + if (flags & SCF_DO_STCLASS_OR) + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); + flags &= ~SCF_DO_STCLASS; + } + } + else if (PL_regkind[OP(scan)] == EOL && flags & SCF_DO_SUBSTR) { + data->flags |= (OP(scan) == MEOL + ? 
SF_BEFORE_MEOL + : SF_BEFORE_SEOL); scan_commit(pRExC_state, data, minlenp, is_inf); - } - else if ( PL_regkind[OP(scan)] == BRANCHJ - /* Lookbehind, or need to calculate parens/evals/stclass: */ - && (scan->flags || data || (flags & SCF_DO_STCLASS)) - && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) + } + else if ( PL_regkind[OP(scan)] == BRANCHJ + /* Lookbehind, or need to calculate parens/evals/stclass: */ + && (scan->flags || data || (flags & SCF_DO_STCLASS)) + && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) { if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY || OP(scan) == UNLESSM ) @@ -6139,16 +6140,16 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", if (data) { data_fake.whilem_c = data->whilem_c; data_fake.last_closep = data->last_closep; - } + } else data_fake.last_closep = &fake; - data_fake.pos_delta = delta; + data_fake.pos_delta = delta; if ( flags & SCF_DO_STCLASS && !scan->flags && OP(scan) == IFMATCH ) { /* Lookahead */ ssc_init(pRExC_state, &intrnl); data_fake.start_class = &intrnl; f |= SCF_DO_STCLASS_AND; - } + } if (flags & SCF_WHILEM_VISITED_POS) f |= SCF_WHILEM_VISITED_POS; next = regnext(scan); @@ -6165,7 +6166,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", || minnext > (I32)U8_MAX || minnext + deltanext > (I32)U8_MAX) { - FAIL2("Lookbehind longer than %" UVuf " not implemented", + FAIL2("Lookbehind longer than %" UVuf " not implemented", (UV)U8_MAX); } @@ -6190,24 +6191,24 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", data->whilem_c = data_fake.whilem_c; } if (f & SCF_DO_STCLASS_AND) { - if (flags & SCF_DO_STCLASS_OR) { - /* OR before, AND after: ideally we would recurse with - * data_fake to get the AND applied by study of the - * remainder of the pattern, and then derecurse; - * *** HACK *** for now just treat as "no information". - * See [perl #56690]. 
- */ - ssc_init(pRExC_state, data->start_class); - } else { + if (flags & SCF_DO_STCLASS_OR) { + /* OR before, AND after: ideally we would recurse with + * data_fake to get the AND applied by study of the + * remainder of the pattern, and then derecurse; + * *** HACK *** for now just treat as "no information". + * See [perl #56690]. + */ + ssc_init(pRExC_state, data->start_class); + } else { /* AND before and after: combine and continue. These * assertions are zero-length, so can match an EMPTY * string */ - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl); + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl); ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING; - } + } } - } + } #if PERL_ENABLE_POSITIVE_ASSERTION_STUDY else { /* Positive Lookahead/lookbehind @@ -6245,9 +6246,9 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", data_fake.flags = 0; data_fake.substrs[0].flags = 0; data_fake.substrs[1].flags = 0; - data_fake.pos_delta = delta; + data_fake.pos_delta = delta; if (is_inf) - data_fake.flags |= SF_IS_INF; + data_fake.flags |= SF_IS_INF; if ( flags & SCF_DO_STCLASS && !scan->flags && OP(scan) == IFMATCH ) { /* Lookahead */ ssc_init(pRExC_state, &intrnl); @@ -6272,7 +6273,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", || *minnextp > (I32)U8_MAX || *minnextp + deltanext > (I32)U8_MAX) { - FAIL2("Lookbehind longer than %" UVuf " not implemented", + FAIL2("Lookbehind longer than %" UVuf " not implemented", (UV)U8_MAX); } @@ -6314,65 +6315,65 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", } } } - } + } #endif - } - else if (OP(scan) == OPEN) { - if (stopparen != (I32)ARG(scan)) - pars++; - } - else if (OP(scan) == CLOSE) { - if (stopparen == (I32)ARG(scan)) { - break; - } - if ((I32)ARG(scan) == is_par) { - next = regnext(scan); - - if ( next && (OP(next) != WHILEM) && next < last) - is_par = 0; /* Disable optimization */ - } - if (data) - *(data->last_closep) = ARG(scan); - } - 
else if (OP(scan) == EVAL) { - if (data) - data->flags |= SF_HAS_EVAL; - } - else if ( PL_regkind[OP(scan)] == ENDLIKE ) { - if (flags & SCF_DO_SUBSTR) { + } + else if (OP(scan) == OPEN) { + if (stopparen != (I32)ARG(scan)) + pars++; + } + else if (OP(scan) == CLOSE) { + if (stopparen == (I32)ARG(scan)) { + break; + } + if ((I32)ARG(scan) == is_par) { + next = regnext(scan); + + if ( next && (OP(next) != WHILEM) && next < last) + is_par = 0; /* Disable optimization */ + } + if (data) + *(data->last_closep) = ARG(scan); + } + else if (OP(scan) == EVAL) { + if (data) + data->flags |= SF_HAS_EVAL; + } + else if ( PL_regkind[OP(scan)] == ENDLIKE ) { + if (flags & SCF_DO_SUBSTR) { scan_commit(pRExC_state, data, minlenp, is_inf); - flags &= ~SCF_DO_SUBSTR; - } - if (data && OP(scan)==ACCEPT) { - data->flags |= SCF_SEEN_ACCEPT; - if (stopmin > min) - stopmin = min; - } - } - else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */ - { - if (flags & SCF_DO_SUBSTR) { + flags &= ~SCF_DO_SUBSTR; + } + if (data && OP(scan)==ACCEPT) { + data->flags |= SCF_SEEN_ACCEPT; + if (stopmin > min) + stopmin = min; + } + } + else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */ + { + if (flags & SCF_DO_SUBSTR) { scan_commit(pRExC_state, data, minlenp, is_inf); - data->cur_is_floating = 1; /* float */ - } - is_inf = is_inf_internal = 1; - if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ - ssc_anything(data->start_class); - flags &= ~SCF_DO_STCLASS; - } - else if (OP(scan) == GPOS) { + data->cur_is_floating = 1; /* float */ + } + is_inf = is_inf_internal = 1; + if (flags & SCF_DO_STCLASS_OR) /* Allow everything */ + ssc_anything(data->start_class); + flags &= ~SCF_DO_STCLASS; + } + else if (OP(scan) == GPOS) { if (!(RExC_rx->intflags & PREGf_GPOS_FLOAT) && - !(delta || is_inf || (data && data->pos_delta))) - { + !(delta || is_inf || (data && data->pos_delta))) + { if (!(RExC_rx->intflags & PREGf_ANCH) && (flags & SCF_DO_SUBSTR)) RExC_rx->intflags |= 
PREGf_ANCH_GPOS; - if (RExC_rx->gofs < (STRLEN)min) - RExC_rx->gofs = min; + if (RExC_rx->gofs < (STRLEN)min) + RExC_rx->gofs = min; } else { RExC_rx->intflags |= PREGf_GPOS_FLOAT; RExC_rx->gofs = 0; } - } + } #ifdef TRIE_STUDY_OPT #ifdef FULL_TRIE_STUDY else if (PL_regkind[OP(scan)] == TRIE) { @@ -6411,7 +6412,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", } else data_fake.last_closep = &fake; - data_fake.pos_delta = delta; + data_fake.pos_delta = delta; if (flags & SCF_DO_STCLASS) { ssc_init(pRExC_state, &this_class); data_fake.start_class = &this_class; @@ -6448,11 +6449,11 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", pars++; if (data_fake.flags & SCF_SEEN_ACCEPT) { if ( stopmin > min + min1) - stopmin = min + min1; - flags &= ~SCF_DO_SUBSTR; - if (data) - data->flags |= SCF_SEEN_ACCEPT; - } + stopmin = min + min1; + flags &= ~SCF_DO_SUBSTR; + if (data) + data->flags |= SCF_SEEN_ACCEPT; + } if (data) { if (data_fake.flags & SF_HAS_EVAL) data->flags |= SF_HAS_EVAL; @@ -6490,7 +6491,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", else { /* Switch to OR mode: cache the old value of * data->start_class */ - INIT_AND_WITHP; + INIT_AND_WITHP; StructCopy(data->start_class, and_withp, regnode_ssc); flags &= ~SCF_DO_STCLASS_AND; StructCopy(&accum, data->start_class, regnode_ssc); @@ -6501,24 +6502,24 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", continue; } #else - else if (PL_regkind[OP(scan)] == TRIE) { - reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ]; - U8*bang=NULL; + else if (PL_regkind[OP(scan)] == TRIE) { + reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ]; + U8*bang=NULL; - min += trie->minlen; - delta += (trie->maxlen - trie->minlen); - flags &= ~SCF_DO_STCLASS; /* xxx */ + min += trie->minlen; + delta += (trie->maxlen - trie->minlen); + flags &= ~SCF_DO_STCLASS; /* xxx */ if (flags & SCF_DO_SUBSTR) { /* Cannot expect anything... 
*/ scan_commit(pRExC_state, data, minlenp, is_inf); data->pos_min += trie->minlen; data->pos_delta += (trie->maxlen - trie->minlen); - if (trie->maxlen != trie->minlen) - data->cur_is_floating = 1; /* float */ + if (trie->maxlen != trie->minlen) + data->cur_is_floating = 1; /* float */ } if (trie->jump) /* no more substrings -- for now /grr*/ flags &= ~SCF_DO_SUBSTR; - } + } else if (OP(scan) == REGEX_SET) { Perl_croak(aTHX_ "panic: %s regnode should be resolved" " before optimization", reg_name[REGEX_SET]); @@ -6527,8 +6528,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", #endif /* old or new */ #endif /* TRIE_STUDY_OPT */ - /* Else: zero-length, ignore. */ - scan = regnext(scan); + /* Else: zero-length, ignore. */ + scan = regnext(scan); } finish: @@ -6557,19 +6558,19 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n", *deltap = is_inf_internal ? OPTIMIZE_INFTY : delta; if (flags & SCF_DO_SUBSTR && is_inf) - data->pos_delta = OPTIMIZE_INFTY - data->pos_min; + data->pos_delta = OPTIMIZE_INFTY - data->pos_min; if (is_par > (I32)U8_MAX) - is_par = 0; + is_par = 0; if (is_par && pars==1 && data) { - data->flags |= SF_IN_PAR; - data->flags &= ~SF_HAS_PAR; + data->flags |= SF_IN_PAR; + data->flags &= ~SF_HAS_PAR; } else if (pars && data) { - data->flags |= SF_HAS_PAR; - data->flags &= ~SF_IN_PAR; + data->flags |= SF_HAS_PAR; + data->flags &= ~SF_IN_PAR; } if (flags & SCF_DO_STCLASS_OR) - ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); + ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp); if (flags & SCF_TRIE_RESTUDY) data->flags |= SCF_TRIE_RESTUDY; @@ -6595,12 +6596,12 @@ S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n) PERL_ARGS_ASSERT_ADD_DATA; Renewc(RExC_rxi->data, - sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1), - char, struct reg_data); + sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1), + char, struct reg_data); if(count) - 
Renew(RExC_rxi->data->what, count + n, U8); + Renew(RExC_rxi->data->what, count + n, U8); else - Newx(RExC_rxi->data->what, n, U8); + Newx(RExC_rxi->data->what, n, U8); RExC_rxi->data->count = count + n; Copy(s, RExC_rxi->data->what + count, n, U8); return count; @@ -6614,22 +6615,22 @@ Perl_reginitcolors(pTHX) { const char * const s = PerlEnv_getenv("PERL_RE_COLORS"); if (s) { - char *t = savepv(s); - int i = 0; - PL_colors[0] = t; - while (++i < 6) { - t = strchr(t, '\t'); - if (t) { - *t = '\0'; - PL_colors[i] = ++t; - } - else - PL_colors[i] = t = (char *)""; - } + char *t = savepv(s); + int i = 0; + PL_colors[0] = t; + while (++i < 6) { + t = strchr(t, '\t'); + if (t) { + *t = '\0'; + PL_colors[i] = ++t; + } + else + PL_colors[i] = t = (char *)""; + } } else { - int i = 0; - while (i < 6) - PL_colors[i++] = (char *)""; + int i = 0; + while (i < 6) + PL_colors[i++] = (char *)""; } PL_colorset = 1; } @@ -6666,24 +6667,24 @@ regexp_engine const * Perl_current_re_engine(pTHX) { if (IN_PERL_COMPILETIME) { - HV * const table = GvHV(PL_hintgv); - SV **ptr; + HV * const table = GvHV(PL_hintgv); + SV **ptr; - if (!table || !(PL_hints & HINT_LOCALIZE_HH)) - return &PL_core_reg_engine; - ptr = hv_fetchs(table, "regcomp", FALSE); - if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr))) - return &PL_core_reg_engine; - return INT2PTR(regexp_engine*, SvIV(*ptr)); + if (!table || !(PL_hints & HINT_LOCALIZE_HH)) + return &PL_core_reg_engine; + ptr = hv_fetchs(table, "regcomp", FALSE); + if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr))) + return &PL_core_reg_engine; + return INT2PTR(regexp_engine*, SvIV(*ptr)); } else { - SV *ptr; - if (!PL_curcop->cop_hints_hash) - return &PL_core_reg_engine; - ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0); - if ( !(ptr && SvIOK(ptr) && SvIV(ptr))) - return &PL_core_reg_engine; - return INT2PTR(regexp_engine*, SvIV(ptr)); + SV *ptr; + if (!PL_curcop->cop_hints_hash) + return &PL_core_reg_engine; + ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0); + if ( 
!(ptr && SvIOK(ptr) && SvIV(ptr))) + return &PL_core_reg_engine; + return INT2PTR(regexp_engine*, SvIV(ptr)); } } @@ -6699,7 +6700,7 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags) /* Dispatch a request to compile a regexp to correct regexp engine. */ DEBUG_COMPILE_r({ Perl_re_printf( aTHX_ "Using engine %" UVxf "\n", - PTR2UV(eng)); + PTR2UV(eng)); }); return CALLREGCOMP_ENG(eng, pattern, flags); } @@ -6770,7 +6771,7 @@ S_alloc_code_blocks(pTHX_ int ncode) static void S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state, - char **pat_p, STRLEN *plen_p, int num_code_blocks) + char **pat_p, STRLEN *plen_p, int num_code_blocks) { U8 *const src = (U8*)*pat_p; U8 *dst, *d; @@ -6929,7 +6930,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, oplist = OpSIBLING(oplist);; } - /* apply magic and QR overloading to arg */ + /* apply magic and QR overloading to arg */ SvGETMAGIC(msv); if (SvROK(msv) && SvAMAGIC(msv)) { @@ -7061,7 +7062,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, static bool S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, - char *pat, STRLEN plen) + char *pat, STRLEN plen) { int n = 0; STRLEN s; @@ -7069,21 +7070,21 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state, PERL_UNUSED_CONTEXT; for (s = 0; s < plen; s++) { - if ( pRExC_state->code_blocks + if ( pRExC_state->code_blocks && n < pRExC_state->code_blocks->count - && s == pRExC_state->code_blocks->cb[n].start) - { - s = pRExC_state->code_blocks->cb[n].end; - n++; - continue; - } - /* TODO ideally should handle [..], (#..), /#.../x to reduce false - * positives here */ - if (pat[s] == '(' && s+2 <= plen && pat[s+1] == '?' && - (pat[s+2] == '{' + && s == pRExC_state->code_blocks->cb[n].start) + { + s = pRExC_state->code_blocks->cb[n].end; + n++; + continue; + } + /* TODO ideally should handle [..], (#..), /#.../x to reduce false + * positives here */ + if (pat[s] == '(' && s+2 <= plen && pat[s+1] == '?' 
&& + (pat[s+2] == '{' || (s + 2 <= plen && pat[s+2] == '?' && pat[s+3] == '{')) - ) - return 1; + ) + return 1; } return 0; } @@ -7120,39 +7121,39 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, DECLARE_AND_GET_RE_DEBUG_FLAGS; if (pRExC_state->runtime_code_qr) { - /* this is the second time we've been called; this should - * only happen if the main pattern got upgraded to utf8 - * during compilation; re-use the qr we compiled first time - * round (which should be utf8 too) - */ - qr = pRExC_state->runtime_code_qr; - pRExC_state->runtime_code_qr = NULL; - assert(RExC_utf8 && SvUTF8(qr)); + /* this is the second time we've been called; this should + * only happen if the main pattern got upgraded to utf8 + * during compilation; re-use the qr we compiled first time + * round (which should be utf8 too) + */ + qr = pRExC_state->runtime_code_qr; + pRExC_state->runtime_code_qr = NULL; + assert(RExC_utf8 && SvUTF8(qr)); } else { - int n = 0; - STRLEN s; - char *p, *newpat; - int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */ - SV *sv, *qr_ref; - dSP; - - /* determine how many extra chars we need for ' and \ escaping */ - for (s = 0; s < plen; s++) { - if (pat[s] == '\'' || pat[s] == '\\') - newlen++; - } - - Newx(newpat, newlen, char); - p = newpat; - *p++ = 'q'; *p++ = 'r'; *p++ = '\''; - - for (s = 0; s < plen; s++) { - if ( pRExC_state->code_blocks - && n < pRExC_state->code_blocks->count - && s == pRExC_state->code_blocks->cb[n].start) - { - /* blank out literal code block so that they aren't + int n = 0; + STRLEN s; + char *p, *newpat; + int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */ + SV *sv, *qr_ref; + dSP; + + /* determine how many extra chars we need for ' and \ escaping */ + for (s = 0; s < plen; s++) { + if (pat[s] == '\'' || pat[s] == '\\') + newlen++; + } + + Newx(newpat, newlen, char); + p = newpat; + *p++ = 'q'; *p++ = 'r'; *p++ = '\''; + + for (s = 0; s < plen; s++) { + if ( pRExC_state->code_blocks + && n < 
pRExC_state->code_blocks->count + && s == pRExC_state->code_blocks->cb[n].start) + { + /* blank out literal code block so that they aren't * recompiled: eg change from/to: * /(?{xyz})/ * /(?=====)/ @@ -7163,76 +7164,76 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, * /(?(?{xyz}))/ * /(?(?=====))/ */ - assert(pat[s] == '('); - assert(pat[s+1] == '?'); + assert(pat[s] == '('); + assert(pat[s+1] == '?'); *p++ = '('; *p++ = '?'; s += 2; - while (s < pRExC_state->code_blocks->cb[n].end) { - *p++ = '='; - s++; - } + while (s < pRExC_state->code_blocks->cb[n].end) { + *p++ = '='; + s++; + } *p++ = ')'; - n++; - continue; - } - if (pat[s] == '\'' || pat[s] == '\\') - *p++ = '\\'; - *p++ = pat[s]; - } - *p++ = '\''; - if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) { - *p++ = 'x'; + n++; + continue; + } + if (pat[s] == '\'' || pat[s] == '\\') + *p++ = '\\'; + *p++ = pat[s]; + } + *p++ = '\''; + if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) { + *p++ = 'x'; if (pRExC_state->pm_flags & RXf_PMf_EXTENDED_MORE) { *p++ = 'x'; } } - *p++ = '\0'; - DEBUG_COMPILE_r({ + *p++ = '\0'; + DEBUG_COMPILE_r({ Perl_re_printf( aTHX_ - "%sre-parsing pattern for runtime code:%s %s\n", - PL_colors[4], PL_colors[5], newpat); - }); + "%sre-parsing pattern for runtime code:%s %s\n", + PL_colors[4], PL_colors[5], newpat); + }); - sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0); - Safefree(newpat); + sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0); + Safefree(newpat); - ENTER; - SAVETMPS; - save_re_context(); - PUSHSTACKi(PERLSI_REQUIRE); + ENTER; + SAVETMPS; + save_re_context(); + PUSHSTACKi(PERLSI_REQUIRE); /* G_RE_REPARSING causes the toker to collapse \\ into \ when * parsing qr''; normally only q'' does this. 
It also alters * hints handling */ - eval_sv(sv, G_SCALAR|G_RE_REPARSING); - SvREFCNT_dec_NN(sv); - SPAGAIN; - qr_ref = POPs; - PUTBACK; - { - SV * const errsv = ERRSV; - if (SvTRUE_NN(errsv)) + eval_sv(sv, G_SCALAR|G_RE_REPARSING); + SvREFCNT_dec_NN(sv); + SPAGAIN; + qr_ref = POPs; + PUTBACK; + { + SV * const errsv = ERRSV; + if (SvTRUE_NN(errsv)) /* use croak_sv ? */ - Perl_croak_nocontext("%" SVf, SVfARG(errsv)); - } - assert(SvROK(qr_ref)); - qr = SvRV(qr_ref); - assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp); - /* the leaving below frees the tmp qr_ref. - * Give qr a life of its own */ - SvREFCNT_inc(qr); - POPSTACK; - FREETMPS; - LEAVE; + Perl_croak_nocontext("%" SVf, SVfARG(errsv)); + } + assert(SvROK(qr_ref)); + qr = SvRV(qr_ref); + assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp); + /* the leaving below frees the tmp qr_ref. + * Give qr a life of its own */ + SvREFCNT_inc(qr); + POPSTACK; + FREETMPS; + LEAVE; } if (!RExC_utf8 && SvUTF8(qr)) { - /* first time through; the pattern got upgraded; save the - * qr for the next time through */ - assert(!pRExC_state->runtime_code_qr); - pRExC_state->runtime_code_qr = qr; - return 0; + /* first time through; the pattern got upgraded; save the + * qr for the next time through */ + assert(!pRExC_state->runtime_code_qr); + pRExC_state->runtime_code_qr = qr; + return 0; } @@ -7241,17 +7242,17 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, /* merge the main (r1) and run-time (r2) code blocks into one */ { - RXi_GET_DECL(ReANY((REGEXP *)qr), r2); - struct reg_code_block *new_block, *dst; - RExC_state_t * const r1 = pRExC_state; /* convenient alias */ - int i1 = 0, i2 = 0; + RXi_GET_DECL(ReANY((REGEXP *)qr), r2); + struct reg_code_block *new_block, *dst; + RExC_state_t * const r1 = pRExC_state; /* convenient alias */ + int i1 = 0, i2 = 0; int r1c, r2c; - if (!r2->code_blocks || !r2->code_blocks->count) /* we guessed wrong */ - { - SvREFCNT_dec_NN(qr); - return 1; 
- } + if (!r2->code_blocks || !r2->code_blocks->count) /* we guessed wrong */ + { + SvREFCNT_dec_NN(qr); + return 1; + } if (!r1->code_blocks) r1->code_blocks = S_alloc_code_blocks(aTHX_ 0); @@ -7259,46 +7260,46 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, r1c = r1->code_blocks->count; r2c = r2->code_blocks->count; - Newx(new_block, r1c + r2c, struct reg_code_block); - - dst = new_block; - - while (i1 < r1c || i2 < r2c) { - struct reg_code_block *src; - bool is_qr = 0; - - if (i1 == r1c) { - src = &r2->code_blocks->cb[i2++]; - is_qr = 1; - } - else if (i2 == r2c) - src = &r1->code_blocks->cb[i1++]; - else if ( r1->code_blocks->cb[i1].start - < r2->code_blocks->cb[i2].start) - { - src = &r1->code_blocks->cb[i1++]; - assert(src->end < r2->code_blocks->cb[i2].start); - } - else { - assert( r1->code_blocks->cb[i1].start - > r2->code_blocks->cb[i2].start); - src = &r2->code_blocks->cb[i2++]; - is_qr = 1; - assert(src->end < r1->code_blocks->cb[i1].start); - } - - assert(pat[src->start] == '('); - assert(pat[src->end] == ')'); - dst->start = src->start; - dst->end = src->end; - dst->block = src->block; - dst->src_regex = is_qr ? 
(REGEXP*) SvREFCNT_inc( (SV*) qr) - : src->src_regex; - dst++; - } - r1->code_blocks->count += r2c; - Safefree(r1->code_blocks->cb); - r1->code_blocks->cb = new_block; + Newx(new_block, r1c + r2c, struct reg_code_block); + + dst = new_block; + + while (i1 < r1c || i2 < r2c) { + struct reg_code_block *src; + bool is_qr = 0; + + if (i1 == r1c) { + src = &r2->code_blocks->cb[i2++]; + is_qr = 1; + } + else if (i2 == r2c) + src = &r1->code_blocks->cb[i1++]; + else if ( r1->code_blocks->cb[i1].start + < r2->code_blocks->cb[i2].start) + { + src = &r1->code_blocks->cb[i1++]; + assert(src->end < r2->code_blocks->cb[i2].start); + } + else { + assert( r1->code_blocks->cb[i1].start + > r2->code_blocks->cb[i2].start); + src = &r2->code_blocks->cb[i2++]; + is_qr = 1; + assert(src->end < r1->code_blocks->cb[i1].start); + } + + assert(pat[src->start] == '('); + assert(pat[src->end] == ')'); + dst->start = src->start; + dst->end = src->end; + dst->block = src->block; + dst->src_regex = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr) + : src->src_regex; + dst++; + } + r1->code_blocks->count += r2c; + Safefree(r1->code_blocks->cb); + r1->code_blocks->cb = new_block; } SvREFCNT_dec_NN(qr); @@ -7506,8 +7507,8 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx) REGEXP * Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, - OP *expr, const regexp_engine* eng, REGEXP *old_re, - bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags) + OP *expr, const regexp_engine* eng, REGEXP *old_re, + bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags) { REGEXP *Rx; /* Capital 'R' means points to a REGEXP */ STRLEN plen; @@ -7548,19 +7549,19 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, pRExC_state->code_blocks = NULL; if (is_bare_re) - *is_bare_re = FALSE; + *is_bare_re = FALSE; if (expr && (expr->op_type == OP_LIST || - (expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) { - /* allocate code_blocks if needed */ - OP *o; - int ncode = 0; + 
(expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) { + /* allocate code_blocks if needed */ + OP *o; + int ncode = 0; - for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) - if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) - ncode++; /* count of DO blocks */ + for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) + if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) + ncode++; /* count of DO blocks */ - if (ncode) + if (ncode) pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_ ncode); } @@ -7638,15 +7639,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, exp = SvPV_nomg(pat, plen); if (!eng->op_comp) { - if ((SvUTF8(pat) && IN_BYTES) - || SvGMAGICAL(pat) || SvAMAGIC(pat)) - { - /* make a temporary copy; either to convert to bytes, - * or to avoid repeating get-magic / overloaded stringify */ - pat = newSVpvn_flags(exp, plen, SVs_TEMP | - (IN_BYTES ? 0 : SvUTF8(pat))); - } - return CALLREGCOMP_ENG(eng, pat, orig_rx_flags); + if ((SvUTF8(pat) && IN_BYTES) + || SvGMAGICAL(pat) || SvAMAGIC(pat)) + { + /* make a temporary copy; either to convert to bytes, + * or to avoid repeating get-magic / overloaded stringify */ + pat = newSVpvn_flags(exp, plen, SVs_TEMP | + (IN_BYTES ? 0 : SvUTF8(pat))); + } + return CALLREGCOMP_ENG(eng, pat, orig_rx_flags); } /* ignore the utf8ness if the pattern is 0 length */ @@ -7690,11 +7691,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, * to utf8 */ if ((pm_flags & PMf_USE_RE_EVAL) - /* this second condition covers the non-regex literal case, - * i.e. $foo =~ '(?{})'. */ - || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL)) + /* this second condition covers the non-regex literal case, + * i.e. $foo =~ '(?{})'. 
*/ + || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL)) ) - runtime_code = S_has_runtime_code(aTHX_ pRExC_state, exp, plen); + runtime_code = S_has_runtime_code(aTHX_ pRExC_state, exp, plen); redo_parse: /* return old regex if pattern hasn't changed */ @@ -7708,10 +7709,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, && !recompile && !!RX_UTF8(old_re) == !!RExC_utf8 && ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) ) - && RX_PRECOMP(old_re) - && RX_PRELEN(old_re) == plen + && RX_PRECOMP(old_re) + && RX_PRELEN(old_re) == plen && memEQ(RX_PRECOMP(old_re), exp, plen) - && !runtime_code /* with runtime code, always recompile */ ) + && !runtime_code /* with runtime code, always recompile */ ) { DEBUG_COMPILE_r({ SV *dsv= sv_newmortal(); @@ -7734,9 +7735,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, && initial_charset == REGEX_DEPENDS_CHARSET) { - /* Set to use unicode semantics if the pattern is in utf8 and has the - * 'depends' charset specified, as it means unicode when utf8 */ - set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET); + /* Set to use unicode semantics if the pattern is in utf8 and has the + * 'depends' charset specified, as it means unicode when utf8 */ + set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET); RExC_uni_semantics = 1; } @@ -7744,16 +7745,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (runtime_code) { assert(TAINTING_get || !TAINT_get); - if (TAINT_get) - Perl_croak(aTHX_ "Eval-group in insecure regular expression"); + if (TAINT_get) + Perl_croak(aTHX_ "Eval-group in insecure regular expression"); - if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) { - /* whoops, we have a non-utf8 pattern, whilst run-time code - * got compiled as utf8. Try again with a utf8 pattern */ + if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) { + /* whoops, we have a non-utf8 pattern, whilst run-time code + * got compiled as utf8. 
Try again with a utf8 pattern */ S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen, pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0); goto redo_parse; - } + } } assert(!pRExC_state->runtime_code_qr); @@ -7828,7 +7829,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RXp_COMPFLAGS(RExC_rx) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK; if (pm_flags & PMf_IS_QR) { - RExC_rxi->code_blocks = pRExC_state->code_blocks; + RExC_rxi->code_blocks = pRExC_state->code_blocks; if (RExC_rxi->code_blocks) { RExC_rxi->code_blocks->refcnt++; } @@ -7870,7 +7871,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_total_parens = RExC_npar; } else if (! MUST_RESTART(flags)) { - ReREFCNT_dec(Rx); + ReREFCNT_dec(Rx); Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile, flags=%#" UVxf, (UV) flags); } @@ -8032,7 +8033,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN; else RExC_seen &= ~REG_TOP_LEVEL_BRANCHES_SEEN; - StructCopy(&zero_scan_data, &data, scan_data_t); + StructCopy(&zero_scan_data, &data, scan_data_t); } #else StructCopy(&zero_scan_data, &data, scan_data_t); @@ -8043,171 +8044,171 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, /*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */ if (UTF) - SvUTF8_on(Rx); /* Unicode in it? */ + SvUTF8_on(Rx); /* Unicode in it? */ RExC_rxi->regstclass = NULL; if (RExC_naughty >= TOO_NAUGHTY) /* Probably an expensive pattern. */ - RExC_rx->intflags |= PREGf_NAUGHTY; + RExC_rx->intflags |= PREGf_NAUGHTY; scan = RExC_rxi->program + 1; /* First BRANCH. */ /* testing for BRANCH here tells us whether there is "must appear" data in the pattern. If there is then we can use it for optimisations */ if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /* Only one top-level choice. 
*/ - SSize_t fake; - STRLEN longest_length[2]; - regnode_ssc ch_class; /* pointed to by data */ - int stclass_flag; - SSize_t last_close = 0; /* pointed to by data */ + SSize_t fake; + STRLEN longest_length[2]; + regnode_ssc ch_class; /* pointed to by data */ + int stclass_flag; + SSize_t last_close = 0; /* pointed to by data */ regnode *first= scan; regnode *first_next= regnext(first); int i; - /* - * Skip introductions and multiplicators >= 1 - * so that we can extract the 'meat' of the pattern that must - * match in the large if() sequence following. - * NOTE that EXACT is NOT covered here, as it is normally - * picked up by the optimiser separately. - * - * This is unfortunate as the optimiser isnt handling lookahead - * properly currently. - * - */ - while ((OP(first) == OPEN && (sawopen = 1)) || - /* An OR of *one* alternative - should not happen now. */ - (OP(first) == BRANCH && OP(first_next) != BRANCH) || - /* for now we can't handle lookbehind IFMATCH*/ - (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) || - (OP(first) == PLUS) || - (OP(first) == MINMOD) || - /* An {n,m} with n>0 */ - (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) || - (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END )) - { - /* - * the only op that could be a regnode is PLUS, all the rest - * will be regnode_1 or regnode_2. - * + /* + * Skip introductions and multiplicators >= 1 + * so that we can extract the 'meat' of the pattern that must + * match in the large if() sequence following. + * NOTE that EXACT is NOT covered here, as it is normally + * picked up by the optimiser separately. + * + * This is unfortunate as the optimiser isnt handling lookahead + * properly currently. + * + */ + while ((OP(first) == OPEN && (sawopen = 1)) || + /* An OR of *one* alternative - should not happen now. 
*/ + (OP(first) == BRANCH && OP(first_next) != BRANCH) || + /* for now we can't handle lookbehind IFMATCH*/ + (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) || + (OP(first) == PLUS) || + (OP(first) == MINMOD) || + /* An {n,m} with n>0 */ + (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) || + (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END )) + { + /* + * the only op that could be a regnode is PLUS, all the rest + * will be regnode_1 or regnode_2. + * * (yves doesn't think this is true) - */ - if (OP(first) == PLUS) - sawplus = 1; + */ + if (OP(first) == PLUS) + sawplus = 1; else { if (OP(first) == MINMOD) sawminmod = 1; - first += regarglen[OP(first)]; + first += regarglen[OP(first)]; } - first = NEXTOPER(first); - first_next= regnext(first); - } + first = NEXTOPER(first); + first_next= regnext(first); + } - /* Starting-point info. */ + /* Starting-point info. */ again: DEBUG_PEEP("first:", first, 0, 0); /* Ignore EXACT as we deal with it later. */ - if (PL_regkind[OP(first)] == EXACT) { - if (! isEXACTFish(OP(first))) { - NOOP; /* Empty, get anchored substr later. */ + if (PL_regkind[OP(first)] == EXACT) { + if (! isEXACTFish(OP(first))) { + NOOP; /* Empty, get anchored substr later. 
*/ } - else - RExC_rxi->regstclass = first; - } + else + RExC_rxi->regstclass = first; + } #ifdef TRIE_STCLASS - else if (PL_regkind[OP(first)] == TRIE && - ((reg_trie_data *)RExC_rxi->data->data[ ARG(first) ])->minlen>0) - { + else if (PL_regkind[OP(first)] == TRIE && + ((reg_trie_data *)RExC_rxi->data->data[ ARG(first) ])->minlen>0) + { /* this can happen only on restudy */ RExC_rxi->regstclass = construct_ahocorasick_from_trie(pRExC_state, (regnode *)first, 0); - } + } #endif - else if (REGNODE_SIMPLE(OP(first))) - RExC_rxi->regstclass = first; - else if (PL_regkind[OP(first)] == BOUND || - PL_regkind[OP(first)] == NBOUND) - RExC_rxi->regstclass = first; - else if (PL_regkind[OP(first)] == BOL) { + else if (REGNODE_SIMPLE(OP(first))) + RExC_rxi->regstclass = first; + else if (PL_regkind[OP(first)] == BOUND || + PL_regkind[OP(first)] == NBOUND) + RExC_rxi->regstclass = first; + else if (PL_regkind[OP(first)] == BOL) { RExC_rx->intflags |= (OP(first) == MBOL ? PREGf_ANCH_MBOL : PREGf_ANCH_SBOL); - first = NEXTOPER(first); - goto again; - } - else if (OP(first) == GPOS) { + first = NEXTOPER(first); + goto again; + } + else if (OP(first) == GPOS) { RExC_rx->intflags |= PREGf_ANCH_GPOS; - first = NEXTOPER(first); - goto again; - } - else if ((!sawopen || !RExC_sawback) && + first = NEXTOPER(first); + goto again; + } + else if ((!sawopen || !RExC_sawback) && !sawlookahead && - (OP(first) == STAR && - PL_regkind[OP(NEXTOPER(first))] == REG_ANY) && + (OP(first) == STAR && + PL_regkind[OP(NEXTOPER(first))] == REG_ANY) && !(RExC_rx->intflags & PREGf_ANCH) && !pRExC_state->code_blocks) - { - /* turn .* into ^.* with an implied $*=1 */ - const int type = - (OP(NEXTOPER(first)) == REG_ANY) + { + /* turn .* into ^.* with an implied $*=1 */ + const int type = + (OP(NEXTOPER(first)) == REG_ANY) ? 
PREGf_ANCH_MBOL : PREGf_ANCH_SBOL; RExC_rx->intflags |= (type | PREGf_IMPLICIT); - first = NEXTOPER(first); - goto again; - } + first = NEXTOPER(first); + goto again; + } if (sawplus && !sawminmod && !sawlookahead && (!sawopen || !RExC_sawback) - && !pRExC_state->code_blocks) /* May examine pos and $& */ - /* x+ must match at the 1st pos of run of x's */ - RExC_rx->intflags |= PREGf_SKIP; + && !pRExC_state->code_blocks) /* May examine pos and $& */ + /* x+ must match at the 1st pos of run of x's */ + RExC_rx->intflags |= PREGf_SKIP; - /* Scan is after the zeroth branch, first is atomic matcher. */ + /* Scan is after the zeroth branch, first is atomic matcher. */ #ifdef TRIE_STUDY_OPT - DEBUG_PARSE_r( - if (!restudied) + DEBUG_PARSE_r( + if (!restudied) Perl_re_printf( aTHX_ "first at %" IVdf "\n", - (IV)(first - scan + 1)) + (IV)(first - scan + 1)) ); #else - DEBUG_PARSE_r( + DEBUG_PARSE_r( Perl_re_printf( aTHX_ "first at %" IVdf "\n", - (IV)(first - scan + 1)) + (IV)(first - scan + 1)) ); #endif - /* - * If there's something expensive in the r.e., find the - * longest literal string that must appear and make it the - * regmust. Resolve ties in favor of later strings, since - * the regstart check works with the beginning of the r.e. - * and avoiding duplication strengthens checking. Not a - * strong reason, but sufficient in the absence of others. - * [Now we resolve ties in favor of the earlier string if - * it happens that c_offset_min has been invalidated, since the - * earlier string may buy us something the later one won't.] 
- */ - - data.substrs[0].str = newSVpvs(""); - data.substrs[1].str = newSVpvs(""); - data.last_found = newSVpvs(""); - data.cur_is_floating = 0; /* initially any found substring is fixed */ - ENTER_with_name("study_chunk"); - SAVEFREESV(data.substrs[0].str); - SAVEFREESV(data.substrs[1].str); - SAVEFREESV(data.last_found); - first = scan; - if (!RExC_rxi->regstclass) { - ssc_init(pRExC_state, &ch_class); - data.start_class = &ch_class; - stclass_flag = SCF_DO_STCLASS_AND; - } else /* XXXX Check for BOUND? */ - stclass_flag = 0; - data.last_closep = &last_close; + /* + * If there's something expensive in the r.e., find the + * longest literal string that must appear and make it the + * regmust. Resolve ties in favor of later strings, since + * the regstart check works with the beginning of the r.e. + * and avoiding duplication strengthens checking. Not a + * strong reason, but sufficient in the absence of others. + * [Now we resolve ties in favor of the earlier string if + * it happens that c_offset_min has been invalidated, since the + * earlier string may buy us something the later one won't.] + */ + + data.substrs[0].str = newSVpvs(""); + data.substrs[1].str = newSVpvs(""); + data.last_found = newSVpvs(""); + data.cur_is_floating = 0; /* initially any found substring is fixed */ + ENTER_with_name("study_chunk"); + SAVEFREESV(data.substrs[0].str); + SAVEFREESV(data.substrs[1].str); + SAVEFREESV(data.last_found); + first = scan; + if (!RExC_rxi->regstclass) { + ssc_init(pRExC_state, &ch_class); + data.start_class = &ch_class; + stclass_flag = SCF_DO_STCLASS_AND; + } else /* XXXX Check for BOUND? 
*/ + stclass_flag = 0; + data.last_closep = &last_close; DEBUG_RExC_seen(); /* * MAIN ENTRY FOR study_chunk() FOR m/PATTERN/ * (NO top level branches) */ - minlen = study_chunk(pRExC_state, &first, &minlen, &fake, + minlen = study_chunk(pRExC_state, &first, &minlen, &fake, scan + RExC_size, /* Up to end */ &data, -1, 0, NULL, SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag @@ -8218,15 +8219,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk")); - if ( RExC_total_parens == 1 && !data.cur_is_floating - && data.last_start_min == 0 && data.last_end > 0 - && !RExC_seen_zerolen + if ( RExC_total_parens == 1 && !data.cur_is_floating + && data.last_start_min == 0 && data.last_end > 0 + && !RExC_seen_zerolen && !(RExC_seen & REG_VERBARG_SEEN) && !(RExC_seen & REG_GPOS_SEEN) ){ - RExC_rx->extflags |= RXf_CHECK_ALL; + RExC_rx->extflags |= RXf_CHECK_ALL; } - scan_commit(pRExC_state, &data,&minlen, 0); + scan_commit(pRExC_state, &data,&minlen, 0); /* XXX this is done in reverse order because that's the way the @@ -8263,39 +8264,39 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } } - LEAVE_with_name("study_chunk"); + LEAVE_with_name("study_chunk"); - if (RExC_rxi->regstclass - && (OP(RExC_rxi->regstclass) == REG_ANY || OP(RExC_rxi->regstclass) == SANY)) - RExC_rxi->regstclass = NULL; + if (RExC_rxi->regstclass + && (OP(RExC_rxi->regstclass) == REG_ANY || OP(RExC_rxi->regstclass) == SANY)) + RExC_rxi->regstclass = NULL; - if ((!(RExC_rx->substrs->data[0].substr || RExC_rx->substrs->data[0].utf8_substr) + if ((!(RExC_rx->substrs->data[0].substr || RExC_rx->substrs->data[0].utf8_substr) || RExC_rx->substrs->data[0].min_offset) - && stclass_flag + && stclass_flag && ! 
(ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING) - && is_ssc_worth_it(pRExC_state, data.start_class)) - { - const U32 n = add_data(pRExC_state, STR_WITH_LEN("f")); + && is_ssc_worth_it(pRExC_state, data.start_class)) + { + const U32 n = add_data(pRExC_state, STR_WITH_LEN("f")); ssc_finalize(pRExC_state, data.start_class); - Newx(RExC_rxi->data->data[n], 1, regnode_ssc); - StructCopy(data.start_class, - (regnode_ssc*)RExC_rxi->data->data[n], - regnode_ssc); - RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n]; - RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ - DEBUG_COMPILE_r({ SV *sv = sv_newmortal(); + Newx(RExC_rxi->data->data[n], 1, regnode_ssc); + StructCopy(data.start_class, + (regnode_ssc*)RExC_rxi->data->data[n], + regnode_ssc); + RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n]; + RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ + DEBUG_COMPILE_r({ SV *sv = sv_newmortal(); regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state); Perl_re_printf( aTHX_ - "synthetic stclass \"%s\".\n", - SvPVX_const(sv));}); + "synthetic stclass \"%s\".\n", + SvPVX_const(sv));}); data.start_class = NULL; - } + } /* A temporary algorithm prefers floated substr to fixed one of * same length to dig more info. */ - i = (longest_length[0] <= longest_length[1]); + i = (longest_length[0] <= longest_length[1]); RExC_rx->substrs->check_ix = i; RExC_rx->check_end_shift = RExC_rx->substrs->data[i].end_shift; RExC_rx->check_substr = RExC_rx->substrs->data[i].substr; @@ -8305,38 +8306,38 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (!i && (RExC_rx->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS))) RExC_rx->intflags |= PREGf_NOSCAN; - if ((RExC_rx->check_substr || RExC_rx->check_utf8) ) { - RExC_rx->extflags |= RXf_USE_INTUIT; - if (SvTAIL(RExC_rx->check_substr ? 
RExC_rx->check_substr : RExC_rx->check_utf8)) - RExC_rx->extflags |= RXf_INTUIT_TAIL; - } + if ((RExC_rx->check_substr || RExC_rx->check_utf8) ) { + RExC_rx->extflags |= RXf_USE_INTUIT; + if (SvTAIL(RExC_rx->check_substr ? RExC_rx->check_substr : RExC_rx->check_utf8)) + RExC_rx->extflags |= RXf_INTUIT_TAIL; + } - /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere) - if ( (STRLEN)minlen < longest_length[1] ) + /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere) + if ( (STRLEN)minlen < longest_length[1] ) minlen= longest_length[1]; if ( (STRLEN)minlen < longest_length[0] ) minlen= longest_length[0]; */ } else { - /* Several toplevels. Best we can is to set minlen. */ - SSize_t fake; - regnode_ssc ch_class; - SSize_t last_close = 0; + /* Several toplevels. Best we can is to set minlen. */ + SSize_t fake; + regnode_ssc ch_class; + SSize_t last_close = 0; DEBUG_PARSE_r(Perl_re_printf( aTHX_ "\nMulti Top Level\n")); - scan = RExC_rxi->program + 1; - ssc_init(pRExC_state, &ch_class); - data.start_class = &ch_class; - data.last_closep = &last_close; + scan = RExC_rxi->program + 1; + ssc_init(pRExC_state, &ch_class); + data.start_class = &ch_class; + data.last_closep = &last_close; DEBUG_RExC_seen(); /* * MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../ * (patterns WITH top level branches) */ - minlen = study_chunk(pRExC_state, + minlen = study_chunk(pRExC_state, &scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL, SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied ? SCF_TRIE_DOING_RESTUDY @@ -8345,7 +8346,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, CHECK_RESTUDY_GOTO_butfirst(NOOP); - RExC_rx->check_substr = NULL; + RExC_rx->check_substr = NULL; RExC_rx->check_utf8 = NULL; RExC_rx->substrs->data[0].substr = NULL; RExC_rx->substrs->data[0].utf8_substr = NULL; @@ -8353,25 +8354,25 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_rx->substrs->data[1].utf8_substr = NULL; if (! 
(ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING) - && is_ssc_worth_it(pRExC_state, data.start_class)) + && is_ssc_worth_it(pRExC_state, data.start_class)) { - const U32 n = add_data(pRExC_state, STR_WITH_LEN("f")); + const U32 n = add_data(pRExC_state, STR_WITH_LEN("f")); ssc_finalize(pRExC_state, data.start_class); - Newx(RExC_rxi->data->data[n], 1, regnode_ssc); - StructCopy(data.start_class, - (regnode_ssc*)RExC_rxi->data->data[n], - regnode_ssc); - RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n]; - RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ - DEBUG_COMPILE_r({ SV* sv = sv_newmortal(); + Newx(RExC_rxi->data->data[n], 1, regnode_ssc); + StructCopy(data.start_class, + (regnode_ssc*)RExC_rxi->data->data[n], + regnode_ssc); + RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n]; + RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ + DEBUG_COMPILE_r({ SV* sv = sv_newmortal(); regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state); Perl_re_printf( aTHX_ - "synthetic stclass \"%s\".\n", - SvPVX_const(sv));}); + "synthetic stclass \"%s\".\n", + SvPVX_const(sv));}); data.start_class = NULL; - } + } } if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN) { @@ -8402,16 +8403,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_rx->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the lookbehind */ if (pRExC_state->code_blocks) - RExC_rx->extflags |= RXf_EVAL_SEEN; + RExC_rx->extflags |= RXf_EVAL_SEEN; if (RExC_seen & REG_VERBARG_SEEN) { - RExC_rx->intflags |= PREGf_VERBARG_SEEN; + RExC_rx->intflags |= PREGf_VERBARG_SEEN; RExC_rx->extflags |= RXf_NO_INPLACE_SUBST; /* don't understand this! 
Yves */ } if (RExC_seen & REG_CUTGROUP_SEEN) - RExC_rx->intflags |= PREGf_CUTGROUP_SEEN; + RExC_rx->intflags |= PREGf_CUTGROUP_SEEN; if (pm_flags & PMf_USE_RE_EVAL) - RExC_rx->intflags |= PREGf_USE_RE_EVAL; + RExC_rx->intflags |= PREGf_USE_RE_EVAL; if (RExC_paren_names) RXp_PAREN_NAMES(RExC_rx) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names)); else @@ -8567,7 +8568,7 @@ Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey, SV* Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv, - const U32 flags) + const U32 flags) { SV *ret; struct regexp *const rx = ReANY(r); @@ -8616,9 +8617,9 @@ Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key, if (flags & RXapif_ALL) { return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0); } else { - SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags); + SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags); if (sv) { - SvREFCNT_dec_NN(sv); + SvREFCNT_dec_NN(sv); return TRUE; } else { return FALSE; @@ -8637,11 +8638,11 @@ Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags) PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY; if ( rx && RXp_PAREN_NAMES(rx) ) { - (void)hv_iterinit(RXp_PAREN_NAMES(rx)); + (void)hv_iterinit(RXp_PAREN_NAMES(rx)); - return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY); + return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY); } else { - return FALSE; + return FALSE; } } @@ -8671,7 +8672,7 @@ Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags) } } if (parno || flags & RXapif_ALL) { - return newSVhek(HeKEY_hek(temphe)); + return newSVhek(HeKEY_hek(temphe)); } } } @@ -8695,7 +8696,7 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags) ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES)); av = MUTABLE_AV(SvRV(ret)); length = av_count(av); - SvREFCNT_dec_NN(ret); + SvREFCNT_dec_NN(ret); return newSViv(length); } else { Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar", @@ -8743,7 +8744,7 @@ 
Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags) void Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, - SV * const sv) + SV * const sv) { struct regexp *const rx = ReANY(r); char *s = NULL; @@ -8782,16 +8783,16 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, && rx->offs[0].start != -1) { /* $`, ${^PREMATCH} */ - i = rx->offs[0].start; - s = rx->subbeg; + i = rx->offs[0].start; + s = rx->subbeg; } else if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH) && rx->offs[0].end != -1) { /* $', ${^POSTMATCH} */ - s = rx->subbeg - rx->suboffset + rx->offs[0].end; - i = rx->sublen + rx->suboffset - rx->offs[0].end; + s = rx->subbeg - rx->suboffset + rx->offs[0].end; + i = rx->sublen + rx->suboffset - rx->offs[0].end; } else if (inRANGE(n, 0, (I32)rx->nparens) && @@ -8848,7 +8849,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, void Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren, - SV const * const value) + SV const * const value) { PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE; @@ -8893,32 +8894,32 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv, case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */ case RX_BUFF_IDX_PREMATCH: /* $` */ if (rx->offs[0].start != -1) { - i = rx->offs[0].start; - if (i > 0) { - s1 = 0; - t1 = i; - goto getlen; - } - } + i = rx->offs[0].start; + if (i > 0) { + s1 = 0; + t1 = i; + goto getlen; + } + } return 0; case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */ case RX_BUFF_IDX_POSTMATCH: /* $' */ - if (rx->offs[0].end != -1) { - i = rx->sublen - rx->offs[0].end; - if (i > 0) { - s1 = rx->offs[0].end; - t1 = rx->sublen; - goto getlen; - } - } + if (rx->offs[0].end != -1) { + i = rx->sublen - rx->offs[0].end; + if (i > 0) { + s1 = rx->offs[0].end; + t1 = rx->sublen; + goto getlen; + } + } return 0; default: /* $& / ${^MATCH}, $1, $2, ... 
*/ - if (paren <= (I32)rx->nparens && + if (paren <= (I32)rx->nparens && (s1 = rx->offs[paren].start) != -1 && (t1 = rx->offs[paren].end) != -1) - { + { i = t1 - s1; goto getlen; } else { @@ -8945,11 +8946,11 @@ SV* Perl_reg_qr_package(pTHX_ REGEXP * const rx) { PERL_ARGS_ASSERT_REG_QR_PACKAGE; - PERL_UNUSED_ARG(rx); - if (0) - return NULL; - else - return newSVpvs("Regexp"); + PERL_UNUSED_ARG(rx); + if (0) + return NULL; + else + return newSVpvs("Regexp"); } /* Scans the name of a named buffer from the pattern. @@ -8977,22 +8978,22 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags) else if (isIDFIRST_lazy_if_safe(RExC_parse, RExC_end, UTF)) { /* Note that the code here assumes well-formed UTF-8. Skip IDFIRST by * using do...while */ - if (UTF) - do { - RExC_parse += UTF8SKIP(RExC_parse); - } while ( RExC_parse < RExC_end + if (UTF) + do { + RExC_parse += UTF8SKIP(RExC_parse); + } while ( RExC_parse < RExC_end && isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end)); - else - do { - RExC_parse++; - } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse)); + else + do { + RExC_parse++; + } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse)); } else { RExC_parse++; /* so the <- from the vFAIL is after the offending character */ vFAIL("Group name must start with a non-digit word character"); } sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start), - SVs_TEMP | (UTF ? SVf_UTF8 : 0)); + SVs_TEMP | (UTF ? 
SVf_UTF8 : 0)); if ( flags == REG_RSN_RETURN_NAME) return sv_name; else if (flags==REG_RSN_RETURN_DATA) { @@ -9312,7 +9313,7 @@ Perl__new_invlist(pTHX_ IV initial_size) SV* new_list; if (initial_size < 0) { - initial_size = 10; + initial_size = 10; } new_list = newSV_type(SVt_INVLIST); @@ -9358,7 +9359,7 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list) SvPV_set(invlist, (char *) (list + HEADER_LENGTH)); SvLEN_set(invlist, 0); /* Means we own the contents, and the system - shouldn't touch it */ + shouldn't touch it */ *(get_invlist_offset_addr(invlist)) = offset; @@ -9398,39 +9399,39 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, array = _invlist_array_init(invlist, ! offset); } else { - /* Here, the existing list is non-empty. The current max entry in the - * list is generally the first value not in the set, except when the - * set extends to the end of permissible values, in which case it is - * the first entry in that final set, and so this call is an attempt to - * append out-of-order */ - - UV final_element = len - 1; - array = invlist_array(invlist); - if ( array[final_element] > start - || ELEMENT_RANGE_MATCHES_INVLIST(final_element)) - { - Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c", - array[final_element], start, - ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f'); - } + /* Here, the existing list is non-empty. 
The current max entry in the + * list is generally the first value not in the set, except when the + * set extends to the end of permissible values, in which case it is + * the first entry in that final set, and so this call is an attempt to + * append out-of-order */ + + UV final_element = len - 1; + array = invlist_array(invlist); + if ( array[final_element] > start + || ELEMENT_RANGE_MATCHES_INVLIST(final_element)) + { + Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c", + array[final_element], start, + ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f'); + } /* Here, it is a legal append. If the new range begins 1 above the end * of the range below it, it is extending the range below it, so the * new first value not in the set is one greater than the newly * extended range. */ offset = *get_invlist_offset_addr(invlist); - if (array[final_element] == start) { - if (end != UV_MAX) { - array[final_element] = end + 1; - } - else { - /* But if the end is the maximum representable on the machine, + if (array[final_element] == start) { + if (end != UV_MAX) { + array[final_element] = end + 1; + } + else { + /* But if the end is the maximum representable on the machine, * assume that infinity was actually what was meant. Just let * the range that this would extend to have no end */ - invlist_set_len(invlist, len - 1, offset); - } - return; - } + invlist_set_len(invlist, len - 1, offset); + } + return; + } } /* Here the new range doesn't extend any existing set. 
Add it */ @@ -9440,27 +9441,27 @@ S__append_range_to_invlist(pTHX_ SV* const invlist, /* If wll overflow the existing space, extend, which may cause the array to * be moved */ if (max < len) { - invlist_extend(invlist, len); + invlist_extend(invlist, len); /* Have to set len here to avoid assert failure in invlist_array() */ invlist_set_len(invlist, len, offset); - array = invlist_array(invlist); + array = invlist_array(invlist); } else { - invlist_set_len(invlist, len, offset); + invlist_set_len(invlist, len, offset); } /* The next item on the list starts the range, the one after that is * one past the new range. */ array[len - 2] = start; if (end != UV_MAX) { - array[len - 1] = end + 1; + array[len - 1] = end + 1; } else { - /* But if the end is the maximum representable on the machine, just let - * the range have no end */ - invlist_set_len(invlist, len - 1, offset); + /* But if the end is the maximum representable on the machine, just let + * the range have no end */ + invlist_set_len(invlist, len - 1, offset); } } @@ -9484,7 +9485,7 @@ Perl__invlist_search(SV* const invlist, const UV cp) /* If list is empty, return failure. */ if (high == 0) { - return -1; + return -1; } /* (We can't get the array unless we know the list is non-empty) */ @@ -9535,20 +9536,20 @@ Perl__invlist_search(SV* const invlist, const UV cp) * The loop below converges on the i+1. Note that there may not be an * (i+1)th element in the array, and things work nonetheless */ while (low < high) { - mid = (low + high) / 2; + mid = (low + high) / 2; assert(mid <= highest_element); - if (array[mid] <= cp) { /* cp >= array[mid] */ - low = mid + 1; + if (array[mid] <= cp) { /* cp >= array[mid] */ + low = mid + 1; - /* We could do this extra test to exit the loop early. - if (cp < array[low]) { - return mid; - } - */ - } - else { /* cp < array[mid] */ - high = mid; - } + /* We could do this extra test to exit the loop early. 
+ if (cp < array[low]) { + return mid; + } + */ + } + else { /* cp < array[mid] */ + high = mid; + } } found_entry: @@ -9681,7 +9682,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, SvREFCNT_dec_NN(u); } - return; + return; } /* Here both lists exist and are non-empty */ @@ -9692,8 +9693,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * up so are looking at b's complement. */ if (complement_b) { - /* To complement, we invert: if the first element is 0, remove it. To - * do this, we just pretend the array starts one later */ + /* To complement, we invert: if the first element is 0, remove it. To + * do this, we just pretend the array starts one later */ if (array_b[0] == 0) { array_b++; len_b--; @@ -9718,11 +9719,11 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, /* Go through each input list item by item, stopping when have exhausted * one of them */ while (i_a < len_a && i_b < len_b) { - UV cp; /* The element to potentially add to the union's array */ - bool cp_in_set; /* is it in the input list's set or not */ + UV cp; /* The element to potentially add to the union's array */ + bool cp_in_set; /* is it in the input list's set or not */ - /* We need to take one or the other of the two inputs for the union. - * Since we are merging two sorted lists, we take the smaller of the + /* We need to take one or the other of the two inputs for the union. + * Since we are merging two sorted lists, we take the smaller of the * next items. In case of a tie, we take first the one that is in its * set. If we first took the one not in its set, it would decrement * the count, possibly to 0 which would cause it to be output as ending @@ -9732,33 +9733,33 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * momentarily decremented to 0, and thus the two adjoining ranges will * be seamlessly merged. 
(In a tie and both are in the set or both not * in the set, it doesn't matter which we take first.) */ - if ( array_a[i_a] < array_b[i_b] - || ( array_a[i_a] == array_b[i_b] - && ELEMENT_RANGE_MATCHES_INVLIST(i_a))) - { - cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a); - cp = array_a[i_a++]; - } - else { - cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b); - cp = array_b[i_b++]; - } - - /* Here, have chosen which of the two inputs to look at. Only output - * if the running count changes to/from 0, which marks the - * beginning/end of a range that's in the set */ - if (cp_in_set) { - if (count == 0) { - array_u[i_u++] = cp; - } - count++; - } - else { - count--; - if (count == 0) { - array_u[i_u++] = cp; - } - } + if ( array_a[i_a] < array_b[i_b] + || ( array_a[i_a] == array_b[i_b] + && ELEMENT_RANGE_MATCHES_INVLIST(i_a))) + { + cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a); + cp = array_a[i_a++]; + } + else { + cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b); + cp = array_b[i_b++]; + } + + /* Here, have chosen which of the two inputs to look at. Only output + * if the running count changes to/from 0, which marks the + * beginning/end of a range that's in the set */ + if (cp_in_set) { + if (count == 0) { + array_u[i_u++] = cp; + } + count++; + } + else { + count--; + if (count == 0) { + array_u[i_u++] = cp; + } + } } @@ -9769,9 +9770,9 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * that list is in its set. (i_a and i_b each currently index the element * beyond the one we care about.) 
*/ if ( (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a)) - || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b))) + || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b))) { - count--; + count--; } /* Above we decremented 'count' if the list that had unexamined elements in @@ -9801,11 +9802,11 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, else { IV copy_count = len_a - i_a; if (copy_count > 0) { /* The non-exhausted input is 'a' */ - Copy(array_a + i_a, array_u + i_u, copy_count, UV); + Copy(array_a + i_a, array_u + i_u, copy_count, UV); } else { /* The non-exhausted input is b */ copy_count = len_b - i_b; - Copy(array_b + i_b, array_u + i_u, copy_count, UV); + Copy(array_b + i_b, array_u + i_u, copy_count, UV); } len_u = i_u + copy_count; } @@ -9814,9 +9815,9 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * array_u, so re-find it. (Note that it is unlikely that this will * change, as we are shrinking the space, not enlarging it) */ if (len_u != _invlist_len(u)) { - invlist_set_len(u, len_u, *get_invlist_offset_addr(u)); - invlist_trim(u); - array_u = invlist_array(u); + invlist_set_len(u, len_u, *get_invlist_offset_addr(u)); + invlist_trim(u); + array_u = invlist_array(u); } if (*output == NULL) { /* Simply return the new inversion list */ @@ -9914,7 +9915,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, } invlist_clear(*i); - return; + return; } /* Here both lists exist and are non-empty */ @@ -9925,8 +9926,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * up so are looking at b's complement. */ if (complement_b) { - /* To complement, we invert: if the first element is 0, remove it. To - * do this, we just pretend the array starts one later */ + /* To complement, we invert: if the first element is 0, remove it. 
To + * do this, we just pretend the array starts one later */ if (array_b[0] == 0) { array_b++; len_b--; @@ -9951,12 +9952,12 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, /* Go through each list item by item, stopping when have exhausted one of * them */ while (i_a < len_a && i_b < len_b) { - UV cp; /* The element to potentially add to the intersection's - array */ - bool cp_in_set; /* Is it in the input list's set or not */ + UV cp; /* The element to potentially add to the intersection's + array */ + bool cp_in_set; /* Is it in the input list's set or not */ - /* We need to take one or the other of the two inputs for the - * intersection. Since we are merging two sorted lists, we take the + /* We need to take one or the other of the two inputs for the + * intersection. Since we are merging two sorted lists, we take the * smaller of the next items. In case of a tie, we take first the one * that is not in its set (a difference from the union algorithm). If * we first took the one in its set, it would increment the count, @@ -9966,33 +9967,33 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * opposite of this, there is no possibility that the count will be * momentarily incremented to 2. (In a tie and both are in the set or * both not in the set, it doesn't matter which we take first.) */ - if ( array_a[i_a] < array_b[i_b] - || ( array_a[i_a] == array_b[i_b] - && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a))) - { - cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a); - cp = array_a[i_a++]; - } - else { - cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b); - cp= array_b[i_b++]; - } - - /* Here, have chosen which of the two inputs to look at. 
Only output - * if the running count changes to/from 2, which marks the - * beginning/end of a range that's in the intersection */ - if (cp_in_set) { - count++; - if (count == 2) { - array_r[i_r++] = cp; - } - } - else { - if (count == 2) { - array_r[i_r++] = cp; - } - count--; - } + if ( array_a[i_a] < array_b[i_b] + || ( array_a[i_a] == array_b[i_b] + && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a))) + { + cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a); + cp = array_a[i_a++]; + } + else { + cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b); + cp= array_b[i_b++]; + } + + /* Here, have chosen which of the two inputs to look at. Only output + * if the running count changes to/from 2, which marks the + * beginning/end of a range that's in the intersection */ + if (cp_in_set) { + count++; + if (count == 2) { + array_r[i_r++] = cp; + } + } + else { + if (count == 2) { + array_r[i_r++] = cp; + } + count--; + } } @@ -10005,7 +10006,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, if ( (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a)) || (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b))) { - count++; + count++; } /* Above we incremented 'count' if the exhausted list was in its set. This @@ -10035,11 +10036,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, else { /* copy the non-exhausted list, unchanged. */ IV copy_count = len_a - i_a; if (copy_count > 0) { /* a is the one with stuff left */ - Copy(array_a + i_a, array_r + i_r, copy_count, UV); + Copy(array_a + i_a, array_r + i_r, copy_count, UV); } else { /* b is the one with stuff left */ copy_count = len_b - i_b; - Copy(array_b + i_b, array_r + i_r, copy_count, UV); + Copy(array_b + i_b, array_r + i_r, copy_count, UV); } len_r = i_r + copy_count; } @@ -10048,9 +10049,9 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b, * array_r, so re-find it. 
(Note that it is unlikely that this will * change, as we are shrinking the space, not enlarging it) */ if (len_r != _invlist_len(r)) { - invlist_set_len(r, len_r, *get_invlist_offset_addr(r)); - invlist_trim(r); - array_r = invlist_array(r); + invlist_set_len(r, len_r, *get_invlist_offset_addr(r)); + invlist_trim(r); + array_r = invlist_array(r); } if (*i == NULL) { /* Simply return the calculated intersection */ @@ -10099,7 +10100,7 @@ Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end) /* This range becomes the whole inversion list if none already existed */ if (invlist == NULL) { - invlist = _new_invlist(2); + invlist = _new_invlist(2); _append_range_to_invlist(invlist, start, end); return invlist; } @@ -10378,8 +10379,8 @@ Perl__invlist_invert(pTHX_ SV* const invlist) /* The inverse of matching nothing is matching everything */ if (_invlist_len(invlist) == 0) { - _append_range_to_invlist(invlist, 0, UV_MAX); - return; + _append_range_to_invlist(invlist, 0, UV_MAX); + return; } *get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist); @@ -10463,21 +10464,21 @@ S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style) invlist_iterinit(invlist); while (invlist_iternext(invlist, &start, &end)) { - if (end == UV_MAX) { - Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c", + if (end == UV_MAX) { + Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c", start, intra_range_delimiter, inter_range_delimiter); - } - else if (end != start) { - Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c", - start, + } + else if (end != start) { + Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c", + start, intra_range_delimiter, end, inter_range_delimiter); - } - else { - Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c", + } + else { + Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c", start, inter_range_delimiter); - } + } } if (SvCUR(output) && ! 
traditional_style) {/* Get rid of trailing blank */ @@ -10520,20 +10521,20 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level, invlist_iterinit(invlist); while (invlist_iternext(invlist, &start, &end)) { - if (end == UV_MAX) { - Perl_dump_indent(aTHX_ level, file, + if (end == UV_MAX) { + Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf " .. INFTY\n", indent, (UV)count, start); - } - else if (end != start) { - Perl_dump_indent(aTHX_ level, file, + } + else if (end != start) { + Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n", - indent, (UV)count, start, end); - } - else { - Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n", + indent, (UV)count, start, end); + } + else { + Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n", indent, (UV)count, start); - } + } count += 2; } } @@ -10939,7 +10940,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state) : WASTED_G; if (! (wastedflags & wflagbit) ) { wastedflags |= wflagbit; - /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */ + /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */ vWARN5( RExC_parse + 1, "Useless (%s%c) - %suse /%c modifier", @@ -10959,7 +10960,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state) if (ckWARN(WARN_REGEXP)) { if (! (wastedflags & WASTED_C) ) { wastedflags |= WASTED_GC; - /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */ + /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */ vWARN3( RExC_parse + 1, "Useless (%sc) - %suse /gc modifier", @@ -11020,7 +11021,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state) default: fail_modifiers: RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end); - /* diag_listed_as: Sequence (?%s...) 
not recognized in regex; marked by <-- HERE in m/%s/ */ + /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized", UTF8fARG(UTF, RExC_parse-seqstart, seqstart)); NOT_REACHED; /*NOTREACHED*/ @@ -11171,7 +11172,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) && *(RExC_parse - 1) != '('; if (RExC_parse >= RExC_end) { - vFAIL("Unmatched ("); + vFAIL("Unmatched ("); } if (paren == 'r') { /* Atomic script run */ @@ -11179,10 +11180,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) goto parse_rest; } else if ( *RExC_parse == '*') { /* (*VERB:ARG), (*construct:...) */ - char *start_verb = RExC_parse + 1; - STRLEN verb_len; - char *start_arg = NULL; - unsigned char op = 0; + char *start_verb = RExC_parse + 1; + STRLEN verb_len; + char *start_arg = NULL; + unsigned char op = 0; int arg_required = 0; int internal_argval = -1; /* if >-1 we are not allowed an argument*/ bool has_upper = FALSE; @@ -11199,11 +11200,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) vFAIL("In '(*...)', the '(' and '*' must be adjacent"); } } - while (RExC_parse < RExC_end && *RExC_parse != ')' ) { - if ( *RExC_parse == ':' ) { - start_arg = RExC_parse + 1; - break; - } + while (RExC_parse < RExC_end && *RExC_parse != ')' ) { + if ( *RExC_parse == ':' ) { + start_arg = RExC_parse + 1; + break; + } else if (! UTF) { if (isUPPER(*RExC_parse)) { has_upper = TRUE; @@ -11213,18 +11214,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) else { RExC_parse += UTF8SKIP(RExC_parse); } - } - verb_len = RExC_parse - start_verb; - if ( start_arg ) { + } + verb_len = RExC_parse - start_verb; + if ( start_arg ) { if (RExC_parse >= RExC_end) { goto unterminated_verb_pattern; } - RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1; - while ( RExC_parse < RExC_end && *RExC_parse != ')' ) { + RExC_parse += UTF ? 
UTF8SKIP(RExC_parse) : 1; + while ( RExC_parse < RExC_end && *RExC_parse != ')' ) { RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1; } - if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) { + if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) { unterminated_verb_pattern: if (has_upper) { vFAIL("Unterminated verb pattern argument"); @@ -11233,8 +11234,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) vFAIL("Unterminated '(*...' argument"); } } - } else { - if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) { + } else { + if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) { if (has_upper) { vFAIL("Unterminated verb pattern"); } @@ -11242,29 +11243,29 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) vFAIL("Unterminated '(*...' construct"); } } - } + } /* Here, we know that RExC_parse < RExC_end */ - switch ( *start_verb ) { + switch ( *start_verb ) { case 'A': /* (*ACCEPT) */ if ( memEQs(start_verb, verb_len,"ACCEPT") ) { - op = ACCEPT; - internal_argval = RExC_nestroot; - } - break; + op = ACCEPT; + internal_argval = RExC_nestroot; + } + break; case 'C': /* (*COMMIT) */ if ( memEQs(start_verb, verb_len,"COMMIT") ) op = COMMIT; break; case 'F': /* (*FAIL) */ if ( verb_len==1 || memEQs(start_verb, verb_len,"FAIL") ) { - op = OPFAIL; - } - break; + op = OPFAIL; + } + break; case ':': /* (*:NAME) */ - case 'M': /* (*MARK:NAME) */ - if ( verb_len==0 || memEQs(start_verb, verb_len,"MARK") ) { + case 'M': /* (*MARK:NAME) */ + if ( verb_len==0 || memEQs(start_verb, verb_len,"MARK") ) { op = MARKPOINT; arg_required = 1; } @@ -11421,7 +11422,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) ret=reganode(pRExC_state, OPFAIL, 0); nextchar(pRExC_state); return ret; - } + } RExC_parse = start_arg; goto parse_rest; @@ -11430,11 +11431,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) vFAIL2utf8f( "'(*%" UTF8f "' requires a terminating ':'", UTF8fARG(UTF, verb_len, 
start_verb)); - NOT_REACHED; /*NOTREACHED*/ + NOT_REACHED; /*NOTREACHED*/ - } /* End of switch */ - if ( ! op ) { - RExC_parse += UTF + } /* End of switch */ + if ( ! op ) { + RExC_parse += UTF ? UTF8_SAFE_SKIP(RExC_parse, RExC_end) : 1; if (has_upper || verb_len == 0) { @@ -11447,7 +11448,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) "Unknown '(*...)' construct '%" UTF8f "'", UTF8fARG(UTF, verb_len, start_verb)); } - } + } if ( RExC_parse == start_arg ) { start_arg = NULL; } @@ -11473,12 +11474,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } if ( internal_argval != -1 ) ARG2L_SET(REGNODE_p(ret), internal_argval); - nextchar(pRExC_state); - return ret; + nextchar(pRExC_state); + return ret; } else if (*RExC_parse == '?') { /* (?...) */ - bool is_logical = 0; - const char * const seqstart = RExC_parse; + bool is_logical = 0; + const char * const seqstart = RExC_parse; const char * endptr; const char non_existent_group_msg[] = "Reference to nonexistent group"; @@ -11489,24 +11490,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) vFAIL("In '(?...)', the '(' and '?' must be adjacent"); } - RExC_parse++; /* past the '?' */ + RExC_parse++; /* past the '?' */ paren = *RExC_parse; /* might be a trailing NUL, if not well-formed */ RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1; if (RExC_parse > RExC_end) { paren = '\0'; } - ret = 0; /* For look-ahead/behind. */ - switch (paren) { + ret = 0; /* For look-ahead/behind. */ + switch (paren) { - case 'P': /* (?P...) variants for those used to PCRE/Python */ - paren = *RExC_parse; - if ( paren == '<') { /* (?P<...>) named capture */ + case 'P': /* (?P...) variants for those used to PCRE/Python */ + paren = *RExC_parse; + if ( paren == '<') { /* (?P<...>) named capture */ RExC_parse++; if (RExC_parse >= RExC_end) { vFAIL("Sequence (?P<... 
not terminated"); } - goto named_capture; + goto named_capture; } else if (paren == '>') { /* (?P>name) named recursion */ RExC_parse++; @@ -11522,33 +11523,33 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end); /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ - vFAIL3("Sequence (%.*s...) not recognized", + vFAIL3("Sequence (%.*s...) not recognized", (int) (RExC_parse - seqstart), seqstart); - NOT_REACHED; /*NOTREACHED*/ + NOT_REACHED; /*NOTREACHED*/ case '<': /* (?<...) */ /* If you want to support (?<*...), first reconcile with GH #17363 */ - if (*RExC_parse == '!') - paren = ','; - else if (*RExC_parse != '=') + if (*RExC_parse == '!') + paren = ','; + else if (*RExC_parse != '=') named_capture: - { /* (?<...>) */ - char *name_start; - SV *svname; - paren= '>'; + { /* (?<...>) */ + char *name_start; + SV *svname; + paren= '>'; /* FALLTHROUGH */ case '\'': /* (?'...') */ name_start = RExC_parse; svname = reg_scan_name(pRExC_state, REG_RSN_RETURN_NAME); - if ( RExC_parse == name_start + if ( RExC_parse == name_start || RExC_parse >= RExC_end || *RExC_parse != paren) { - vFAIL2("Sequence (?%c... not terminated", - paren=='>' ? '<' : (char) paren); + vFAIL2("Sequence (?%c... not terminated", + paren=='>' ? '<' : (char) paren); } - { - HE *he_str; - SV *sv_dat = NULL; + { + HE *he_str; + SV *sv_dat = NULL; if (!svname) /* shouldn't happen */ Perl_croak(aTHX_ "panic: reg_scan_name returned NULL"); @@ -11607,56 +11608,56 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) /*sv_dump(sv_dat);*/ } nextchar(pRExC_state); - paren = 1; - goto capturing_parens; - } + paren = 1; + goto capturing_parens; + } RExC_seen |= REG_LOOKBEHIND_SEEN; - RExC_in_lookaround++; - RExC_parse++; + RExC_in_lookaround++; + RExC_parse++; if (RExC_parse >= RExC_end) { vFAIL("Sequence (?... 
not terminated"); } RExC_seen_zerolen++; break; - case '=': /* (?=...) */ - RExC_seen_zerolen++; + case '=': /* (?=...) */ + RExC_seen_zerolen++; RExC_in_lookaround++; break; - case '!': /* (?!...) */ - RExC_seen_zerolen++; - /* check if we're really just a "FAIL" assertion */ + case '!': /* (?!...) */ + RExC_seen_zerolen++; + /* check if we're really just a "FAIL" assertion */ skip_to_be_ignored_text(pRExC_state, &RExC_parse, FALSE /* Don't force to /x */ ); - if (*RExC_parse == ')') { + if (*RExC_parse == ')') { ret=reganode(pRExC_state, OPFAIL, 0); - nextchar(pRExC_state); - return ret; - } + nextchar(pRExC_state); + return ret; + } RExC_in_lookaround++; - break; - case '|': /* (?|...) */ - /* branch reset, behave like a (?:...) except that - buffers in alternations share the same numbers */ - paren = ':'; - after_freeze = freeze_paren = RExC_npar; + break; + case '|': /* (?|...) */ + /* branch reset, behave like a (?:...) except that + buffers in alternations share the same numbers */ + paren = ':'; + after_freeze = freeze_paren = RExC_npar; /* XXX This construct currently requires an extra pass. * Investigation would be required to see if that could be * changed */ REQUIRE_PARENS_PASS; - break; - case ':': /* (?:...) */ - case '>': /* (?>...) */ - break; - case '$': /* (?$...) */ - case '@': /* (?@...) */ - vFAIL2("Sequence (?%c...) not implemented", (int)paren); - break; - case '0' : /* (?0) */ - case 'R' : /* (?R) */ + break; + case ':': /* (?:...) */ + case '>': /* (?>...) */ + break; + case '$': /* (?$...) */ + case '@': /* (?@...) */ + vFAIL2("Sequence (?%c...) 
not implemented", (int)paren); + break; + case '0' : /* (?0) */ + case 'R' : /* (?R) */ if (RExC_parse == RExC_end || *RExC_parse != ')') - FAIL("Sequence (?R) not terminated"); + FAIL("Sequence (?R) not terminated"); num = 0; RExC_seen |= REG_RECURSE_SEEN; @@ -11664,9 +11665,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) * It probably could be changed */ REQUIRE_PARENS_PASS; - *flagp |= POSTPONED; + *flagp |= POSTPONED; goto gen_recurse_regop; - /*notreached*/ + /*notreached*/ /* named and numeric backreferences */ case '&': /* (?&NAME) */ parse_start = RExC_parse - 1; @@ -11694,8 +11695,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } /* FALLTHROUGH */ case '1': case '2': case '3': case '4': /* (?1) */ - case '5': case '6': case '7': case '8': case '9': - RExC_parse = (char *) seqstart + 1; /* Point to the digit */ + case '5': case '6': case '7': case '8': case '9': + RExC_parse = (char *) seqstart + 1; /* Point to the digit */ parse_recursion: { bool is_neg = FALSE; @@ -11725,8 +11726,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) num = -num; } } - if (*RExC_parse!=')') - vFAIL("Expecting close bracket"); + if (*RExC_parse!=')') + vFAIL("Expecting close bracket"); gen_recurse_regop: if (paren == '-' || paren == '+') { @@ -11801,7 +11802,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) Set_Node_Length(REGNODE_p(ret), 1 + regarglen[OP(REGNODE_p(ret))]); /* MJD */ - Set_Node_Offset(REGNODE_p(ret), parse_start); /* MJD */ + Set_Node_Offset(REGNODE_p(ret), parse_start); /* MJD */ *flagp |= POSTPONED; assert(*RExC_parse == ')'); @@ -11810,43 +11811,43 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) /* NOTREACHED */ - case '?': /* (??...) */ - is_logical = 1; - if (*RExC_parse != '{') { + case '?': /* (??...) 
*/ + is_logical = 1; + if (*RExC_parse != '{') { RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end); /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */ vFAIL2utf8f( "Sequence (%" UTF8f "...) not recognized", UTF8fARG(UTF, RExC_parse-seqstart, seqstart)); - NOT_REACHED; /*NOTREACHED*/ - } - *flagp |= POSTPONED; - paren = '{'; + NOT_REACHED; /*NOTREACHED*/ + } + *flagp |= POSTPONED; + paren = '{'; RExC_parse++; - /* FALLTHROUGH */ - case '{': /* (?{...}) */ - { - U32 n = 0; - struct reg_code_block *cb; + /* FALLTHROUGH */ + case '{': /* (?{...}) */ + { + U32 n = 0; + struct reg_code_block *cb; OP * o; - RExC_seen_zerolen++; + RExC_seen_zerolen++; - if ( !pRExC_state->code_blocks - || pRExC_state->code_index + if ( !pRExC_state->code_blocks + || pRExC_state->code_index >= pRExC_state->code_blocks->count - || pRExC_state->code_blocks->cb[pRExC_state->code_index].start - != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0)) - - RExC_start) - ) { - if (RExC_pm_flags & PMf_USE_RE_EVAL) - FAIL("panic: Sequence (?{...}): no code block found\n"); - FAIL("Eval-group not allowed at runtime, use re 'eval'"); - } - /* this is a pre-compiled code block (?{...}) */ - cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index]; - RExC_parse = RExC_start + cb->end; - o = cb->block; + || pRExC_state->code_blocks->cb[pRExC_state->code_index].start + != (STRLEN)((RExC_parse -3 - (is_logical ? 
1 : 0)) + - RExC_start) + ) { + if (RExC_pm_flags & PMf_USE_RE_EVAL) + FAIL("panic: Sequence (?{...}): no code block found\n"); + FAIL("Eval-group not allowed at runtime, use re 'eval'"); + } + /* this is a pre-compiled code block (?{...}) */ + cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index]; + RExC_parse = RExC_start + cb->end; + o = cb->block; if (cb->src_regex) { n = add_data(pRExC_state, STR_WITH_LEN("rl")); RExC_rxi->data->data[n] = @@ -11858,12 +11859,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) (RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1); RExC_rxi->data->data[n] = (void*)o; } - pRExC_state->code_index++; - nextchar(pRExC_state); + pRExC_state->code_index++; + nextchar(pRExC_state); - if (is_logical) { + if (is_logical) { regnode_offset eval; - ret = reg_node(pRExC_state, LOGICAL); + ret = reg_node(pRExC_state, LOGICAL); eval = reg2Lanode(pRExC_state, EVAL, n, @@ -11877,24 +11878,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) REQUIRE_BRANCHJ(flagp, 0); } /* deal with the length of this later - MJD */ - return ret; - } - ret = reg2Lanode(pRExC_state, EVAL, n, 0); - Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); - Set_Node_Offset(REGNODE_p(ret), parse_start); - return ret; - } - case '(': /* (?(?{...})...) and (?(?=...)...) */ - { - int is_define= 0; + return ret; + } + ret = reg2Lanode(pRExC_state, EVAL, n, 0); + Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); + Set_Node_Offset(REGNODE_p(ret), parse_start); + return ret; + } + case '(': /* (?(?{...})...) and (?(?=...)...) */ + { + int is_define= 0; const int DEFINE_len = sizeof("DEFINE") - 1; - if ( RExC_parse < RExC_end - 1 + if ( RExC_parse < RExC_end - 1 && ( ( RExC_parse[0] == '?' /* (?(?...)) */ && ( RExC_parse[1] == '=' || RExC_parse[1] == '!' 
|| RExC_parse[1] == '<' || RExC_parse[1] == '{')) - || ( RExC_parse[0] == '*' /* (?(*...)) */ + || ( RExC_parse[0] == '*' /* (?(*...)) */ && ( memBEGINs(RExC_parse + 1, (Size_t) (RExC_end - (RExC_parse + 1)), "pla:") @@ -11933,14 +11934,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } goto insert_if; } - else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */ - || RExC_parse[0] == '\'' ) /* (?('NAME')...) */ - { - char ch = RExC_parse[0] == '<' ? '>' : '\''; - char *name_start= RExC_parse++; - U32 num = 0; - SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA); - if ( RExC_parse == name_start + else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */ + || RExC_parse[0] == '\'' ) /* (?('NAME')...) */ + { + char ch = RExC_parse[0] == '<' ? '>' : '\''; + char *name_start= RExC_parse++; + U32 num = 0; + SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA); + if ( RExC_parse == name_start || RExC_parse >= RExC_end || *RExC_parse != ch) { @@ -11955,23 +11956,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } ret = reganode(pRExC_state, GROUPPN, num); goto insert_if_check_paren; - } - else if (memBEGINs(RExC_parse, + } + else if (memBEGINs(RExC_parse, (STRLEN) (RExC_end - RExC_parse), "DEFINE")) { - ret = reganode(pRExC_state, DEFINEP, 0); - RExC_parse += DEFINE_len; - is_define = 1; - goto insert_if_check_paren; - } - else if (RExC_parse[0] == 'R') { - RExC_parse++; + ret = reganode(pRExC_state, DEFINEP, 0); + RExC_parse += DEFINE_len; + is_define = 1; + goto insert_if_check_paren; + } + else if (RExC_parse[0] == 'R') { + RExC_parse++; /* parno == 0 => /(?(R)YES|NO)/ "in any form of recursion OR eval" * parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)" * parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)" */ - parno = 0; + parno = 0; if (RExC_parse[0] == '0') { parno = 1; RExC_parse++; @@ -11986,20 +11987,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) RExC_parse = 
(char*)endptr; } /* else "Switch condition not recognized" below */ - } else if (RExC_parse[0] == '&') { - SV *sv_dat; - RExC_parse++; - sv_dat = reg_scan_name(pRExC_state, + } else if (RExC_parse[0] == '&') { + SV *sv_dat; + RExC_parse++; + sv_dat = reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA); if (sv_dat) parno = 1 + *((I32 *)SvPVX(sv_dat)); - } - ret = reganode(pRExC_state, INSUBP, parno); - goto insert_if_check_paren; - } + } + ret = reganode(pRExC_state, INSUBP, parno); + goto insert_if_check_paren; + } else if (inRANGE(RExC_parse[0], '1', '9')) { /* (?(1)...) */ - char c; + char c; UV uv; endptr = RExC_end; if (grok_atoUV(RExC_parse, &uv, &endptr) @@ -12014,21 +12015,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) ret = reganode(pRExC_state, GROUPP, parno); insert_if_check_paren: - if (UCHARAT(RExC_parse) != ')') { + if (UCHARAT(RExC_parse) != ')') { RExC_parse += UTF ? UTF8_SAFE_SKIP(RExC_parse, RExC_end) : 1; - vFAIL("Switch condition not recognized"); - } - nextchar(pRExC_state); - insert_if: + vFAIL("Switch condition not recognized"); + } + nextchar(pRExC_state); + insert_if: if (! REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 0))) { REQUIRE_BRANCHJ(flagp, 0); } br = regbranch(pRExC_state, &flags, 1, depth+1); - if (br == 0) { + if (br == 0) { RETURN_FAIL_ON_RESTART(flags,flagp); FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags); @@ -12038,13 +12039,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) { REQUIRE_BRANCHJ(flagp, 0); } - c = UCHARAT(RExC_parse); + c = UCHARAT(RExC_parse); nextchar(pRExC_state); - if (flags&HASWIDTH) - *flagp |= HASWIDTH; - if (c == '|') { - if (is_define) - vFAIL("(?(DEFINE)....) does not allow branches"); + if (flags&HASWIDTH) + *flagp |= HASWIDTH; + if (c == '|') { + if (is_define) + vFAIL("(?(DEFINE)....) does not allow branches"); /* Fake one for optimizer. 
*/ lastbr = reganode(pRExC_state, IFTHEN, 0); @@ -12058,23 +12059,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) REQUIRE_BRANCHJ(flagp, 0); } if (flags&HASWIDTH) - *flagp |= HASWIDTH; + *flagp |= HASWIDTH; c = UCHARAT(RExC_parse); nextchar(pRExC_state); - } - else - lastbr = 0; + } + else + lastbr = 0; if (c != ')') { if (RExC_parse >= RExC_end) vFAIL("Switch (?(condition)... not terminated"); else vFAIL("Switch (?(condition)... contains too many branches"); } - ender = reg_node(pRExC_state, TAIL); + ender = reg_node(pRExC_state, TAIL); if (! REGTAIL(pRExC_state, br, ender)) { REQUIRE_BRANCHJ(flagp, 0); } - if (lastbr) { + if (lastbr) { if (! REGTAIL(pRExC_state, lastbr, ender)) { REQUIRE_BRANCHJ(flagp, 0); } @@ -12086,8 +12087,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) { REQUIRE_BRANCHJ(flagp, 0); } - } - else + } + else if (! REGTAIL(pRExC_state, ret, ender)) { REQUIRE_BRANCHJ(flagp, 0); } @@ -12096,18 +12097,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) For large programs it seems to be required but I can't figure out why. -- dmq*/ #endif - return ret; - } + return ret; + } RExC_parse += UTF ? UTF8_SAFE_SKIP(RExC_parse, RExC_end) : 1; vFAIL("Unknown switch condition (?(...))"); - } - case '[': /* (?[ ... ]) */ + } + case '[': /* (?[ ... ]) */ return handle_regex_sets(pRExC_state, NULL, flagp, depth+1, oregcomp_parse); case 0: /* A NUL */ - RExC_parse--; /* for vFAIL to print correctly */ + RExC_parse--; /* for vFAIL to print correctly */ vFAIL("Sequence (? 
incomplete"); break; @@ -12117,11 +12118,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } /* FALLTHROUGH */ case '*': /* If you want to support (?*...), first reconcile with GH #17363 */ - /* FALLTHROUGH */ - default: /* e.g., (?i) */ - RExC_parse = (char *) seqstart + 1; + /* FALLTHROUGH */ + default: /* e.g., (?i) */ + RExC_parse = (char *) seqstart + 1; parse_flags: - parse_lparen_question_flags(pRExC_state); + parse_lparen_question_flags(pRExC_state); if (UCHARAT(RExC_parse) != ':') { if (RExC_parse < RExC_end) nextchar(pRExC_state); @@ -12133,11 +12134,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) ret = 0; goto parse_rest; } /* end switch */ - } + } else if (!(RExC_flags & RXf_PMf_NOCAPTURE)) { /* (...) */ - capturing_parens: - parno = RExC_npar; - RExC_npar++; + capturing_parens: + parno = RExC_npar; + RExC_npar++; if (! ALL_PARENS_COUNTED) { /* If we are in our first pass through (and maybe only pass), * we need to allocate memory for the capturing parentheses @@ -12182,7 +12183,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } } - ret = reganode(pRExC_state, OPEN, parno); + ret = reganode(pRExC_state, OPEN, parno); if (!RExC_nestroot) RExC_nestroot = parno; if (RExC_open_parens && !RExC_open_parens[parno]) @@ -12196,15 +12197,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) Set_Node_Length(REGNODE_p(ret), 1); /* MJD */ Set_Node_Offset(REGNODE_p(ret), RExC_parse); /* MJD */ - is_open = 1; - } else { + is_open = 1; + } else { /* with RXf_PMf_NOCAPTURE treat (...) as (?:...) */ paren = ':'; - ret = 0; + ret = 0; } } else /* ! paren */ - ret = 0; + ret = 0; parse_rest: /* Pick up the branches, linking them together. 
*/ @@ -12218,18 +12219,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags); } if (*RExC_parse == '|') { - if (RExC_use_BRANCHJ) { - reginsert(pRExC_state, BRANCHJ, br, depth+1); - } - else { /* MJD */ - reginsert(pRExC_state, BRANCH, br, depth+1); + if (RExC_use_BRANCHJ) { + reginsert(pRExC_state, BRANCHJ, br, depth+1); + } + else { /* MJD */ + reginsert(pRExC_state, BRANCH, br, depth+1); Set_Node_Length(REGNODE_p(br), paren != 0); Set_Node_Offset_To_R(br, parse_start-RExC_start); } - have_branch = 1; + have_branch = 1; } else if (paren == ':') { - *flagp |= flags&SIMPLE; + *flagp |= flags&SIMPLE; } if (is_open) { /* Starts with OPEN. */ if (! REGTAIL(pRExC_state, ret, br)) { /* OPEN -> first. */ @@ -12237,82 +12238,82 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) } } else if (paren != '?') /* Not Conditional */ - ret = br; + ret = br; *flagp |= flags & (HASWIDTH | POSTPONED); lastbr = br; while (*RExC_parse == '|') { - if (RExC_use_BRANCHJ) { + if (RExC_use_BRANCHJ) { bool shut_gcc_up; - ender = reganode(pRExC_state, LONGJMP, 0); + ender = reganode(pRExC_state, LONGJMP, 0); /* Append to the previous. */ shut_gcc_up = REGTAIL(pRExC_state, REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))), ender); PERL_UNUSED_VAR(shut_gcc_up); - } - nextchar(pRExC_state); - if (freeze_paren) { - if (RExC_npar > after_freeze) - after_freeze = RExC_npar; + } + nextchar(pRExC_state); + if (freeze_paren) { + if (RExC_npar > after_freeze) + after_freeze = RExC_npar; RExC_npar = freeze_paren; } br = regbranch(pRExC_state, &flags, 0, depth+1); - if (br == 0) { + if (br == 0) { RETURN_FAIL_ON_RESTART(flags, flagp); FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags); } if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. 
*/ REQUIRE_BRANCHJ(flagp, 0); } - lastbr = br; - *flagp |= flags & (HASWIDTH | POSTPONED); + lastbr = br; + *flagp |= flags & (HASWIDTH | POSTPONED); } if (have_branch || paren != ':') { regnode * br; - /* Make a closing node, and hook it on the end. */ - switch (paren) { - case ':': - ender = reg_node(pRExC_state, TAIL); - break; - case 1: case 2: - ender = reganode(pRExC_state, CLOSE, parno); + /* Make a closing node, and hook it on the end. */ + switch (paren) { + case ':': + ender = reg_node(pRExC_state, TAIL); + break; + case 1: case 2: + ender = reganode(pRExC_state, CLOSE, parno); if ( RExC_close_parens ) { DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_ "%*s%*s Setting close paren #%" IVdf " to %zu\n", 22, "| |", (int)(depth * 2 + 1), "", (IV)parno, ender)); RExC_close_parens[parno]= ender; - if (RExC_nestroot == parno) - RExC_nestroot = 0; - } + if (RExC_nestroot == parno) + RExC_nestroot = 0; + } Set_Node_Offset(REGNODE_p(ender), RExC_parse+1); /* MJD */ Set_Node_Length(REGNODE_p(ender), 1); /* MJD */ - break; - case 's': - ender = reg_node(pRExC_state, SRCLOSE); + break; + case 's': + ender = reg_node(pRExC_state, SRCLOSE); RExC_in_script_run = 0; - break; - case '<': + break; + case '<': case 'a': case 'A': case 'b': case 'B': - case ',': - case '=': - case '!': - *flagp &= ~HASWIDTH; - /* FALLTHROUGH */ + case ',': + case '=': + case '!': + *flagp &= ~HASWIDTH; + /* FALLTHROUGH */ case 't': /* aTomic */ - case '>': - ender = reg_node(pRExC_state, SUCCEED); - break; - case 0: - ender = reg_node(pRExC_state, END); + case '>': + ender = reg_node(pRExC_state, SUCCEED); + break; + case 0: + ender = reg_node(pRExC_state, END); assert(!RExC_end_op); /* there can only be one! 
*/ RExC_end_op = REGNODE_p(ender); if (RExC_close_parens) { @@ -12323,8 +12324,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) RExC_close_parens[0]= ender; } - break; - } + break; + } DEBUG_PARSE_r({ DEBUG_PARSE_MSG("lsbr"); regprop(RExC_rx, RExC_mysv1, REGNODE_p(lastbr), NULL, pRExC_state); @@ -12341,15 +12342,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) REQUIRE_BRANCHJ(flagp, 0); } - if (have_branch) { + if (have_branch) { char is_nothing= 1; - if (depth==1) + if (depth==1) RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN; - /* Hook the tails of the branches to the closing node. */ - for (br = REGNODE_p(ret); br; br = regnext(br)) { - const U8 op = PL_regkind[OP(br)]; - if (op == BRANCH) { + /* Hook the tails of the branches to the closing node. */ + for (br = REGNODE_p(ret); br; br = regnext(br)) { + const U8 op = PL_regkind[OP(br)]; + if (op == BRANCH) { if (! REGTAIL_STUDY(pRExC_state, REGNODE_OFFSET(NEXTOPER(br)), ender)) @@ -12359,8 +12360,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) if ( OP(NEXTOPER(br)) != NOTHING || regnext(NEXTOPER(br)) != REGNODE_p(ender)) is_nothing= 0; - } - else if (op == BRANCHJ) { + } + else if (op == BRANCHJ) { bool shut_gcc_up = REGTAIL_STUDY(pRExC_state, REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))), ender); @@ -12370,8 +12371,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) || regnext(NEXTOPER(NEXTOPER(br))) != REGNODE_p(ender)) */ is_nothing= 0; - } - } + } + } if (is_nothing) { regnode * ret_as_regnode = REGNODE_p(ret); br= PL_regkind[OP(ret_as_regnode)] != BRANCH @@ -12402,7 +12403,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) NEXT_OFF(br)= REGNODE_p(ender) - br; } } - } + } } { @@ -12411,47 +12412,47 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth) static const char parens[] = "=!aA<,>Bbt"; /* flag below is set to 0 up through 'A'; 1 for larger */ - if 
(paren && (p = strchr(parens, paren))) { - U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH; - int flag = (p - parens) > 3; + if (paren && (p = strchr(parens, paren))) { + U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH; + int flag = (p - parens) > 3; - if (paren == '>' || paren == 't') { - node = SUSPEND, flag = 0; + if (paren == '>' || paren == 't') { + node = SUSPEND, flag = 0; } - reginsert(pRExC_state, node, ret, depth+1); + reginsert(pRExC_state, node, ret, depth+1); Set_Node_Cur_Length(REGNODE_p(ret), parse_start); - Set_Node_Offset(REGNODE_p(ret), parse_start + 1); - FLAGS(REGNODE_p(ret)) = flag; + Set_Node_Offset(REGNODE_p(ret), parse_start + 1); + FLAGS(REGNODE_p(ret)) = flag; if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL))) { REQUIRE_BRANCHJ(flagp, 0); } - } + } } /* Check for proper termination. */ if (paren) { /* restore original flags, but keep (?p) and, if we've encountered * something in the parse that changes /d rules into /u, keep the /u */ - RExC_flags = oregflags | (RExC_flags & RXf_PMf_KEEPCOPY); + RExC_flags = oregflags | (RExC_flags & RXf_PMf_KEEPCOPY); if (DEPENDS_SEMANTICS && toUSE_UNI_CHARSET_NOT_DEPENDS) { set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET); } - if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') { - RExC_parse = oregcomp_parse; - vFAIL("Unmatched ("); - } - nextchar(pRExC_state); + if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') { + RExC_parse = oregcomp_parse; + vFAIL("Unmatched ("); + } + nextchar(pRExC_state); } else if (!paren && RExC_parse < RExC_end) { - if (*RExC_parse == ')') { - RExC_parse++; - vFAIL("Unmatched )"); - } - else - FAIL("Junk on end of regexp"); /* "Can't happen". */ - NOT_REACHED; /* NOTREACHED */ + if (*RExC_parse == ')') { + RExC_parse++; + vFAIL("Unmatched )"); + } + else + FAIL("Junk on end of regexp"); /* "Can't happen". 
*/ + NOT_REACHED; /* NOTREACHED */ } if (after_freeze > RExC_npar) @@ -12488,12 +12489,12 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) DEBUG_PARSE("brnc"); if (first) - ret = 0; + ret = 0; else { - if (RExC_use_BRANCHJ) - ret = reganode(pRExC_state, BRANCHJ, 0); - else { - ret = reg_node(pRExC_state, BRANCH); + if (RExC_use_BRANCHJ) + ret = reganode(pRExC_state, BRANCHJ, 0); + else { + ret = reg_node(pRExC_state, BRANCH); Set_Node_Length(REGNODE_p(ret), 1); } } @@ -12503,38 +12504,38 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) skip_to_be_ignored_text(pRExC_state, &RExC_parse, FALSE /* Don't force to /x */ ); while (RExC_parse < RExC_end && *RExC_parse != '|' && *RExC_parse != ')') { - flags &= ~TRYAGAIN; + flags &= ~TRYAGAIN; latest = regpiece(pRExC_state, &flags, depth+1); - if (latest == 0) { - if (flags & TRYAGAIN) - continue; + if (latest == 0) { + if (flags & TRYAGAIN) + continue; RETURN_FAIL_ON_RESTART(flags, flagp); FAIL2("panic: regpiece returned failure, flags=%#" UVxf, (UV) flags); - } - else if (ret == 0) + } + else if (ret == 0) ret = latest; - *flagp |= flags&(HASWIDTH|POSTPONED); - if (chain != 0) { - /* FIXME adding one for every branch after the first is probably - * excessive now we have TRIE support. (hv) */ - MARK_NAUGHTY(1); + *flagp |= flags&(HASWIDTH|POSTPONED); + if (chain != 0) { + /* FIXME adding one for every branch after the first is probably + * excessive now we have TRIE support. (hv) */ + MARK_NAUGHTY(1); if (! REGTAIL(pRExC_state, chain, latest)) { /* XXX We could just redo this branch, but figuring out what * bookkeeping needs to be reset is a pain, and it's likely * that other branches that goto END will also be too large */ REQUIRE_BRANCHJ(flagp, 0); } - } - chain = latest; - c++; + } + chain = latest; + c++; } if (chain == 0) { /* Loop ran zero times. 
*/ - chain = reg_node(pRExC_state, NOTHING); - if (ret == 0) - ret = chain; + chain = reg_node(pRExC_state, NOTHING); + if (ret == 0) + ret = chain; } if (c == 1) { - *flagp |= flags&SIMPLE; + *flagp |= flags&SIMPLE; } return ret; @@ -12551,15 +12552,15 @@ Perl_regcurly(const char *s) PERL_ARGS_ASSERT_REGCURLY; if (*s++ != '{') - return FALSE; + return FALSE; if (!isDIGIT(*s)) - return FALSE; + return FALSE; while (isDIGIT(*s)) - s++; + s++; if (*s == ',') { - s++; - while (isDIGIT(*s)) - s++; + s++; + while (isDIGIT(*s)) + s++; } return *s == '}'; @@ -12842,7 +12843,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) } /* Forbid extra quantifiers */ - if (ISMULT2(RExC_parse)) { + if (isQUANTIFIER(RExC_parse, RExC_end)) { RExC_parse++; vFAIL("Nested quantifiers"); } @@ -13351,7 +13352,7 @@ S_new_regcurly(const char *s, const char *e) PERL_ARGS_ASSERT_NEW_REGCURLY; if (s >= e || *s++ != '{') - return FALSE; + return FALSE; while (s < e && isSPACE(*s)) { s++; @@ -13365,7 +13366,7 @@ S_new_regcurly(const char *s, const char *e) } if (*s == ',') { - s++; + s++; while (s < e && isSPACE(*s)) { s++; } @@ -13423,36 +13424,36 @@ S_backref_value(char *p, char *e) A summary of the code structure is: switch (first_byte) { - cases for each special: - handle this special; - break; - case '\\': - switch (2nd byte) { - cases for each unambiguous special: - handle this special; - break; - cases for each ambigous special/literal: - disambiguate; - if (special) handle here - else goto defchar; - default: // unambiguously literal: - goto defchar; - } - default: // is a literal char - // FALL THROUGH - defchar: - create EXACTish node for literal; - while (more input and node isn't full) { - switch (input_byte) { - cases for each special; + cases for each special: + handle this special; + break; + case '\\': + switch (2nd byte) { + cases for each unambiguous special: + handle this special; + break; + cases for each ambigous special/literal: + disambiguate; + if 
(special) handle here + else goto defchar; + default: // unambiguously literal: + goto defchar; + } + default: // is a literal char + // FALL THROUGH + defchar: + create EXACTish node for literal; + while (more input and node isn't full) { + switch (input_byte) { + cases for each special; make sure parse pointer is set so that the next call to regatom will see this special first goto loopdone; // EXACTish node terminated by prev. char - default: - append char to EXACTISH node; - } - get next input byte; - } + default: + append char to EXACTISH node; + } + get next input byte; + } loopdone: } return the generated node; @@ -13486,37 +13487,37 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) assert(RExC_parse < RExC_end); switch ((U8)*RExC_parse) { case '^': - RExC_seen_zerolen++; - nextchar(pRExC_state); - if (RExC_flags & RXf_PMf_MULTILINE) - ret = reg_node(pRExC_state, MBOL); - else - ret = reg_node(pRExC_state, SBOL); + RExC_seen_zerolen++; + nextchar(pRExC_state); + if (RExC_flags & RXf_PMf_MULTILINE) + ret = reg_node(pRExC_state, MBOL); + else + ret = reg_node(pRExC_state, SBOL); Set_Node_Length(REGNODE_p(ret), 1); /* MJD */ - break; + break; case '$': - nextchar(pRExC_state); - if (*RExC_parse) - RExC_seen_zerolen++; - if (RExC_flags & RXf_PMf_MULTILINE) - ret = reg_node(pRExC_state, MEOL); - else - ret = reg_node(pRExC_state, SEOL); + nextchar(pRExC_state); + if (*RExC_parse) + RExC_seen_zerolen++; + if (RExC_flags & RXf_PMf_MULTILINE) + ret = reg_node(pRExC_state, MEOL); + else + ret = reg_node(pRExC_state, SEOL); Set_Node_Length(REGNODE_p(ret), 1); /* MJD */ - break; + break; case '.': - nextchar(pRExC_state); - if (RExC_flags & RXf_PMf_SINGLELINE) - ret = reg_node(pRExC_state, SANY); - else - ret = reg_node(pRExC_state, REG_ANY); - *flagp |= HASWIDTH|SIMPLE; - MARK_NAUGHTY(1); + nextchar(pRExC_state); + if (RExC_flags & RXf_PMf_SINGLELINE) + ret = reg_node(pRExC_state, SANY); + else + ret = reg_node(pRExC_state, REG_ANY); + *flagp |= 
HASWIDTH|SIMPLE; + MARK_NAUGHTY(1); Set_Node_Length(REGNODE_p(ret), 1); /* MJD */ - break; + break; case '[': { - char * const oregcomp_parse = ++RExC_parse; + char * const oregcomp_parse = ++RExC_parse; ret = regclass(pRExC_state, flagp, depth+1, FALSE, /* means parse the whole char class */ TRUE, /* allow multi-char folds */ @@ -13529,65 +13530,65 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf, (UV) *flagp); } - if (*RExC_parse != ']') { - RExC_parse = oregcomp_parse; - vFAIL("Unmatched ["); - } - nextchar(pRExC_state); + if (*RExC_parse != ']') { + RExC_parse = oregcomp_parse; + vFAIL("Unmatched ["); + } + nextchar(pRExC_state); Set_Node_Length(REGNODE_p(ret), RExC_parse - oregcomp_parse + 1); /* MJD */ - break; + break; } case '(': - nextchar(pRExC_state); + nextchar(pRExC_state); ret = reg(pRExC_state, 2, &flags, depth+1); - if (ret == 0) { - if (flags & TRYAGAIN) { - if (RExC_parse >= RExC_end) { - /* Make parent create an empty node if needed. */ - *flagp |= TRYAGAIN; - return(0); - } - goto tryagain; - } + if (ret == 0) { + if (flags & TRYAGAIN) { + if (RExC_parse >= RExC_end) { + /* Make parent create an empty node if needed. */ + *flagp |= TRYAGAIN; + return(0); + } + goto tryagain; + } RETURN_FAIL_ON_RESTART(flags, flagp); FAIL2("panic: reg returned failure to regatom, flags=%#" UVxf, (UV) flags); - } - *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED); - break; + } + *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED); + break; case '|': case ')': - if (flags & TRYAGAIN) { - *flagp |= TRYAGAIN; - return 0; - } - vFAIL("Internal urp"); - /* Supposed to be caught earlier. */ - break; + if (flags & TRYAGAIN) { + *flagp |= TRYAGAIN; + return 0; + } + vFAIL("Internal urp"); + /* Supposed to be caught earlier. 
*/ + break; case '?': case '+': case '*': - RExC_parse++; - vFAIL("Quantifier follows nothing"); - break; + RExC_parse++; + vFAIL("Quantifier follows nothing"); + break; case '\\': - /* Special Escapes - - This switch handles escape sequences that resolve to some kind - of special regop and not to literal text. Escape sequences that - resolve to literal text are handled below in the switch marked - "Literal Escapes". - - Every entry in this switch *must* have a corresponding entry - in the literal escape switch. However, the opposite is not - required, as the default for this switch is to jump to the - literal text handling code. - */ - RExC_parse++; - switch ((U8)*RExC_parse) { - /* Special Escapes */ - case 'A': - RExC_seen_zerolen++; + /* Special Escapes + + This switch handles escape sequences that resolve to some kind + of special regop and not to literal text. Escape sequences that + resolve to literal text are handled below in the switch marked + "Literal Escapes". + + Every entry in this switch *must* have a corresponding entry + in the literal escape switch. However, the opposite is not + required, as the default for this switch is to jump to the + literal text handling code. + */ + RExC_parse++; + switch ((U8)*RExC_parse) { + /* Special Escapes */ + case 'A': + RExC_seen_zerolen++; /* Under wildcards, this is changed to match \n; should be * invisible to the user, as they have to compile under /m */ if (RExC_pm_flags & PMf_WILDCARD) { @@ -13599,8 +13600,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) * /\A/ from /^/ in split. 
*/ FLAGS(REGNODE_p(ret)) = 1; } - goto finish_meta_pat; - case 'G': + goto finish_meta_pat; + case 'G': if (RExC_pm_flags & PMf_WILDCARD) { RExC_parse++; /* diag_listed_as: Use of %s is not allowed in Unicode property @@ -13609,10 +13610,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) vFAIL("Use of '\\G' is not allowed in Unicode property" " wildcard subpatterns"); } - ret = reg_node(pRExC_state, GPOS); + ret = reg_node(pRExC_state, GPOS); RExC_seen |= REG_GPOS_SEEN; - goto finish_meta_pat; - case 'K': + goto finish_meta_pat; + case 'K': if (!RExC_in_lookaround) { RExC_seen_zerolen++; ret = reg_node(pRExC_state, KEEPS); @@ -13627,7 +13628,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) ++RExC_parse; /* advance past the 'K' */ vFAIL("\\K not permitted in lookahead/lookbehind"); } - case 'Z': + case 'Z': if (RExC_pm_flags & PMf_WILDCARD) { /* See comment under \A above */ ret = reg_node(pRExC_state, MEOL); @@ -13635,9 +13636,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) else { ret = reg_node(pRExC_state, SEOL); } - RExC_seen_zerolen++; /* Do not optimize RE away */ - goto finish_meta_pat; - case 'z': + RExC_seen_zerolen++; /* Do not optimize RE away */ + goto finish_meta_pat; + case 'z': if (RExC_pm_flags & PMf_WILDCARD) { /* See comment under \A above */ ret = reg_node(pRExC_state, MEOL); @@ -13645,28 +13646,28 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) else { ret = reg_node(pRExC_state, EOS); } - RExC_seen_zerolen++; /* Do not optimize RE away */ - goto finish_meta_pat; - case 'C': - vFAIL("\\C no longer supported"); - case 'X': - ret = reg_node(pRExC_state, CLUMP); - *flagp |= HASWIDTH; - goto finish_meta_pat; + RExC_seen_zerolen++; /* Do not optimize RE away */ + goto finish_meta_pat; + case 'C': + vFAIL("\\C no longer supported"); + case 'X': + ret = reg_node(pRExC_state, CLUMP); + *flagp |= HASWIDTH; + goto finish_meta_pat; - case 'B': + case 'B': invert = 1; /* 
FALLTHROUGH */ - case 'b': + case 'b': { U8 flags = 0; - regex_charset charset = get_regex_charset(RExC_flags); + regex_charset charset = get_regex_charset(RExC_flags); - RExC_seen_zerolen++; + RExC_seen_zerolen++; RExC_seen |= REG_LOOKBEHIND_SEEN; - op = BOUND + charset; + op = BOUND + charset; - if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') { + if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') { flags = TRADITIONAL_BOUND; if (op > BOUNDA) { /* /aa is same as /a */ op = BOUNDA; @@ -13726,9 +13727,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) default: bad_bound_type: RExC_parse = endbrace; - vFAIL2utf8f( + vFAIL2utf8f( "'%" UTF8f "' is an unknown bound type", - UTF8fARG(UTF, length, endbrace - length)); + UTF8fARG(UTF, length, endbrace - length)); NOT_REACHED; /*NOTREACHED*/ } RExC_parse = endbrace; @@ -13751,7 +13752,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) ? ASCII_RESTRICT_PAT_MODS : ASCII_MORE_RESTRICT_PAT_MODS); } - } + } if (op == BOUND) { RExC_seen_d_op = TRUE; @@ -13764,29 +13765,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) op += NBOUND - BOUND; } - ret = reg_node(pRExC_state, op); + ret = reg_node(pRExC_state, op); FLAGS(REGNODE_p(ret)) = flags; - goto finish_meta_pat; + goto finish_meta_pat; } - case 'R': - ret = reg_node(pRExC_state, LNBREAK); - *flagp |= HASWIDTH|SIMPLE; - goto finish_meta_pat; - - case 'd': - case 'D': - case 'h': - case 'H': - case 'p': - case 'P': - case 's': - case 'S': - case 'v': - case 'V': - case 'w': - case 'W': + case 'R': + ret = reg_node(pRExC_state, LNBREAK); + *flagp |= HASWIDTH|SIMPLE; + goto finish_meta_pat; + + case 'd': + case 'D': + case 'h': + case 'H': + case 'p': + case 'P': + case 's': + case 'S': + case 'v': + case 'V': + case 'w': + case 'W': /* These all have the same meaning inside [brackets], and it knows * how to do the best optimizations for them. 
So, pretend we found * these within brackets, and let it do the work */ @@ -13824,7 +13825,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) Set_Node_Offset(REGNODE_p(ret), parse_start); Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); /* MJD */ nextchar(pRExC_state); - break; + break; case 'N': /* Handle \N, \N{} and \N{NAMED SEQUENCE} (the latter meaning the * \N{...} evaluates to a sequence of more than one code points). @@ -13857,7 +13858,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) RExC_parse = parse_start; goto defchar; - case 'k': /* Handle \k<NAME> and \k'NAME' */ + case 'k': /* Handle \k<NAME> and \k'NAME' */ parse_named_seq: { char ch; @@ -13866,11 +13867,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) && ch != '\'' && ch != '{')) { - RExC_parse++; - /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */ - vFAIL2("Sequence %.2s... not terminated", parse_start); - } else { - RExC_parse += 2; + RExC_parse++; + /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */ + vFAIL2("Sequence %.2s... 
not terminated", parse_start); + } else { + RExC_parse += 2; ret = handle_named_backref(pRExC_state, flagp, parse_start, @@ -13881,30 +13882,30 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) : '\''); } break; - } - case 'g': - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - { - I32 num; - bool hasbrace = 0; - - if (*RExC_parse == 'g') { + } + case 'g': + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + I32 num; + bool hasbrace = 0; + + if (*RExC_parse == 'g') { bool isrel = 0; - RExC_parse++; - if (*RExC_parse == '{') { - RExC_parse++; - hasbrace = 1; - } - if (*RExC_parse == '-') { - RExC_parse++; - isrel = 1; - } - if (hasbrace && !isDIGIT(*RExC_parse)) { - if (isrel) RExC_parse--; + RExC_parse++; + if (*RExC_parse == '{') { + RExC_parse++; + hasbrace = 1; + } + if (*RExC_parse == '-') { + RExC_parse++; + isrel = 1; + } + if (hasbrace && !isDIGIT(*RExC_parse)) { + if (isrel) RExC_parse--; RExC_parse -= 2; - goto parse_named_seq; + goto parse_named_seq; } if (RExC_parse >= RExC_end) { @@ -13915,7 +13916,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) vFAIL("Reference to invalid group 0"); else if (num == I32_MAX) { if (isDIGIT(*RExC_parse)) - vFAIL("Reference to nonexistent group"); + vFAIL("Reference to nonexistent group"); else unterminated_g: vFAIL("Unterminated \\g... 
pattern"); @@ -14001,48 +14002,48 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) Set_Node_Cur_Length(REGNODE_p(ret), parse_start-1); skip_to_be_ignored_text(pRExC_state, &RExC_parse, FALSE /* Don't force to /x */ ); - } - break; - case '\0': - if (RExC_parse >= RExC_end) - FAIL("Trailing \\"); - /* FALLTHROUGH */ - default: - /* Do not generate "unrecognized" warnings here, we fall - back into the quick-grab loop below */ + } + break; + case '\0': + if (RExC_parse >= RExC_end) + FAIL("Trailing \\"); + /* FALLTHROUGH */ + default: + /* Do not generate "unrecognized" warnings here, we fall + back into the quick-grab loop below */ RExC_parse = parse_start; - goto defchar; - } /* end of switch on a \foo sequence */ - break; + goto defchar; + } /* end of switch on a \foo sequence */ + break; case '#': /* '#' comments should have been spaced over before this function was * called */ assert((RExC_flags & RXf_PMf_EXTENDED) == 0); - /* + /* if (RExC_flags & RXf_PMf_EXTENDED) { - RExC_parse = reg_skipcomment( pRExC_state, RExC_parse ); - if (RExC_parse < RExC_end) - goto tryagain; - } + RExC_parse = reg_skipcomment( pRExC_state, RExC_parse ); + if (RExC_parse < RExC_end) + goto tryagain; + } */ - /* FALLTHROUGH */ + /* FALLTHROUGH */ default: - defchar: { + defchar: { /* Here, we have determined that the next thing is probably a * literal character. RExC_parse points to the first byte of its * definition. 
(It still may be an escape sequence that evaluates * to a single character) */ - STRLEN len = 0; - UV ender = 0; - char *p; - char *s, *old_s = NULL, *old_old_s = NULL; - char *s0; + STRLEN len = 0; + UV ender = 0; + char *p; + char *s, *old_s = NULL, *old_old_s = NULL; + char *s0; U32 max_string_len = 255; /* We may have to reparse the node, artificially stopping filling @@ -14116,11 +14117,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) FILL_NODE(ret, node_type); RExC_emit++; - s = STRING(REGNODE_p(ret)); + s = STRING(REGNODE_p(ret)); s0 = s; - reparse: + reparse: p = RExC_parse; len = 0; @@ -14162,7 +14163,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) * The exceptions override this */ Size_t added_len = 1; - oldp = p; + oldp = p; old_old_s = old_s; old_s = s; @@ -14170,62 +14171,62 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) assert( (RExC_flags & RXf_PMf_EXTENDED) == 0 || ! is_PATWS_safe((p), RExC_end, UTF)); - switch ((U8)*p) { + switch ((U8)*p) { const char* message; U32 packed_warn; U8 grok_c_char; - case '^': - case '$': - case '.': - case '[': - case '(': - case ')': - case '|': - goto loopdone; - case '\\': - /* Literal Escapes Switch - - This switch is meant to handle escape sequences that - resolve to a literal character. - - Every escape sequence that represents something - else, like an assertion or a char class, is handled - in the switch marked 'Special Escapes' above in this - routine, but also has an entry here as anything that - isn't explicitly mentioned here will be treated as - an unescaped equivalent literal. - */ - - switch ((U8)*++p) { - - /* These are all the special escapes. */ - case 'A': /* Start assertion */ - case 'b': case 'B': /* Word-boundary assertion*/ - case 'C': /* Single char !DANGEROUS! 
*/ - case 'd': case 'D': /* digit class */ - case 'g': case 'G': /* generic-backref, pos assertion */ - case 'h': case 'H': /* HORIZWS */ - case 'k': case 'K': /* named backref, keep marker */ - case 'p': case 'P': /* Unicode property */ - case 'R': /* LNBREAK */ - case 's': case 'S': /* space class */ - case 'v': case 'V': /* VERTWS */ - case 'w': case 'W': /* word class */ + case '^': + case '$': + case '.': + case '[': + case '(': + case ')': + case '|': + goto loopdone; + case '\\': + /* Literal Escapes Switch + + This switch is meant to handle escape sequences that + resolve to a literal character. + + Every escape sequence that represents something + else, like an assertion or a char class, is handled + in the switch marked 'Special Escapes' above in this + routine, but also has an entry here as anything that + isn't explicitly mentioned here will be treated as + an unescaped equivalent literal. + */ + + switch ((U8)*++p) { + + /* These are all the special escapes. */ + case 'A': /* Start assertion */ + case 'b': case 'B': /* Word-boundary assertion*/ + case 'C': /* Single char !DANGEROUS! */ + case 'd': case 'D': /* digit class */ + case 'g': case 'G': /* generic-backref, pos assertion */ + case 'h': case 'H': /* HORIZWS */ + case 'k': case 'K': /* named backref, keep marker */ + case 'p': case 'P': /* Unicode property */ + case 'R': /* LNBREAK */ + case 's': case 'S': /* space class */ + case 'v': case 'V': /* VERTWS */ + case 'w': case 'W': /* word class */ case 'X': /* eXtended Unicode "combining character sequence" */ - case 'z': case 'Z': /* End of line/string assertion */ - --p; - goto loopdone; - - /* Anything after here is an escape that resolves to a - literal. (Except digits, which may or may not) - */ - case 'n': - ender = '\n'; - p++; - break; - case 'N': /* Handle a single-code point named character. 
*/ + case 'z': case 'Z': /* End of line/string assertion */ + --p; + goto loopdone; + + /* Anything after here is an escape that resolves to a + literal. (Except digits, which may or may not) + */ + case 'n': + ender = '\n'; + p++; + break; + case 'N': /* Handle a single-code point named character. */ RExC_parse = p + 1; if (! grok_bslash_N(pRExC_state, NULL, /* Fail if evaluates to @@ -14269,27 +14270,27 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) } break; - case 'r': - ender = '\r'; - p++; - break; - case 't': - ender = '\t'; - p++; - break; - case 'f': - ender = '\f'; - p++; - break; - case 'e': - ender = ESC_NATIVE; - p++; - break; - case 'a': - ender = '\a'; - p++; - break; - case 'o': + case 'r': + ender = '\r'; + p++; + break; + case 't': + ender = '\t'; + p++; + break; + case 'f': + ender = '\f'; + p++; + break; + case 'e': + ender = ESC_NATIVE; + p++; + break; + case 'a': + ender = '\a'; + p++; + break; + case 'o': if (! grok_bslash_o(&p, RExC_end, &ender, @@ -14308,7 +14309,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) warn_non_literal_string(p, packed_warn, message); } break; - case 'x': + case 'x': if (! grok_bslash_x(&p, RExC_end, &ender, @@ -14335,7 +14336,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) } #endif break; - case 'c': + case 'c': p++; if (! grok_bslash_c(*p, &grok_c_char, &message, &packed_warn)) @@ -14354,7 +14355,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) warn_non_literal_string(p, packed_warn, message); } - break; + break; case '8': case '9': /* must be a backreference */ --p; /* we have an escape like \8 which cannot be an octal escape @@ -14362,7 +14363,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) * escape which may or may not be a legitimate backref. 
*/ goto loopdone; case '1': case '2': case '3':case '4': - case '5': case '6': case '7': + case '5': case '6': case '7': /* When we parse backslash escapes there is ambiguity * between backreferences and octal escapes. Any escape * from \1 - \9 is a backreference, any multi-digit @@ -14387,29 +14388,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) } /* FALLTHROUGH */ case '0': - { - I32 flags = PERL_SCAN_SILENT_ILLDIGIT + { + I32 flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_NOTIFY_ILLDIGIT; - STRLEN numlen = 3; - ender = grok_oct(p, &numlen, &flags, NULL); - p += numlen; + STRLEN numlen = 3; + ender = grok_oct(p, &numlen, &flags, NULL); + p += numlen; if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT) && isDIGIT(*p) /* like \08, \178 */ && ckWARN(WARN_REGEXP)) { - reg_warn_non_literal_string( + reg_warn_non_literal_string( p + 1, form_alien_digit_msg(8, numlen, p, RExC_end, UTF, FALSE)); } - } - break; - case '\0': - if (p >= RExC_end) - FAIL("Trailing \\"); - /* FALLTHROUGH */ - default: - if (isALPHANUMERIC(*p)) { + } + break; + case '\0': + if (p >= RExC_end) + FAIL("Trailing \\"); + /* FALLTHROUGH */ + default: + if (isALPHANUMERIC(*p)) { /* An alpha followed by '{' is going to fail next * iteration, so don't output this warning in that * case */ @@ -14417,11 +14418,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) ckWARN2reg(p + 1, "Unrecognized escape \\%.1s" " passed through", p); } - } - goto normal_default; - } /* End of switch on '\' */ - break; - case '{': + } + goto normal_default; + } /* End of switch on '\' */ + break; + case '{': /* Trying to gain new uses for '{' without breaking too * much existing code is hard. The solution currently * adopted is: @@ -14437,7 +14438,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) * misspelled the quantifier. 
Without this warning, * the quantifier would silently be taken as a literal * string of characters instead of a meta construct */ - if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) { + if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) { if ( RExC_strict || ( p > parse_start + 1 && isALPHA_A(*(p - 1)) @@ -14450,28 +14451,28 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) } ckWARNreg(p + 1, "Unescaped left brace in regex is" " passed through"); - } - goto normal_default; + } + goto normal_default; case '}': case ']': if (p > RExC_parse && RExC_strict) { ckWARN2reg(p + 1, "Unescaped literal '%c'", *p); } - /*FALLTHROUGH*/ - default: /* A literal character */ - normal_default: - if (! UTF8_IS_INVARIANT(*p) && UTF) { - STRLEN numlen; - ender = utf8n_to_uvchr((U8*)p, RExC_end - p, - &numlen, UTF8_ALLOW_DEFAULT); - p += numlen; - } - else - ender = (U8) *p++; - break; - } /* End of switch on the literal */ - - /* Here, have looked at the literal character, and <ender> + /*FALLTHROUGH*/ + default: /* A literal character */ + normal_default: + if (! UTF8_IS_INVARIANT(*p) && UTF) { + STRLEN numlen; + ender = utf8n_to_uvchr((U8*)p, RExC_end - p, + &numlen, UTF8_ALLOW_DEFAULT); + p += numlen; + } + else + ender = (U8) *p++; + break; + } /* End of switch on the literal */ + + /* Here, have looked at the literal character, and <ender> * contains its ordinal; <p> points to the character after it. * */ @@ -14503,7 +14504,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) * only thing in its new node */ next_is_quantifier = LIKELY(p < RExC_end) - && UNLIKELY(ISMULT2(p)); + && UNLIKELY(isQUANTIFIER(p, RExC_end)); if (next_is_quantifier && LIKELY(len)) { p = oldp; @@ -14733,20 +14734,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) * requires UTF-8 to represent. 
*/ : (char) toLOWER_L1(ender); } - } /* End of adding current character to the node */ + } /* End of adding current character to the node */ done_with_this_char: len += added_len; - if (next_is_quantifier) { + if (next_is_quantifier) { /* Here, the next input is a quantifier, and to get here, * the current character is the only one in the node. */ goto loopdone; - } + } - } /* End of loop through literal characters */ + } /* End of loop through literal characters */ /* Here we have either exhausted the input or run out of room in * the node. If the former, we are done. (If we encountered a @@ -15236,7 +15237,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) Safefree(locfold_buf); Safefree(loc_correspondence); } - } /* End of verifying node ends with an appropriate char */ + } /* End of verifying node ends with an appropriate char */ /* We need to start the next node at the character that didn't fit * in this one */ @@ -15360,15 +15361,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) Set_Node_Length(REGNODE_p(ret), p - parse_start - 1); RExC_parse = p; - { - /* len is STRLEN which is unsigned, need to copy to signed */ - IV iv = len; - if (iv < 0) - vFAIL("Internal disaster"); - } + { + /* len is STRLEN which is unsigned, need to copy to signed */ + IV iv = len; + if (iv < 0) + vFAIL("Internal disaster"); + } - } /* End of label 'defchar:' */ - break; + } /* End of label 'defchar:' */ + break; } /* End of giant switch on input character */ /* Position parse to next real character */ @@ -15408,53 +15409,53 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr) ANYOF_BITMAP_ZERO(node); if (*invlist_ptr) { - /* This gets set if we actually need to modify things */ - bool change_invlist = FALSE; + /* This gets set if we actually need to modify things */ + bool change_invlist = FALSE; - UV start, end; + UV start, end; - /* Start looking through *invlist_ptr */ - invlist_iterinit(*invlist_ptr); - while 
(invlist_iternext(*invlist_ptr, &start, &end)) { - UV high; - int i; + /* Start looking through *invlist_ptr */ + invlist_iterinit(*invlist_ptr); + while (invlist_iternext(*invlist_ptr, &start, &end)) { + UV high; + int i; if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) { ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP; } - /* Quit if are above what we should change */ - if (start >= NUM_ANYOF_CODE_POINTS) { - break; - } + /* Quit if are above what we should change */ + if (start >= NUM_ANYOF_CODE_POINTS) { + break; + } - change_invlist = TRUE; + change_invlist = TRUE; - /* Set all the bits in the range, up to the max that we are doing */ - high = (end < NUM_ANYOF_CODE_POINTS - 1) + /* Set all the bits in the range, up to the max that we are doing */ + high = (end < NUM_ANYOF_CODE_POINTS - 1) ? end : NUM_ANYOF_CODE_POINTS - 1; - for (i = start; i <= (int) high; i++) { + for (i = start; i <= (int) high; i++) { ANYOF_BITMAP_SET(node, i); - } - } - invlist_iterfinish(*invlist_ptr); + } + } + invlist_iterfinish(*invlist_ptr); /* Done with loop; remove any code points that are in the bitmap from * *invlist_ptr; similarly for code points above the bitmap if we have * a flag to match all of them anyways */ - if (change_invlist) { - _invlist_subtract(*invlist_ptr, PL_InBitmap, invlist_ptr); - } + if (change_invlist) { + _invlist_subtract(*invlist_ptr, PL_InBitmap, invlist_ptr); + } if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) { - _invlist_intersection(*invlist_ptr, PL_InBitmap, invlist_ptr); - } + _invlist_intersection(*invlist_ptr, PL_InBitmap, invlist_ptr); + } - /* If have completely emptied it, remove it completely */ - if (_invlist_len(*invlist_ptr) == 0) { - SvREFCNT_dec_NN(*invlist_ptr); - *invlist_ptr = NULL; - } + /* If have completely emptied it, remove it completely */ + if (_invlist_len(*invlist_ptr) == 0) { + SvREFCNT_dec_NN(*invlist_ptr); + *invlist_ptr = NULL; + } } } @@ -16495,7 +16496,7 @@ redo_curchar: RExC_parse++; RExC_sets_depth++; 
- node = reg(pRExC_state, 2, flagp, depth+1); + node = reg(pRExC_state, 2, flagp, depth+1); RETURN_FAIL_ON_RESTART(*flagp, flagp); if ( OP(REGNODE_p(node)) != REGEX_SET @@ -17316,7 +17317,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, SV *listsv = NULL; /* List of \p{user-defined} whose definitions aren't available at the time this was called */ STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more - than just initialized. */ + than just initialized. */ SV* properties = NULL; /* Code points that match \p{} \P{} */ SV* posixes = NULL; /* Code points that match classes like [:word:], extended beyond the Latin1 range. These have to @@ -17333,7 +17334,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, leading to less compilation and execution work */ UV element_count = 0; /* Number of distinct elements in the class. - Optimizations may be possible if this is tiny */ + Optimizations may be possible if this is tiny */ AV * multi_char_matches = NULL; /* Code points that fold to more than one character; used under /i */ UV n; @@ -17436,7 +17437,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, assert(RExC_parse <= RExC_end); if (UCHARAT(RExC_parse) == '^') { /* Complement the class */ - RExC_parse++; + RExC_parse++; invert = TRUE; allow_mutiple_chars = FALSE; MARK_NAUGHTY(1); @@ -17471,7 +17472,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, /* allow 1st char to be ']' (allowing it to be '-' is dealt with later) */ if (UCHARAT(RExC_parse) == ']') - goto charclassloop; + goto charclassloop; while (1) { @@ -17499,23 +17500,23 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, charclassloop: - namedclass = OOB_NAMEDCLASS; /* initialize as illegal */ + namedclass = OOB_NAMEDCLASS; /* initialize as illegal */ save_value = value; save_prevvalue = prevvalue; - if (!range) { - rangebegin = RExC_parse; - element_count++; + if (!range) { + rangebegin = 
RExC_parse; + element_count++; non_portable_endpoint = 0; - } - if (UTF && ! UTF8_IS_INVARIANT(* RExC_parse)) { - value = utf8n_to_uvchr((U8*)RExC_parse, - RExC_end - RExC_parse, - &numlen, UTF8_ALLOW_DEFAULT); - RExC_parse += numlen; - } - else - value = UCHARAT(RExC_parse++); + } + if (UTF && ! UTF8_IS_INVARIANT(* RExC_parse)) { + value = utf8n_to_uvchr((U8*)RExC_parse, + RExC_end - RExC_parse, + &numlen, UTF8_ALLOW_DEFAULT); + RExC_parse += numlen; + } + else + value = UCHARAT(RExC_parse++); if (value == '[') { char * posix_class_end; @@ -17570,20 +17571,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, vFAIL("Unmatched ["); } - if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) { - value = utf8n_to_uvchr((U8*)RExC_parse, - RExC_end - RExC_parse, - &numlen, UTF8_ALLOW_DEFAULT); - RExC_parse += numlen; - } - else - value = UCHARAT(RExC_parse++); + if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) { + value = utf8n_to_uvchr((U8*)RExC_parse, + RExC_end - RExC_parse, + &numlen, UTF8_ALLOW_DEFAULT); + RExC_parse += numlen; + } + else + value = UCHARAT(RExC_parse++); - /* Some compilers cannot handle switching on 64-bit integer - * values, therefore value cannot be an UV. Yes, this will - * be a problem later if we want switch on Unicode. - * A similar issue a little bit later when switching on - * namedclass. --jhi */ + /* Some compilers cannot handle switching on 64-bit integer + * values, therefore value cannot be an UV. Yes, this will + * be a problem later if we want switch on Unicode. + * A similar issue a little bit later when switching on + * namedclass. 
--jhi */ /* If the \ is escaping white space when white space is being * skipped, it means that that white space is wanted literally, and @@ -17594,16 +17595,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, U32 packed_warn; U8 grok_c_char; - case 'w': namedclass = ANYOF_WORDCHAR; break; - case 'W': namedclass = ANYOF_NWORDCHAR; break; - case 's': namedclass = ANYOF_SPACE; break; - case 'S': namedclass = ANYOF_NSPACE; break; - case 'd': namedclass = ANYOF_DIGIT; break; - case 'D': namedclass = ANYOF_NDIGIT; break; - case 'v': namedclass = ANYOF_VERTWS; break; - case 'V': namedclass = ANYOF_NVERTWS; break; - case 'h': namedclass = ANYOF_HORIZWS; break; - case 'H': namedclass = ANYOF_NHORIZWS; break; + case 'w': namedclass = ANYOF_WORDCHAR; break; + case 'W': namedclass = ANYOF_NWORDCHAR; break; + case 's': namedclass = ANYOF_SPACE; break; + case 'S': namedclass = ANYOF_NSPACE; break; + case 'd': namedclass = ANYOF_DIGIT; break; + case 'D': namedclass = ANYOF_NDIGIT; break; + case 'v': namedclass = ANYOF_VERTWS; break; + case 'V': namedclass = ANYOF_NVERTWS; break; + case 'h': namedclass = ANYOF_HORIZWS; break; + case 'H': namedclass = ANYOF_NHORIZWS; break; case 'N': /* Handle \N{NAME} in class */ { const char * const backslash_N_beg = RExC_parse - 2; @@ -17670,10 +17671,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, unicode_range = TRUE; /* \N{} are Unicode */ } break; - case 'p': - case 'P': - { - char *e; + case 'p': + case 'P': + { + char *e; if (RExC_pm_flags & PMf_WILDCARD) { RExC_parse++; @@ -17684,14 +17685,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, " wildcard subpatterns", (char) value, *(RExC_parse - 1)); } - /* \p means they want Unicode semantics */ - REQUIRE_UNI_RULES(flagp, 0); + /* \p means they want Unicode semantics */ + REQUIRE_UNI_RULES(flagp, 0); - if (RExC_parse >= RExC_end) - vFAIL2("Empty \\%c", (U8)value); - if (*RExC_parse == '{') { - const U8 c = (U8)value; - e = 
(char *) memchr(RExC_parse, '}', RExC_end - RExC_parse); + if (RExC_parse >= RExC_end) + vFAIL2("Empty \\%c", (U8)value); + if (*RExC_parse == '{') { + const U8 c = (U8)value; + e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse); if (!e) { RExC_parse++; vFAIL2("Missing right brace on \\%c{}", c); @@ -17703,9 +17704,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * any '^', even when not under /x */ while (isSPACE(*RExC_parse)) { RExC_parse++; - } + } - if (UCHARAT(RExC_parse) == '^') { + if (UCHARAT(RExC_parse) == '^') { /* toggle. (The rhs xor gets the single bit that * differs between P and p; the other xor inverts just @@ -17721,12 +17722,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, if (e == RExC_parse) vFAIL2("Empty \\%c{}", c); - n = e - RExC_parse; - while (isSPACE(*(RExC_parse + n - 1))) - n--; + n = e - RExC_parse; + while (isSPACE(*(RExC_parse + n - 1))) + n--; - } /* The \p isn't immediately followed by a '{' */ - else if (! isALPHA(*RExC_parse)) { + } /* The \p isn't immediately followed by a '{' */ + else if (! isALPHA(*RExC_parse)) { RExC_parse += (UTF) ? 
UTF8_SAFE_SKIP(RExC_parse, RExC_end) : 1; @@ -17735,10 +17736,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, (U8) value); } else { - e = RExC_parse; - n = 1; - } - { + e = RExC_parse; + n = 1; + } + { char* name = RExC_parse; /* Any message returned about expanding the definition */ @@ -17771,7 +17772,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, mojibake */ RExC_utf8 = TRUE; } - /* diag_listed_as: Can't find Unicode property definition "%s" in regex; marked by <-- HERE in m/%s/ */ + /* diag_listed_as: Can't find Unicode property definition "%s" in regex; marked by <-- HERE in m/%s/ */ vFAIL2utf8f("%" UTF8f, UTF8fARG(SvUTF8(msg), SvCUR(msg), SvPVX(msg))); } @@ -17889,30 +17890,30 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, /* Invert if asking for the complement */ if (value == 'P') { - _invlist_union_complement_2nd(properties, + _invlist_union_complement_2nd(properties, prop_definition, &properties); } else { _invlist_union(properties, prop_definition, &properties); - } + } } } - RExC_parse = e + 1; + RExC_parse = e + 1; namedclass = ANYOF_UNIPROP; /* no official name, but it's named */ - } - break; - case 'n': value = '\n'; break; - case 'r': value = '\r'; break; - case 't': value = '\t'; break; - case 'f': value = '\f'; break; - case 'b': value = '\b'; break; - case 'e': value = ESC_NATIVE; break; - case 'a': value = '\a'; break; - case 'o': - RExC_parse--; /* function expects to be pointed at the 'o' */ + } + break; + case 'n': value = '\n'; break; + case 'r': value = '\r'; break; + case 't': value = '\t'; break; + case 'f': value = '\f'; break; + case 'b': value = '\b'; break; + case 'e': value = ESC_NATIVE; break; + case 'a': value = '\a'; break; + case 'o': + RExC_parse--; /* function expects to be pointed at the 'o' */ if (! 
grok_bslash_o(&RExC_parse, RExC_end, &value, @@ -17932,9 +17933,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, if (value < 256) { non_portable_endpoint++; } - break; - case 'x': - RExC_parse--; /* function expects to be pointed at the 'x' */ + break; + case 'x': + RExC_parse--; /* function expects to be pointed at the 'x' */ if (! grok_bslash_x(&RExC_parse, RExC_end, &value, @@ -17954,8 +17955,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, if (value < 256) { non_portable_endpoint++; } - break; - case 'c': + break; + case 'c': if (! grok_bslash_c(*RExC_parse, &grok_c_char, &message, &packed_warn)) { @@ -17974,16 +17975,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, } non_portable_endpoint++; - break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': - { - /* Take 1-3 octal digits */ - I32 flags = PERL_SCAN_SILENT_ILLDIGIT + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': + { + /* Take 1-3 octal digits */ + I32 flags = PERL_SCAN_SILENT_ILLDIGIT | PERL_SCAN_NOTIFY_ILLDIGIT; numlen = (strict) ? 
4 : 3; value = grok_oct(--RExC_parse, &numlen, &flags, NULL); - RExC_parse += numlen; + RExC_parse += numlen; if (numlen != 3) { if (strict) { RExC_parse += (UTF) @@ -18005,11 +18006,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, if (value < 256) { non_portable_endpoint++; } - break; - } - default: - /* Allow \_ to not give an error */ - if (isWORDCHAR(value) && value != '_') { + break; + } + default: + /* Allow \_ to not give an error */ + if (isWORDCHAR(value) && value != '_') { if (strict) { vFAIL2("Unrecognized escape \\%c in character class", (int)value); @@ -18019,20 +18020,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, "Unrecognized escape \\%c in character class passed through", (int)value); } - } - break; - } /* End of switch on char following backslash */ - } /* end of handling backslash escape sequences */ + } + break; + } /* End of switch on char following backslash */ + } /* end of handling backslash escape sequences */ /* Here, we have the current token in 'value' */ - if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */ + if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */ U8 classnum; - /* a bad range like a-\d, a-[:digit:]. The '-' is taken as a - * literal, as is the character that began the false range, i.e. - * the 'a' in the examples */ - if (range) { + /* a bad range like a-\d, a-[:digit:]. The '-' is taken as a + * literal, as is the character that began the false range, i.e. + * the 'a' in the examples */ + if (range) { const int w = (RExC_parse >= rangebegin) ? 
RExC_parse - rangebegin : 0; @@ -18050,13 +18051,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, prevvalue); } - range = 0; /* this was not a true range */ + range = 0; /* this was not a true range */ element_count += 2; /* So counts for three values */ - } + } classnum = namedclass_to_classnum(namedclass); - if (LOC && namedclass < ANYOF_POSIXL_MAX + if (LOC && namedclass < ANYOF_POSIXL_MAX #ifndef HAS_ISASCII && classnum != _CC_ASCII #endif @@ -18178,8 +18179,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, namedclass % 2 != 0, posixes_ptr); } - } - } /* end of namedclass \blah */ + } + } /* end of namedclass \blah */ SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end); @@ -18192,20 +18193,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * the next real character to be processed is the range indicator--the * minus sign */ - if (range) { + if (range) { #ifdef EBCDIC /* For unicode ranges, we have to test that the Unicode as opposed * to the native values are not decreasing. (Above 255, there is * no difference between native and Unicode) */ - if (unicode_range && prevvalue < 255 && value < 255) { + if (unicode_range && prevvalue < 255 && value < 255) { if (NATIVE_TO_LATIN1(prevvalue) > NATIVE_TO_LATIN1(value)) { goto backwards_range; } } else #endif - if (prevvalue > value) /* b-a */ { - int w; + if (prevvalue > value) /* b-a */ { + int w; #ifdef EBCDIC backwards_range: #endif @@ -18214,9 +18215,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, "Invalid [] range \"%" UTF8f "\"", UTF8fARG(UTF, w, rangebegin)); NOT_REACHED; /* NOTREACHED */ - } - } - else { + } + } + else { prevvalue = value; /* save the beginning of the potential range */ if (! stop_at_1 /* Can't be a range if parsing just one thing */ && *RExC_parse == '-') @@ -18253,8 +18254,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, range = 1; /* yeah, it's a range! 
*/ continue; /* but do it the next time */ } - } - } + } + } if (namedclass > OOB_NAMEDCLASS) { continue; @@ -18264,8 +18265,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * <prevvalue> is the beginning of the range, if any; or <value> if * not. */ - /* non-Latin1 code point implies unicode semantics. */ - if (value > 255) { + /* non-Latin1 code point implies unicode semantics. */ + if (value > 255) { if (value > MAX_LEGAL_CP && ( value != UV_MAX || prevvalue > MAX_LEGAL_CP)) { @@ -18281,7 +18282,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, PL_extended_cp_format, value); } - } + } /* Ready to process either the single value, or the completed range. * For single-valued non-inverted ranges, we consider the possibility @@ -18518,7 +18519,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, } #endif - range = 0; /* this range (if it was one) is done now */ + range = 0; /* this range (if it was one) is done now */ } /* End of loop through all the text within the brackets */ if ( posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) { @@ -18529,12 +18530,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * deal with them by building up a substitute parse string, and recursively * calling reg() on it, instead of proceeding */ if (multi_char_matches) { - SV * substitute_parse = newSVpvn_flags("?:", 2, SVs_TEMP); + SV * substitute_parse = newSVpvn_flags("?:", 2, SVs_TEMP); I32 cp_count; - STRLEN len; - char *save_end = RExC_end; - char *save_parse = RExC_parse; - char *save_start = RExC_start; + STRLEN len; + char *save_end = RExC_end; + char *save_parse = RExC_parse; + char *save_start = RExC_start; Size_t constructed_prefix_len = 0; /* This gives the length of the constructed portion of the substitute parse. */ @@ -18612,20 +18613,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * reported. 
See the comments at the definition of * REPORT_LOCATION_ARGS for details */ RExC_copy_start_in_input = (char *) orig_parse; - RExC_start = RExC_parse = SvPV(substitute_parse, len); + RExC_start = RExC_parse = SvPV(substitute_parse, len); RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len; - RExC_end = RExC_parse + len; + RExC_end = RExC_parse + len; RExC_in_multi_char_class = 1; - ret = reg(pRExC_state, 1, ®_flags, depth+1); + ret = reg(pRExC_state, 1, ®_flags, depth+1); *flagp |= reg_flags & (HASWIDTH|SIMPLE|POSTPONED|RESTART_PARSE|NEED_UTF8); /* And restore so can parse the rest of the pattern */ RExC_parse = save_parse; - RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start; - RExC_end = save_end; - RExC_in_multi_char_class = 0; + RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start; + RExC_end = save_end; + RExC_in_multi_char_class = 0; SvREFCNT_dec_NN(multi_char_matches); return ret; } @@ -18771,7 +18772,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, /* Now that we have finished adding all the folds, there is no reason * to keep the foldable list separate */ _invlist_union(cp_list, cp_foldable_list, &cp_list); - SvREFCNT_dec_NN(cp_foldable_list); + SvREFCNT_dec_NN(cp_foldable_list); } /* And combine the result (if any) with any inversion lists from posix @@ -19007,8 +19008,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, { _invlist_invert(cp_list); - /* Clear the invert flag since have just done it here */ - invert = FALSE; + /* Clear the invert flag since have just done it here */ + invert = FALSE; } /* All possible optimizations below still have these characteristics. @@ -19954,15 +19955,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth, * when the target string is UTF-8 (<upper_latin1_only_utf8_matches>). 
* */ if (upper_latin1_only_utf8_matches) { - if (cp_list) { - _invlist_union(cp_list, + if (cp_list) { + _invlist_union(cp_list, upper_latin1_only_utf8_matches, &cp_list); - SvREFCNT_dec_NN(upper_latin1_only_utf8_matches); - } - else { - cp_list = upper_latin1_only_utf8_matches; - } + SvREFCNT_dec_NN(upper_latin1_only_utf8_matches); + } + else { + cp_list = upper_latin1_only_utf8_matches; + } ANYOF_FLAGS(REGNODE_p(ret)) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP; } @@ -20017,11 +20018,11 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) { assert(! (ANYOF_FLAGS(node) & ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP)); - ARG_SET(node, ANYOF_ONLY_HAS_BITMAP); + ARG_SET(node, ANYOF_ONLY_HAS_BITMAP); } else { - AV * const av = newAV(); - SV *rv; + AV * const av = newAV(); + SV *rv; if (cp_list) { av_store(av, INVLIST_INDEX, SvREFCNT_inc_NN(cp_list)); @@ -20040,10 +20041,10 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state, SvREFCNT_inc_NN(runtime_defns)); } - rv = newRV_noinc(MUTABLE_SV(av)); - n = add_data(pRExC_state, STR_WITH_LEN("s")); - RExC_rxi->data->data[n] = (void*)rv; - ARG_SET(node, n); + rv = newRV_noinc(MUTABLE_SV(av)); + n = add_data(pRExC_state, STR_WITH_LEN("s")); + RExC_rxi->data->data[n] = (void*)rv; + ARG_SET(node, n); } } @@ -20097,12 +20098,12 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, assert(! 
output_invlist || listsvp); if (data && data->count) { - const U32 n = ARG(node); + const U32 n = ARG(node); - if (data->what[n] == 's') { - SV * const rv = MUTABLE_SV(data->data[n]); - AV * const av = MUTABLE_AV(SvRV(rv)); - SV **const ary = AvARRAY(av); + if (data->what[n] == 's') { + SV * const rv = MUTABLE_SV(data->data[n]); + AV * const av = MUTABLE_AV(SvRV(rv)); + SV **const ary = AvARRAY(av); invlist = ary[INVLIST_INDEX]; @@ -20114,7 +20115,7 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, si = ary[DEFERRED_USER_DEFINED_INDEX]; } - if (doinit && (si || invlist)) { + if (doinit && (si || invlist)) { if (si) { bool user_defined; SV * msg = newSVpvs_flags("", SVs_TEMP); @@ -20156,20 +20157,20 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, : INVLIST_INDEX); si = NULL; } - } - } + } + } } /* If requested, return a printable version of what this ANYOF node matches * */ if (listsvp) { - SV* matches_string = NULL; + SV* matches_string = NULL; /* This function can be called at compile-time, before everything gets * resolved, in which case we return the currently best available * information, which is the string that will eventually be used to do * that resolving, 'si' */ - if (si) { + if (si) { /* Here, we only have 'si' (and possibly some passed-in data in * 'invlist', which is handled below) If the caller only wants * 'si', use that. */ @@ -20268,7 +20269,7 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, SvCUR_set(matches_string, SvCUR(matches_string) - 1); } } /* end of has an 'si' */ - } + } /* Add the stuff that's already known */ if (invlist) { @@ -20291,7 +20292,7 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node, } } - *listsvp = matches_string; + *listsvp = matches_string; } return invlist; @@ -20347,21 +20348,21 @@ S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state, assert( ! 
UTF || UTF8_IS_INVARIANT(**p) || UTF8_IS_START(**p)); for (;;) { - if (RExC_end - (*p) >= 3 - && *(*p) == '(' - && *(*p + 1) == '?' - && *(*p + 2) == '#') - { - while (*(*p) != ')') { - if ((*p) == RExC_end) - FAIL("Sequence (?#... not terminated"); - (*p)++; - } - (*p)++; - continue; - } - - if (use_xmod) { + if (RExC_end - (*p) >= 3 + && *(*p) == '(' + && *(*p + 1) == '?' + && *(*p + 2) == '#') + { + while (*(*p) != ')') { + if ((*p) == RExC_end) + FAIL("Sequence (?#... not terminated"); + (*p)++; + } + (*p)++; + continue; + } + + if (use_xmod) { const char * save_p = *p; while ((*p) < RExC_end) { STRLEN len; @@ -20378,7 +20379,7 @@ S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state, if (*p != save_p) { continue; } - } + } break; } @@ -20432,7 +20433,7 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size) char, regexp_internal); if ( RExC_rxi == NULL ) - FAIL("Regexp out of space"); + FAIL("Regexp out of space"); RXi_SET(RExC_rx, RExC_rxi); RExC_emit_start = RExC_rxi->program; @@ -20473,16 +20474,16 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_ assert(extra_size >= regarglen[op] || PL_regkind[op] == ANYOF); if (RExC_offsets) { /* MJD */ - MJD_OFFSET_DEBUG( + MJD_OFFSET_DEBUG( ("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n", name, __LINE__, PL_reg_name[op], (UV)(RExC_emit) > RExC_offsets[0] - ? "Overwriting end of array!\n" : "OK", + ? 
"Overwriting end of array!\n" : "OK", (UV)(RExC_emit), (UV)(RExC_parse - RExC_start), (UV)RExC_offsets[0])); - Set_Node_Offset(REGNODE_p(RExC_emit), RExC_parse + (op == END)); + Set_Node_Offset(REGNODE_p(RExC_emit), RExC_parse + (op == END)); } #endif return(ret); @@ -20627,21 +20628,21 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, RExC_end_op += size; while (src > REGNODE_p(operand)) { - StructCopy(--src, --dst, regnode); + StructCopy(--src, --dst, regnode); #ifdef RE_TRACK_PATTERN_OFFSETS if (RExC_offsets) { /* MJD 20010112 */ - MJD_OFFSET_DEBUG( + MJD_OFFSET_DEBUG( ("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n", "reginsert", - __LINE__, - PL_reg_name[op], + __LINE__, + PL_reg_name[op], (UV)(REGNODE_OFFSET(dst)) > RExC_offsets[0] - ? "Overwriting end of array!\n" : "OK", + ? "Overwriting end of array!\n" : "OK", (UV)REGNODE_OFFSET(src), (UV)REGNODE_OFFSET(dst), (UV)RExC_offsets[0])); - Set_Node_Offset_To_R(REGNODE_OFFSET(dst), Node_Offset(src)); - Set_Node_Length_To_R(REGNODE_OFFSET(dst), Node_Length(src)); + Set_Node_Offset_To_R(REGNODE_OFFSET(dst), Node_Offset(src)); + Set_Node_Length_To_R(REGNODE_OFFSET(dst), Node_Length(src)); } #endif } @@ -20649,18 +20650,18 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op, place = REGNODE_p(operand); /* Op node, where operand used to be. */ #ifdef RE_TRACK_PATTERN_OFFSETS if (RExC_offsets) { /* MJD */ - MJD_OFFSET_DEBUG( + MJD_OFFSET_DEBUG( ("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n", "reginsert", - __LINE__, - PL_reg_name[op], + __LINE__, + PL_reg_name[op], (UV)REGNODE_OFFSET(place) > RExC_offsets[0] ? 
"Overwriting end of array!\n" : "OK", (UV)REGNODE_OFFSET(place), (UV)(RExC_parse - RExC_start), (UV)RExC_offsets[0])); - Set_Node_Offset(place, RExC_parse); - Set_Node_Length(place, 1); + Set_Node_Offset(place, RExC_parse); + Set_Node_Length(place, 1); } #endif src = NEXTOPER(place); @@ -20696,7 +20697,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state, * */ scan = (regnode_offset) p; for (;;) { - regnode * const temp = regnext(REGNODE_p(scan)); + regnode * const temp = regnext(REGNODE_p(scan)); DEBUG_PARSE_r({ DEBUG_PARSE_MSG((scan==p ? "tail" : "")); regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state); @@ -20773,11 +20774,11 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, regnode * const temp = regnext(REGNODE_p(scan)); #ifdef EXPERIMENTAL_INPLACESCAN if (PL_regkind[OP(REGNODE_p(scan))] == EXACT) { - bool unfolded_multi_char; /* Unexamined in this routine */ + bool unfolded_multi_char; /* Unexamined in this routine */ if (join_exact(pRExC_state, scan, &min, &unfolded_multi_char, 1, REGNODE_p(val), depth+1)) return TRUE; /* Was return EXACT */ - } + } #endif if ( exact ) { if (PL_regkind[OP(REGNODE_p(scan))] == EXACT) { @@ -20798,23 +20799,23 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p, scan, PL_reg_name[exact]); }); - if (temp == NULL) - break; - scan = REGNODE_OFFSET(temp); + if (temp == NULL) + break; + scan = REGNODE_OFFSET(temp); } DEBUG_PARSE_r({ DEBUG_PARSE_MSG(""); regprop(RExC_rx, RExC_mysv, REGNODE_p(val), NULL, pRExC_state); Perl_re_printf( aTHX_ "~ attach to %s (%" IVdf ") offset to %" IVdf "\n", - SvPV_nolen_const(RExC_mysv), - (IV)val, - (IV)(val - scan) + SvPV_nolen_const(RExC_mysv), + (IV)val, + (IV)(val - scan) ); }); if (reg_off_by_arg[OP(REGNODE_p(scan))]) { assert((UV) (val - scan) <= U32_MAX); - ARG_SET(REGNODE_p(scan), val - scan); + ARG_SET(REGNODE_p(scan), val - scan); } else { if (val - scan > U16_MAX) { @@ -20824,7 +20825,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, 
regnode_offset p, NEXT_OFF(REGNODE_p(scan)) = U16_MAX; return FALSE; } - NEXT_OFF(REGNODE_p(scan)) = val - scan; + NEXT_OFF(REGNODE_p(scan)) = val - scan; } return TRUE; /* Was 'return exact' */ @@ -20903,9 +20904,9 @@ S_regdump_extflags(pTHX_ const char *lead, const U32 flags) for (bit=0; bit<REG_EXTFLAGS_NAME_SIZE; bit++) { if (flags & (1<<bit)) { - if ((1<<bit) & RXf_PMf_CHARSET) { /* Output separately, below */ - continue; - } + if ((1<<bit) & RXf_PMf_CHARSET) { /* Output separately, below */ + continue; + } if (!set++ && lead) Perl_re_printf( aTHX_ "%s", lead); Perl_re_printf( aTHX_ "%s ", PL_reg_extflags_name[bit]); @@ -20988,10 +20989,10 @@ Perl_regdump(pTHX_ const regexp *r) if (r->check_substr || r->check_utf8) Perl_re_printf( aTHX_ - (const char *) - ( r->check_substr == r->substrs->data[1].substr - && r->check_utf8 == r->substrs->data[1].utf8_substr - ? "(checking floating" : "(checking anchored")); + (const char *) + ( r->check_substr == r->substrs->data[1].substr + && r->check_utf8 == r->substrs->data[1].utf8_substr + ? 
"(checking floating" : "(checking anchored")); if (r->intflags & PREGf_NOSCAN) Perl_re_printf( aTHX_ " noscan"); if (r->extflags & RXf_CHECK_ALL) @@ -21112,29 +21113,29 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ k = PL_regkind[OP(o)]; if (k == EXACT) { - sv_catpvs(sv, " "); - /* Using is_utf8_string() (via PERL_PV_UNI_DETECT) - * is a crude hack but it may be the best for now since - * we have no flag "this EXACTish node was UTF-8" - * --jhi */ - pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len, + sv_catpvs(sv, " "); + /* Using is_utf8_string() (via PERL_PV_UNI_DETECT) + * is a crude hack but it may be the best for now since + * we have no flag "this EXACTish node was UTF-8" + * --jhi */ + pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len, PL_colors[0], PL_colors[1], - PERL_PV_ESCAPE_UNI_DETECT | - PERL_PV_ESCAPE_NONASCII | - PERL_PV_PRETTY_ELLIPSES | - PERL_PV_PRETTY_LTGT | - PERL_PV_PRETTY_NOCLEAR - ); + PERL_PV_ESCAPE_UNI_DETECT | + PERL_PV_ESCAPE_NONASCII | + PERL_PV_PRETTY_ELLIPSES | + PERL_PV_PRETTY_LTGT | + PERL_PV_PRETTY_NOCLEAR + ); } else if (k == TRIE) { - /* print the details of the trie in dumpuntil instead, as - * progi->data isn't available here */ + /* print the details of the trie in dumpuntil instead, as + * progi->data isn't available here */ const char op = OP(o); const U32 n = ARG(o); const reg_ac_data * const ac = IS_TRIE_AC(op) ? (reg_ac_data *)progi->data->data[n] : NULL; const reg_trie_data * const trie - = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie]; + = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? 
n : ac->trie]; Perl_sv_catpvf(aTHX_ sv, "-%s", PL_reg_name[o->flags]); DEBUG_TRIE_COMPILE_r({ @@ -21167,8 +21168,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } } else if (k == CURLY) { U32 lo = ARG1(o), hi = ARG2(o); - if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX) - Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */ + if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX) + Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */ Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo); if (hi == REG_INFTY) sv_catpvs(sv, "INFTY"); @@ -21177,14 +21178,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ sv_catpvs(sv, "}"); } else if (k == WHILEM && o->flags) /* Ordinal/of */ - Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4); + Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4); else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT) { AV *name_list= NULL; U32 parno= OP(o) == ACCEPT ? 
(U32)ARG2L(o) : ARG(o); Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno); /* Parenth number */ - if ( RXp_PAREN_NAMES(prog) ) { + if ( RXp_PAREN_NAMES(prog) ) { name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]); } else if ( pRExC_state ) { name_list= RExC_paren_name_list; @@ -21192,8 +21193,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ if (name_list) { if ( k != REF || (OP(o) < REFN)) { SV **name= av_fetch(name_list, parno, 0 ); - if (name) - Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name)); + if (name) + Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name)); } else { SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]); @@ -21242,7 +21243,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } else if (k == LOGICAL) /* 2: embedded, otherwise 1 */ - Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); + Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); else if (k == ANYOF || k == ANYOFR) { U8 flags; char * bitmap; @@ -21274,7 +21275,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ arg = ARG(o); } - if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) { + if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) { if (ANYOFL_UTF8_LOCALE_REQD(flags)) { sv_catpvs(sv, "{utf8-locale-reqd}"); } @@ -21328,7 +21329,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } /* Ready to start outputting. First, the initial left bracket */ - Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); + Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); /* ANYOFH by definition doesn't have anything that will fit inside the * bitmap; ANYOFR may or may not. 
*/ @@ -21433,7 +21434,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } /* And finally the matching, closing ']' */ - Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]); + Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]); if (OP(o) == ANYOFHs) { Perl_sv_catpvf(aTHX_ sv, " (Leading UTF-8 bytes=%s", _byte_dump_string((U8 *) ((struct regnode_anyofhs *) o)->string, FLAGS(o), 1)); @@ -21464,13 +21465,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ else if (k == ANYOFM) { SV * cp_list = get_ANYOFM_contents(o); - Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); + Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]); if (OP(o) == NANYOFM) { _invlist_invert(cp_list); } put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, 0, TRUE); - Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]); + Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]); SvREFCNT_dec(cp_list); } @@ -21502,11 +21503,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ sv_catpv(sv, bounds[FLAGS(o)]); } else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) { - Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags)); + Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags)); if (o->next_off) { Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off); } - Perl_sv_catpvf(aTHX_ sv, "]"); + Perl_sv_catpvf(aTHX_ sv, "]"); } else if (OP(o) == SBOL) Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^"); @@ -21546,22 +21547,22 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r) PERL_UNUSED_CONTEXT; DEBUG_COMPILE_r( - { + { if (prog->maxlen > 0) { const char * const s = SvPV_nolen_const(RX_UTF8(r) - ? prog->check_utf8 : prog->check_substr); + ? prog->check_utf8 : prog->check_substr); if (!PL_colorset) reginitcolors(); Perl_re_printf( aTHX_ - "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n", - PL_colors[4], - RX_UTF8(r) ? "utf8 " : "", - PL_colors[5], PL_colors[0], - s, - PL_colors[1], - (strlen(s) > PL_dump_re_max_len ? "..." 
: "")); + "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n", + PL_colors[4], + RX_UTF8(r) ? "utf8 " : "", + PL_colors[5], PL_colors[0], + s, + PL_colors[1], + (strlen(s) > PL_dump_re_max_len ? "..." : "")); } - } ); + } ); /* use UTF8 check substring if regexp pattern itself is in UTF8 */ return RX_UTF8(r) ? prog->check_utf8 : prog->check_substr; @@ -21609,7 +21610,7 @@ Perl_pregfree2(pTHX_ REGEXP *rx) SvREFCNT_dec(r->substrs->data[i].substr); SvREFCNT_dec(r->substrs->data[i].utf8_substr); } - Safefree(r->substrs); + Safefree(r->substrs); } RX_MATCH_COPY_FREE(rx); #ifdef PERL_ANY_COW @@ -21656,7 +21657,7 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv) PERL_ARGS_ASSERT_REG_TEMP_COPY; if (!dsv) - dsv = (REGEXP*) newSV_type(SVt_REGEXP); + dsv = (REGEXP*) newSV_type(SVt_REGEXP); else { assert(SvTYPE(dsv) == SVt_REGEXP || (SvTYPE(dsv) == SVt_PVLV)); @@ -21673,22 +21674,22 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv) } SvLEN_set(dsv, 0); SvCUR_set(dsv, 0); - SvOK_off((SV *)dsv); + SvOK_off((SV *)dsv); - if (islv) { - /* For PVLVs, the head (sv_any) points to an XPVLV, while + if (islv) { + /* For PVLVs, the head (sv_any) points to an XPVLV, while * the LV's xpvlenu_rx will point to a regexp body, which * we allocate here */ - REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP); - assert(!SvPVX(dsv)); + REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP); + assert(!SvPVX(dsv)); ((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any; - temp->sv_any = NULL; - SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL; - SvREFCNT_dec_NN(temp); - /* SvCUR still resides in the xpvlv struct, so the regexp copy- - ing below will not set it. */ - SvCUR_set(dsv, SvCUR(ssv)); - } + temp->sv_any = NULL; + SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL; + SvREFCNT_dec_NN(temp); + /* SvCUR still resides in the xpvlv struct, so the regexp copy- + ing below will not set it. 
*/ + SvCUR_set(dsv, SvCUR(ssv)); + } } /* This ensures that SvTHINKFIRST(sv) is true, and hence that sv_force_normal(sv) is called. */ @@ -21702,7 +21703,7 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv) The string pointer is copied here, being part of the regexp struct. */ memcpy(&(drx->xpv_cur), &(srx->xpv_cur), - sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur)); + sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur)); if (!islv) SvLEN_set(dsv, 0); if (srx->offs) { @@ -21713,15 +21714,15 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv) if (srx->substrs) { int i; Newx(drx->substrs, 1, struct reg_substr_data); - StructCopy(srx->substrs, drx->substrs, struct reg_substr_data); + StructCopy(srx->substrs, drx->substrs, struct reg_substr_data); for (i = 0; i < 2; i++) { SvREFCNT_inc_void(drx->substrs->data[i].substr); SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr); } - /* check_substr and check_utf8, if non-NULL, point to either their - anchored or float namesakes, and don't hold a second reference. */ + /* check_substr and check_utf8, if non-NULL, point to either their + anchored or float namesakes, and don't hold a second reference. 
*/ } RX_MATCH_COPIED_off(dsv); #ifdef PERL_ANY_COW @@ -21763,10 +21764,10 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) } DEBUG_COMPILE_r({ - if (!PL_colorset) - reginitcolors(); - { - SV *dsv= sv_newmortal(); + if (!PL_colorset) + reginitcolors(); + { + SV *dsv= sv_newmortal(); RE_PV_QUOTED_DECL(s, RX_UTF8(rx), dsv, RX_PRECOMP(rx), RX_PRELEN(rx), PL_dump_re_max_len); Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n", @@ -21782,24 +21783,24 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) S_free_codeblocks(aTHX_ ri->code_blocks); if (ri->data) { - int n = ri->data->count; + int n = ri->data->count; - while (--n >= 0) { + while (--n >= 0) { /* If you add a ->what type here, update the comment in regcomp.h */ - switch (ri->data->what[n]) { - case 'a': - case 'r': - case 's': - case 'S': - case 'u': - SvREFCNT_dec(MUTABLE_SV(ri->data->data[n])); - break; - case 'f': - Safefree(ri->data->data[n]); - break; - case 'l': - case 'L': - break; + switch (ri->data->what[n]) { + case 'a': + case 'r': + case 's': + case 'S': + case 'u': + SvREFCNT_dec(MUTABLE_SV(ri->data->data[n])); + break; + case 'f': + Safefree(ri->data->data[n]); + break; + case 'l': + case 'L': + break; case 'T': { /* Aho Corasick add-on structure for a trie node. Used in stclass optimization only */ @@ -21811,7 +21812,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) if ( !refcount ) { PerlMemShared_free(aho->states); PerlMemShared_free(aho->fail); - /* do this last!!!! */ + /* do this last!!!! */ PerlMemShared_free(ri->data->data[n]); /* we should only ever get called once, so * assert as much, and also guard the free @@ -21826,11 +21827,11 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) } } break; - case 't': - { - /* trie structure. */ - U32 refcount; - reg_trie_data *trie=(reg_trie_data*)ri->data->data[n]; + case 't': + { + /* trie structure. 
*/ + U32 refcount; + reg_trie_data *trie=(reg_trie_data*)ri->data->data[n]; OP_REFCNT_LOCK; refcount = --trie->refcount; OP_REFCNT_UNLOCK; @@ -21842,19 +21843,19 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) PerlMemShared_free(trie->bitmap); if (trie->jump) PerlMemShared_free(trie->jump); - PerlMemShared_free(trie->wordinfo); + PerlMemShared_free(trie->wordinfo); /* do this last!!!! */ PerlMemShared_free(ri->data->data[n]); - } - } - break; - default: - Perl_croak(aTHX_ "panic: regfree data code '%c'", + } + } + break; + default: + Perl_croak(aTHX_ "panic: regfree data code '%c'", ri->data->what[n]); - } - } - Safefree(ri->data->what); - Safefree(ri->data); + } + } + Safefree(ri->data->what); + Safefree(ri->data); } Safefree(ri); @@ -21896,15 +21897,15 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) Copy(r->offs, ret->offs, npar, regexp_paren_pair); if (ret->substrs) { - /* Do it this way to avoid reading from *r after the StructCopy(). - That way, if any of the sv_dup_inc()s dislodge *r from the L1 - cache, it doesn't matter. */ + /* Do it this way to avoid reading from *r after the StructCopy(). + That way, if any of the sv_dup_inc()s dislodge *r from the L1 + cache, it doesn't matter. */ int i; - const bool anchored = r->check_substr - ? r->check_substr == r->substrs->data[0].substr - : r->check_utf8 == r->substrs->data[0].utf8_substr; + const bool anchored = r->check_substr + ? 
r->check_substr == r->substrs->data[0].substr + : r->check_utf8 == r->substrs->data[0].utf8_substr; Newx(ret->substrs, 1, struct reg_substr_data); - StructCopy(r->substrs, ret->substrs, struct reg_substr_data); + StructCopy(r->substrs, ret->substrs, struct reg_substr_data); for (i = 0; i < 2; i++) { ret->substrs->data[i].substr = @@ -21913,29 +21914,29 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) sv_dup_inc(ret->substrs->data[i].utf8_substr, param); } - /* check_substr and check_utf8, if non-NULL, point to either their - anchored or float namesakes, and don't hold a second reference. */ + /* check_substr and check_utf8, if non-NULL, point to either their + anchored or float namesakes, and don't hold a second reference. */ - if (ret->check_substr) { - if (anchored) { - assert(r->check_utf8 == r->substrs->data[0].utf8_substr); + if (ret->check_substr) { + if (anchored) { + assert(r->check_utf8 == r->substrs->data[0].utf8_substr); - ret->check_substr = ret->substrs->data[0].substr; - ret->check_utf8 = ret->substrs->data[0].utf8_substr; - } else { - assert(r->check_substr == r->substrs->data[1].substr); - assert(r->check_utf8 == r->substrs->data[1].utf8_substr); + ret->check_substr = ret->substrs->data[0].substr; + ret->check_utf8 = ret->substrs->data[0].utf8_substr; + } else { + assert(r->check_substr == r->substrs->data[1].substr); + assert(r->check_utf8 == r->substrs->data[1].utf8_substr); - ret->check_substr = ret->substrs->data[1].substr; - ret->check_utf8 = ret->substrs->data[1].utf8_substr; - } - } else if (ret->check_utf8) { - if (anchored) { - ret->check_utf8 = ret->substrs->data[0].utf8_substr; - } else { - ret->check_utf8 = ret->substrs->data[1].utf8_substr; - } - } + ret->check_substr = ret->substrs->data[1].substr; + ret->check_utf8 = ret->substrs->data[1].utf8_substr; + } + } else if (ret->check_utf8) { + if (anchored) { + ret->check_utf8 = ret->substrs->data[0].utf8_substr; + } else { + ret->check_utf8 = 
ret->substrs->data[1].utf8_substr; + } + } } RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param); @@ -21944,12 +21945,12 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) Newx(ret->recurse_locinput, r->nparens + 1, char *); if (ret->pprivate) - RXi_SET(ret, CALLREGDUPE_PVT(dstr, param)); + RXi_SET(ret, CALLREGDUPE_PVT(dstr, param)); if (RX_MATCH_COPIED(dstr)) - ret->subbeg = SAVEPVN(ret->subbeg, ret->sublen); + ret->subbeg = SAVEPVN(ret->subbeg, ret->sublen); else - ret->subbeg = NULL; + ret->subbeg = NULL; #ifdef PERL_ANY_COW ret->saved_copy = NULL; #endif @@ -21957,9 +21958,9 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param) /* Whether mother_re be set or no, we need to copy the string. We cannot refrain from copying it when the storage points directly to our mother regexp, because that's - 1: a buffer in a different thread - 2: something we no longer hold a reference on - so we need to copy it locally. */ + 1: a buffer in a different thread + 2: something we no longer hold a reference on + so we need to copy it locally. 
*/ RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1); /* set malloced length to a non-zero value so it will be freed * (otherwise in combination with SVf_FAKE it looks like an alien @@ -22002,37 +22003,37 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) if (ri->code_blocks) { - int n; - Newx(reti->code_blocks, 1, struct reg_code_blocks); - Newx(reti->code_blocks->cb, ri->code_blocks->count, + int n; + Newx(reti->code_blocks, 1, struct reg_code_blocks); + Newx(reti->code_blocks->cb, ri->code_blocks->count, struct reg_code_block); - Copy(ri->code_blocks->cb, reti->code_blocks->cb, + Copy(ri->code_blocks->cb, reti->code_blocks->cb, ri->code_blocks->count, struct reg_code_block); - for (n = 0; n < ri->code_blocks->count; n++) - reti->code_blocks->cb[n].src_regex = (REGEXP*) - sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param); + for (n = 0; n < ri->code_blocks->count; n++) + reti->code_blocks->cb[n].src_regex = (REGEXP*) + sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param); reti->code_blocks->count = ri->code_blocks->count; reti->code_blocks->refcnt = 1; } else - reti->code_blocks = NULL; + reti->code_blocks = NULL; reti->regstclass = NULL; if (ri->data) { - struct reg_data *d; + struct reg_data *d; const int count = ri->data->count; - int i; + int i; - Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), - char, struct reg_data); - Newx(d->what, count, U8); + Newxc(d, sizeof(struct reg_data) + count*sizeof(void *), + char, struct reg_data); + Newx(d->what, count, U8); - d->count = count; - for (i = 0; i < count; i++) { - d->what[i] = ri->data->what[i]; - switch (d->what[i]) { - /* see also regcomp.h and regfree_internal() */ + d->count = count; + for (i = 0; i < count; i++) { + d->what[i] = ri->data->what[i]; + switch (d->what[i]) { + /* see also regcomp.h and regfree_internal() */ case 'a': /* actually an AV, but the dup function is identical. values seem to be "plain sv's" generally. 
*/ case 'r': /* a compiled regex (but still just another SV) */ @@ -22042,9 +22043,9 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) case 'S': /* actually an SV, but the dup function is identical. */ case 'u': /* actually an HV, but the dup function is identical. values are "plain sv's" */ - d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param); - break; - case 'f': + d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param); + break; + case 'f': /* Synthetic Start Class - "Fake" charclass we generate to optimize * patterns which could start with several different things. Pre-TRIE * this was more important than it is now, however this still helps @@ -22052,40 +22053,40 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) * to [xa]. This is used by Perl_re_intuit_start() and S_find_byclass() * in regexec.c */ - /* This is cheating. */ - Newx(d->data[i], 1, regnode_ssc); - StructCopy(ri->data->data[i], d->data[i], regnode_ssc); - reti->regstclass = (regnode*)d->data[i]; - break; - case 'T': + /* This is cheating. */ + Newx(d->data[i], 1, regnode_ssc); + StructCopy(ri->data->data[i], d->data[i], regnode_ssc); + reti->regstclass = (regnode*)d->data[i]; + break; + case 'T': /* AHO-CORASICK fail table */ /* Trie stclasses are readonly and can thus be shared - * without duplication. We free the stclass in pregfree - * when the corresponding reg_ac_data struct is freed. - */ - reti->regstclass= ri->regstclass; - /* FALLTHROUGH */ - case 't': + * without duplication. We free the stclass in pregfree + * when the corresponding reg_ac_data struct is freed. + */ + reti->regstclass= ri->regstclass; + /* FALLTHROUGH */ + case 't': /* TRIE transition table */ - OP_REFCNT_LOCK; - ((reg_trie_data*)ri->data->data[i])->refcount++; - OP_REFCNT_UNLOCK; - /* FALLTHROUGH */ + OP_REFCNT_LOCK; + ((reg_trie_data*)ri->data->data[i])->refcount++; + OP_REFCNT_UNLOCK; + /* FALLTHROUGH */ case 'l': /* (?{...}) or (??{ ... 
}) code (cb->block) */ case 'L': /* same when RExC_pm_flags & PMf_HAS_CV and code is not from another regexp */ - d->data[i] = ri->data->data[i]; - break; + d->data[i] = ri->data->data[i]; + break; default: Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'", ri->data->what[i]); - } - } + } + } - reti->data = d; + reti->data = d; } else - reti->data = NULL; + reti->data = NULL; reti->name_list_idx = ri->name_list_idx; @@ -22114,16 +22115,16 @@ Perl_regnext(pTHX_ regnode *p) I32 offset; if (!p) - return(NULL); + return(NULL); if (OP(p) > REGNODE_MAX) { /* regnode.type is unsigned */ - Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", + Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d", (int)OP(p), (int)REGNODE_MAX); } offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p)); if (offset == 0) - return(NULL); + return(NULL); return(p+offset); } @@ -22142,7 +22143,7 @@ S_re_croak(pTHX_ bool utf8, const char* pat,...) PERL_ARGS_ASSERT_RE_CROAK; if (len > 510) - len = 510; + len = 510; Copy(pat, buf, len , char); buf[len] = '\n'; buf[len + 1] = '\0'; @@ -22151,7 +22152,7 @@ S_re_croak(pTHX_ bool utf8, const char* pat,...) va_end(args); message = SvPV_const(msv, len); if (len > 512) - len = 512; + len = 512; Copy(message, buf, len , char); /* len-1 to avoid \n */ Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, len-1, buf)); @@ -22169,8 +22170,8 @@ Perl_save_re_context(pTHX) /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. 
*/ if (PL_curpm) { - const REGEXP * const rx = PM_GETRE(PL_curpm); - if (rx) + const REGEXP * const rx = PM_GETRE(PL_curpm); + if (rx) nparens = RX_NPARENS(rx); } @@ -22209,13 +22210,13 @@ S_put_code_point(pTHX_ SV *sv, UV c) Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c); } else if (isPRINT(c)) { - const char string = (char) c; + const char string = (char) c; /* We use {phrase} as metanotation in the class, so also escape literal * braces */ - if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}') - sv_catpvs(sv, "\\"); - sv_catpvn(sv, &string, 1); + if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}') + sv_catpvs(sv, "\\"); + sv_catpvn(sv, &string, 1); } else if (isMNEMONIC_CNTRL(c)) { Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c)); @@ -22782,10 +22783,10 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, || ( SvCUR(inverted_display) + inverted_bias < SvCUR(as_is_display) + as_is_bias))) { - sv_catsv(sv, inverted_display); + sv_catsv(sv, inverted_display); } else if (as_is_display) { - sv_catsv(sv, as_is_display); + sv_catsv(sv, as_is_display); } SvREFCNT_dec(as_is_display); @@ -22814,8 +22815,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, STATIC const regnode * S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, - const regnode *last, const regnode *plast, - SV* sv, I32 indent, U32 depth) + const regnode *last, const regnode *plast, + SV* sv, I32 indent, U32 depth) { U8 op = PSEUDO; /* Arbitrary non-END op. */ const regnode *next; @@ -22836,25 +22837,25 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, while (PL_regkind[op] != END && (!last || node < last)) { assert(node); - /* While that wasn't END last time... */ - NODE_ALIGN(node); - op = OP(node); - if (op == CLOSE || op == SRCLOSE || op == WHILEM) - indent--; - next = regnext((regnode *)node); - - /* Where, what. 
*/ - if (OP(node) == OPTIMIZED) { - if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE)) - optstart = node; - else - goto after_print; - } else - CLEAR_OPTSTART; + /* While that wasn't END last time... */ + NODE_ALIGN(node); + op = OP(node); + if (op == CLOSE || op == SRCLOSE || op == WHILEM) + indent--; + next = regnext((regnode *)node); + + /* Where, what. */ + if (OP(node) == OPTIMIZED) { + if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE)) + optstart = node; + else + goto after_print; + } else + CLEAR_OPTSTART; regprop(r, sv, node, NULL, NULL); Perl_re_printf( aTHX_ "%4" IVdf ":%*s%s", (IV)(node - start), - (int)(2*indent + 1), "", SvPVX_const(sv)); + (int)(2*indent + 1), "", SvPVX_const(sv)); if (OP(node) != OPTIMIZED) { if (next == NULL) /* Next ptr. */ @@ -22868,39 +22869,39 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, } after_print: - if (PL_regkind[(U8)op] == BRANCHJ) { - assert(next); - { + if (PL_regkind[(U8)op] == BRANCHJ) { + assert(next); + { const regnode *nnode = (OP(next) == LONGJMP ? regnext((regnode *)next) : next); if (last && nnode > last) nnode = last; DUMPUNTIL(NEXTOPER(NEXTOPER(node)), nnode); - } - } - else if (PL_regkind[(U8)op] == BRANCH) { - assert(next); - DUMPUNTIL(NEXTOPER(node), next); - } - else if ( PL_regkind[(U8)op] == TRIE ) { - const regnode *this_trie = node; - const char op = OP(node); + } + } + else if (PL_regkind[(U8)op] == BRANCH) { + assert(next); + DUMPUNTIL(NEXTOPER(node), next); + } + else if ( PL_regkind[(U8)op] == TRIE ) { + const regnode *this_trie = node; + const char op = OP(node); const U32 n = ARG(node); - const reg_ac_data * const ac = op>=AHOCORASICK ? + const reg_ac_data * const ac = op>=AHOCORASICK ? (reg_ac_data *)ri->data->data[n] : NULL; - const reg_trie_data * const trie = - (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie]; + const reg_trie_data * const trie = + (reg_trie_data*)ri->data->data[op<AHOCORASICK ? 
n : ac->trie]; #ifdef DEBUGGING - AV *const trie_words + AV *const trie_words = MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]); #endif - const regnode *nextbranch= NULL; - I32 word_idx; + const regnode *nextbranch= NULL; + I32 word_idx; SvPVCLEAR(sv); - for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) { - SV ** const elem_ptr = av_fetch(trie_words, word_idx, 0); + for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) { + SV ** const elem_ptr = av_fetch(trie_words, word_idx, 0); Perl_re_indentf( aTHX_ "%s ", indent+3, @@ -22923,41 +22924,41 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, if (dist) { if (!nextbranch) nextbranch= this_trie + trie->jump[0]; - DUMPUNTIL(this_trie + dist, nextbranch); + DUMPUNTIL(this_trie + dist, nextbranch); } if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH) nextbranch= regnext((regnode *)nextbranch); } else { Perl_re_printf( aTHX_ "\n"); - } - } - if (last && next > last) - node= last; - else - node= next; - } - else if ( op == CURLY ) { /* "next" might be very big: optimizer */ - DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, + } + } + if (last && next > last) + node= last; + else + node= next; + } + else if ( op == CURLY ) { /* "next" might be very big: optimizer */ + DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, NEXTOPER(node) + EXTRA_STEP_2ARGS + 1); - } - else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) { - assert(next); - DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, next); - } - else if ( op == PLUS || op == STAR) { - DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1); - } - else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) { + } + else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) { + assert(next); + DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, next); + } + else if ( op == PLUS || op == STAR) { + DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1); + } + else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) { /* Literal string, where present. 
*/ - node += NODE_SZ_STR(node) - 1; - node = NEXTOPER(node); - } - else { - node = NEXTOPER(node); - node += regarglen[(U8)op]; - } - if (op == CURLYX || op == OPEN || op == SROPEN) - indent++; + node += NODE_SZ_STR(node) - 1; + node = NEXTOPER(node); + } + else { + node = NEXTOPER(node); + node += regarglen[(U8)op]; + } + if (op == CURLYX || op == OPEN || op == SROPEN) + indent++; } CLEAR_OPTSTART; #ifdef DEBUG_DUMPUNTIL @@ -23218,7 +23219,7 @@ S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len, STATIC I32 S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend, - char *strbeg, SSize_t minend, SV *screamer, U32 nosave) + char *strbeg, SSize_t minend, SV *screamer, U32 nosave) { I32 result; DECLARE_AND_GET_RE_DEBUG_FLAGS; @@ -24912,7 +24913,7 @@ S_parse_uniprop_string(pTHX_ COPHH * hinthash = (IN_PERL_COMPILETIME) ? CopHINTHASH_get(&PL_compiling) : CopHINTHASH_get(PL_curcop); - SV * pu_overrides = cophh_fetch_pv(hinthash, "private_use", 0, 0); + SV * pu_overrides = cophh_fetch_pv(hinthash, "private_use", 0, 0); if (UNLIKELY(pu_overrides && SvPOK(pu_overrides))) { |