summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2020-11-15 21:06:11 -0700
committer Karl Williamson <khw@cpan.org> 2021-01-20 06:51:49 -0700
commit a7b8d88a7db0f93e2ec0bef63f0460d0d3247b10 (patch)
tree a3d531e8f5d6476f3bbe556398a2c76c34d30f32 /regcomp.c
parent 012ac233b0f87e11d3ffed84dbca75e927e854aa (diff)
download perl-a7b8d88a7db0f93e2ec0bef63f0460d0d3247b10.tar.gz
regcomp.c: Change names of 2 macros for mnemonics
The new names are more understandable to me. This also adds a second parameter to one macro (isQUANTIFIER), which is unused until the next commit in the series.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 5253
1 files changed, 2627 insertions, 2626 deletions
diff --git a/regcomp.c b/regcomp.c
index bf9e2742ef..7e8425f392 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -224,11 +224,11 @@ struct RExC_state_t {
regnode *end_op; /* END node in program */
I32 utf8; /* whether the pattern is utf8 or not */
I32 orig_utf8; /* whether the pattern was originally in utf8 */
- /* XXX use this for future optimisation of case
- * where pattern must be upgraded to utf8. */
+ /* XXX use this for future optimisation of case
+ * where pattern must be upgraded to utf8. */
I32 uni_semantics; /* If a d charset modifier should use unicode
- rules, even if the pattern is not in
- utf8 */
+ rules, even if the pattern is not in
+ utf8 */
I32 recurse_count; /* Number of recurse regops we have generated */
regnode **recurse; /* Recurse regops */
@@ -242,7 +242,7 @@ struct RExC_state_t {
I32 in_multi_char_class;
int code_index; /* next code_blocks[] slot */
struct reg_code_blocks *code_blocks;/* positions of literal (?{})
- within pattern */
+ within pattern */
SSize_t maxlen; /* mininum possible number of chars in string to match */
scan_frame *frame_head;
scan_frame *frame_last;
@@ -360,8 +360,9 @@ struct RExC_state_t {
if (RExC_naughty < TOO_NAUGHTY) \
RExC_naughty += RExC_naughty / (exp) + (add)
-#define ISMULT1(c) ((c) == '*' || (c) == '+' || (c) == '?')
-#define ISMULT2(s) (ISMULT1(*s) || ((*s) == '{' && regcurly(s)))
+#define isNON_BRACE_QUANTIFIER(c) ((c) == '*' || (c) == '+' || (c) == '?')
+#define isQUANTIFIER(s,e) ( isNON_BRACE_QUANTIFIER(*s) \
+ || ((*s) == '{' && regcurly(s)))
/*
* Flags to be passed up and down.
@@ -801,23 +802,23 @@ static const scan_data_t zero_scan_data = {
#define _FAIL(code) STMT_START { \
const char *ellipses = ""; \
IV len = RExC_precomp_end - RExC_precomp; \
- \
+ \
PREPARE_TO_DIE; \
if (len > RegexLengthToShowInErrorMessages) { \
- /* chop 10 shorter than the max, to ensure meaning of "..." */ \
- len = RegexLengthToShowInErrorMessages - 10; \
- ellipses = "..."; \
+ /* chop 10 shorter than the max, to ensure meaning of "..." */ \
+ len = RegexLengthToShowInErrorMessages - 10; \
+ ellipses = "..."; \
} \
code; \
} STMT_END
#define FAIL(msg) _FAIL( \
Perl_croak(aTHX_ "%s in regex m/%" UTF8f "%s/", \
- msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
+ msg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
#define FAIL2(msg,arg) _FAIL( \
Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/", \
- arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
+ arg, UTF8fARG(UTF, len, RExC_precomp), ellipses))
#define FAIL3(msg,arg1,arg2) _FAIL( \
Perl_croak(aTHX_ msg " in regex m/%" UTF8f "%s/", \
@@ -828,7 +829,7 @@ static const scan_data_t zero_scan_data = {
*/
#define Simple_vFAIL(m) STMT_START { \
Perl_croak(aTHX_ "%s" REPORT_LOCATION, \
- m, REPORT_LOCATION_ARGS(RExC_parse)); \
+ m, REPORT_LOCATION_ARGS(RExC_parse)); \
} STMT_END
/*
@@ -861,7 +862,7 @@ static const scan_data_t zero_scan_data = {
*/
#define Simple_vFAIL3(m, a1, a2) STMT_START { \
S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, \
- REPORT_LOCATION_ARGS(RExC_parse)); \
+ REPORT_LOCATION_ARGS(RExC_parse)); \
} STMT_END
/*
@@ -877,7 +878,7 @@ static const scan_data_t zero_scan_data = {
*/
#define Simple_vFAIL4(m, a1, a2, a3) STMT_START { \
S_re_croak(aTHX_ UTF, m REPORT_LOCATION, a1, a2, a3, \
- REPORT_LOCATION_ARGS(RExC_parse)); \
+ REPORT_LOCATION_ARGS(RExC_parse)); \
} STMT_END
#define vFAIL4(m,a1,a2,a3) STMT_START { \
@@ -968,7 +969,7 @@ static const scan_data_t zero_scan_data = {
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
- REPORT_LOCATION_ARGS(loc)))
+ REPORT_LOCATION_ARGS(loc)))
#define vWARN(loc, m) \
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
@@ -980,26 +981,26 @@ static const scan_data_t zero_scan_data = {
_WARN_HELPER(loc, packWARN(WARN_DEPRECATED), \
Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), \
m REPORT_LOCATION, \
- REPORT_LOCATION_ARGS(loc)))
+ REPORT_LOCATION_ARGS(loc)))
#define ckWARNdep(loc,m) \
_WARN_HELPER(loc, packWARN(WARN_DEPRECATED), \
Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \
- m REPORT_LOCATION, \
- REPORT_LOCATION_ARGS(loc)))
+ m REPORT_LOCATION, \
+ REPORT_LOCATION_ARGS(loc)))
#define ckWARNregdep(loc,m) \
_WARN_HELPER(loc, packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, \
WARN_REGEXP), \
- m REPORT_LOCATION, \
- REPORT_LOCATION_ARGS(loc)))
+ m REPORT_LOCATION, \
+ REPORT_LOCATION_ARGS(loc)))
#define ckWARN2reg_d(loc,m, a1) \
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP), \
- m REPORT_LOCATION, \
- a1, REPORT_LOCATION_ARGS(loc)))
+ m REPORT_LOCATION, \
+ a1, REPORT_LOCATION_ARGS(loc)))
#define ckWARN2reg(loc, m, a1) \
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
@@ -1011,34 +1012,34 @@ static const scan_data_t zero_scan_data = {
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
Perl_warner(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
- a1, a2, REPORT_LOCATION_ARGS(loc)))
+ a1, a2, REPORT_LOCATION_ARGS(loc)))
#define ckWARN3reg(loc, m, a1, a2) \
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
- a1, a2, \
+ a1, a2, \
REPORT_LOCATION_ARGS(loc)))
#define vWARN4(loc, m, a1, a2, a3) \
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
Perl_warner(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
- a1, a2, a3, \
+ a1, a2, a3, \
REPORT_LOCATION_ARGS(loc)))
#define ckWARN4reg(loc, m, a1, a2, a3) \
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
- a1, a2, a3, \
+ a1, a2, a3, \
REPORT_LOCATION_ARGS(loc)))
#define vWARN5(loc, m, a1, a2, a3, a4) \
_WARN_HELPER(loc, packWARN(WARN_REGEXP), \
Perl_warner(aTHX_ packWARN(WARN_REGEXP), \
m REPORT_LOCATION, \
- a1, a2, a3, a4, \
+ a1, a2, a3, a4, \
REPORT_LOCATION_ARGS(loc)))
#define ckWARNexperimental(loc, class, m) \
@@ -1080,14 +1081,14 @@ static const scan_data_t zero_scan_data = {
#define ProgLen(ri) ri->u.offsets[0]
#define SetProgLen(ri,x) ri->u.offsets[0] = x
#define Set_Node_Offset_To_R(offset,byte) STMT_START { \
- MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \
- __LINE__, (int)(offset), (int)(byte))); \
- if((offset) < 0) { \
- Perl_croak(aTHX_ "value of node is %d in Offset macro", \
+ MJD_OFFSET_DEBUG(("** (%d) offset of node %d is %d.\n", \
+ __LINE__, (int)(offset), (int)(byte))); \
+ if((offset) < 0) { \
+ Perl_croak(aTHX_ "value of node is %d in Offset macro", \
(int)(offset)); \
- } else { \
+ } else { \
RExC_offsets[2*(offset)-1] = (byte); \
- } \
+ } \
} STMT_END
#define Set_Node_Offset(node,byte) \
@@ -1095,14 +1096,14 @@ static const scan_data_t zero_scan_data = {
#define Set_Cur_Node_Offset Set_Node_Offset(RExC_emit, RExC_parse)
#define Set_Node_Length_To_R(node,len) STMT_START { \
- MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \
- __LINE__, (int)(node), (int)(len))); \
- if((node) < 0) { \
- Perl_croak(aTHX_ "value of node is %d in Length macro", \
+ MJD_OFFSET_DEBUG(("** (%d) size of node %d is %d.\n", \
+ __LINE__, (int)(node), (int)(len))); \
+ if((node) < 0) { \
+ Perl_croak(aTHX_ "value of node is %d in Length macro", \
(int)(node)); \
- } else { \
- RExC_offsets[2*(node)] = (len); \
- } \
+ } else { \
+ RExC_offsets[2*(node)] = (len); \
+ } \
} STMT_END
#define Set_Node_Length(node,len) \
@@ -1476,13 +1477,13 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
if ((l >= old_l) && ((l > old_l) || (data->flags & SF_BEFORE_EOL))) {
const U8 i = data->cur_is_floating;
- SvSetMagicSV(longest_sv, data->last_found);
+ SvSetMagicSV(longest_sv, data->last_found);
data->substrs[i].min_offset = l ? data->last_start_min : data->pos_min;
- if (!i) /* fixed */
- data->substrs[0].max_offset = data->substrs[0].min_offset;
- else { /* float */
- data->substrs[1].max_offset =
+ if (!i) /* fixed */
+ data->substrs[0].max_offset = data->substrs[0].min_offset;
+ else { /* float */
+ data->substrs[1].max_offset =
(is_inf)
? OPTIMIZE_INFTY
: (l
@@ -1490,8 +1491,8 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
/* temporary underflow guard for 5.32 */
: data->pos_delta < 0 ? OPTIMIZE_INFTY
: (data->pos_delta > OPTIMIZE_INFTY - data->pos_min
- ? OPTIMIZE_INFTY
- : data->pos_min + data->pos_delta));
+ ? OPTIMIZE_INFTY
+ : data->pos_min + data->pos_delta));
}
data->substrs[i].flags &= ~SF_BEFORE_EOL;
@@ -1502,12 +1503,12 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
SvCUR_set(data->last_found, 0);
{
- SV * const sv = data->last_found;
- if (SvUTF8(sv) && SvMAGICAL(sv)) {
- MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
- if (mg)
- mg->mg_len = 0;
- }
+ SV * const sv = data->last_found;
+ if (SvUTF8(sv) && SvMAGICAL(sv)) {
+ MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+ if (mg)
+ mg->mg_len = 0;
+ }
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
@@ -1596,10 +1597,10 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
* test cases for locale, many parts of it may not work properly, it is
* safest to avoid locale unless necessary. */
if (RExC_contains_locale) {
- ANYOF_POSIXL_SETALL(ssc);
+ ANYOF_POSIXL_SETALL(ssc);
}
else {
- ANYOF_POSIXL_ZERO(ssc);
+ ANYOF_POSIXL_ZERO(ssc);
}
}
@@ -2254,7 +2255,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
STATIC void
S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
- AV *revcharmap, U32 depth)
+ AV *revcharmap, U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
@@ -2268,14 +2269,14 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
depth+1, "Match","Base","Ofs" );
for( state = 0 ; state < trie->uniquecharcount ; state++ ) {
- SV ** const tmp = av_fetch( revcharmap, state, 0);
+ SV ** const tmp = av_fetch( revcharmap, state, 0);
if ( tmp ) {
Perl_re_printf( aTHX_ "%*s",
colwidth,
pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
- PL_colors[0], PL_colors[1],
- (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
- PERL_PV_ESCAPE_FIRSTCHAR
+ PL_colors[0], PL_colors[1],
+ (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
+ PERL_PV_ESCAPE_FIRSTCHAR
)
);
}
@@ -2288,7 +2289,7 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
Perl_re_printf( aTHX_ "\n");
for( state = 1 ; state < trie->statecount ; state++ ) {
- const U32 base = trie->states[ state ].trans.base;
+ const U32 base = trie->states[ state ].trans.base;
Perl_re_indentf( aTHX_ "#%4" UVXf "|", depth+1, (UV)state);
@@ -2335,8 +2336,8 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
depth);
for (word=1; word <= trie->wordcount; word++) {
Perl_re_printf( aTHX_ " %d:(%d,%d)",
- (int)word, (int)(trie->wordinfo[word].prev),
- (int)(trie->wordinfo[word].len));
+ (int)word, (int)(trie->wordinfo[word].prev),
+ (int)(trie->wordinfo[word].len));
}
Perl_re_printf( aTHX_ "\n" );
}
@@ -2348,8 +2349,8 @@ S_dump_trie(pTHX_ const struct _reg_trie_data *trie, HV *widecharmap,
*/
STATIC void
S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
- HV *widecharmap, AV *revcharmap, U32 next_alloc,
- U32 depth)
+ HV *widecharmap, AV *revcharmap, U32 next_alloc,
+ U32 depth)
{
U32 state;
SV *sv=sv_newmortal();
@@ -2377,9 +2378,9 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
);
}
for( charid = 1 ; charid <= TRIE_LIST_USED( state ) ; charid++ ) {
- SV ** const tmp = av_fetch( revcharmap,
+ SV ** const tmp = av_fetch( revcharmap,
TRIE_LIST_ITEM(state, charid).forid, 0);
- if ( tmp ) {
+ if ( tmp ) {
Perl_re_printf( aTHX_ "%*s:%3X=%4" UVXf " | ",
colwidth,
pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp),
@@ -2408,8 +2409,8 @@ S_dump_trie_interim_list(pTHX_ const struct _reg_trie_data *trie,
*/
STATIC void
S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
- HV *widecharmap, AV *revcharmap, U32 next_alloc,
- U32 depth)
+ HV *widecharmap, AV *revcharmap, U32 next_alloc,
+ U32 depth)
{
U32 state;
U16 charid;
@@ -2427,14 +2428,14 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
Perl_re_indentf( aTHX_ "Char : ", depth+1 );
for( charid = 0 ; charid < trie->uniquecharcount ; charid++ ) {
- SV ** const tmp = av_fetch( revcharmap, charid, 0);
+ SV ** const tmp = av_fetch( revcharmap, charid, 0);
if ( tmp ) {
Perl_re_printf( aTHX_ "%*s",
colwidth,
pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), colwidth,
- PL_colors[0], PL_colors[1],
- (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
- PERL_PV_ESCAPE_FIRSTCHAR
+ PL_colors[0], PL_colors[1],
+ (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
+ PERL_PV_ESCAPE_FIRSTCHAR
)
);
}
@@ -2479,9 +2480,9 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
/* make_trie(startbranch,first,last,tail,word_count,flags,depth)
startbranch: the first branch in the whole branch sequence
first : start branch of sequence of branch-exact nodes.
- May be the same as startbranch
+ May be the same as startbranch
last : Thing following the last branch.
- May be the same as tail.
+ May be the same as tail.
tail : item following the branch sequence
count : words in the sequence
flags : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS|L|FLU8)/
@@ -2557,10 +2558,10 @@ and should turn into:
1: CURLYM[1] {1,32767}(18)
5: TRIE(16)
- [Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
- <ac>
- <ad>
- <ab>
+ [Words:3 Chars Stored:6 Unique Chars:4 States:5 NCP:1]
+ <ac>
+ <ad>
+ <ab>
16: SUCCEED(0)
17: NOTHING(18)
18: END(0)
@@ -2580,8 +2581,8 @@ and would end up looking like:
1: TRIE(8)
[Words:2 Chars Stored:6 Unique Chars:5 States:7 NCP:1]
- <foo>
- <bar>
+ <foo>
+ <bar>
7: TAIL(8)
8: EXACT <baz>(10)
10: END(0)
@@ -2595,19 +2596,19 @@ is the recommended Unicode-aware way of saying
#define TRIE_STORE_REVCHAR(val) \
STMT_START { \
- if (UTF) { \
+ if (UTF) { \
SV *zlopp = newSV(UTF8_MAXBYTES); \
- unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp); \
+ unsigned char *flrbbbbb = (unsigned char *) SvPVX(zlopp); \
unsigned char *const kapow = uvchr_to_utf8(flrbbbbb, val); \
*kapow = '\0'; \
- SvCUR_set(zlopp, kapow - flrbbbbb); \
- SvPOK_on(zlopp); \
- SvUTF8_on(zlopp); \
- av_push(revcharmap, zlopp); \
- } else { \
+ SvCUR_set(zlopp, kapow - flrbbbbb); \
+ SvPOK_on(zlopp); \
+ SvUTF8_on(zlopp); \
+ av_push(revcharmap, zlopp); \
+ } else { \
char ooooff = (char)val; \
- av_push(revcharmap, newSVpvn(&ooooff, 1)); \
- } \
+ av_push(revcharmap, newSVpvn(&ooooff, 1)); \
+ } \
} STMT_END
/* This gets the next character from the input, folding it if not already
@@ -2638,8 +2639,8 @@ is the recommended Unicode-aware way of saying
#define TRIE_LIST_PUSH(state,fid,ns) STMT_START { \
if ( TRIE_LIST_CUR( state ) >=TRIE_LIST_LEN( state ) ) { \
- U32 ging = TRIE_LIST_LEN( state ) * 2; \
- Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
+ U32 ging = TRIE_LIST_LEN( state ) * 2; \
+ Renew( trie->states[ state ].trans.list, ging, reg_trie_trans_le ); \
TRIE_LIST_LEN( state ) = ging; \
} \
TRIE_LIST_ITEM( state, TRIE_LIST_CUR( state ) ).forid = fid; \
@@ -2649,7 +2650,7 @@ is the recommended Unicode-aware way of saying
#define TRIE_LIST_NEW(state) STMT_START { \
Newx( trie->states[ state ].trans.list, \
- 4, reg_trie_trans_le ); \
+ 4, reg_trie_trans_le ); \
TRIE_LIST_CUR( state ) = 1; \
TRIE_LIST_LEN( state ) = 4; \
} STMT_END
@@ -2688,8 +2689,8 @@ is the recommended Unicode-aware way of saying
/* It's a dupe. Pre-insert into the wordinfo[].prev */\
/* chain, so that when the bits of chain are later */\
/* linked together, the dups appear in the chain */\
- trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \
- trie->wordinfo[dupe].prev = curword; \
+ trie->wordinfo[curword].prev = trie->wordinfo[dupe].prev; \
+ trie->wordinfo[dupe].prev = curword; \
} else { \
/* we haven't inserted this word yet. */ \
trie->states[ state ].wordnum = curword; \
@@ -2769,11 +2770,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
switch (flags) {
case EXACT: case EXACT_REQ8: case EXACTL: break;
- case EXACTFAA:
+ case EXACTFAA:
case EXACTFUP:
- case EXACTFU:
- case EXACTFLU8: folder = PL_fold_latin1; break;
- case EXACTF: folder = PL_fold; break;
+ case EXACTFU:
+ case EXACTFLU8: folder = PL_fold_latin1; break;
+ case EXACTF: folder = PL_fold; break;
default: Perl_croak( aTHX_ "panic! In trie construction, unknown node type %u %s", (unsigned) flags, PL_reg_name[flags] );
}
@@ -2784,7 +2785,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
RExC_rxi->data->data[ data_slot ] = (void*)trie;
trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
if (flags == EXACT || flags == EXACT_REQ8 || flags == EXACTL)
- trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
+ trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
trie->wordcount+1, sizeof(reg_trie_wordinfo));
@@ -2964,8 +2965,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
TRIE_STORE_REVCHAR( uvc );
}
if ( set_bit ) {
- /* store the codepoint in the bitmap, and its folded
- * equivalent. */
+ /* store the codepoint in the bitmap, and its folded
+ * equivalent. */
TRIE_BITMAP_SET_FOLDED(trie, uvc, folder);
set_bit = 0; /* We've done our bit :-) */
}
@@ -3010,8 +3011,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
"TRIE(%s): W:%d C:%d Uq:%d Min:%d Max:%d\n",
depth+1,
( widecharmap ? "UTF8" : "NATIVE" ), (int)word_count,
- (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
- (int)trie->minlen, (int)trie->maxlen )
+ (int)TRIE_CHARCOUNT(trie), trie->uniquecharcount,
+ (int)trie->minlen, (int)trie->maxlen )
);
/*
@@ -3059,17 +3060,17 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_ "Compiling trie using list compiler\n",
depth+1));
- trie->states = (reg_trie_state *)
- PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
- sizeof(reg_trie_state) );
+ trie->states = (reg_trie_state *)
+ PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+ sizeof(reg_trie_state) );
TRIE_LIST_NEW(1);
next_alloc = 2;
for ( cur = first ; cur < last ; cur = regnext( cur ) ) {
regnode *noper = NEXTOPER( cur );
- U32 state = 1; /* required init */
- U16 charid = 0; /* sanity init */
+ U32 state = 1; /* required init */
+ U16 charid = 0; /* sanity init */
U32 wordlen = 0; /* required init */
if (OP(noper) == NOTHING) {
@@ -3096,7 +3097,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
if ( uvc < 256 ) {
charid = trie->charmap[ uvc ];
- } else {
+ } else {
SV** const svpp = hv_fetch( widecharmap,
(char*)&uvc,
sizeof( UV ),
@@ -3106,7 +3107,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
} else {
charid=(U16)SvIV( *svpp );
}
- }
+ }
/* charid is now 0 if we dont know the char read, or
* nonzero if we do */
if ( charid ) {
@@ -3117,7 +3118,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
charid--;
if ( !trie->states[ state ].trans.list ) {
TRIE_LIST_NEW( state );
- }
+ }
for ( check = 1;
check <= TRIE_LIST_USED( state );
check++ )
@@ -3131,15 +3132,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
}
if ( ! newstate ) {
newstate = next_alloc++;
- prev_states[newstate] = state;
+ prev_states[newstate] = state;
TRIE_LIST_PUSH( state, charid, newstate );
transcount++;
}
state = newstate;
} else {
Perl_croak( aTHX_ "panic! In trie construction, no char mapping for %" IVdf, uvc );
- }
- }
+ }
+ }
} else {
/* If we end up here it is because we skipped past a NOTHING, but did not end up
* on a trieable type. So we need to reset noper back to point at the first regop
@@ -3154,18 +3155,18 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
/* next alloc is the NEXT state to be allocated */
trie->statecount = next_alloc;
trie->states = (reg_trie_state *)
- PerlMemShared_realloc( trie->states,
- next_alloc
- * sizeof(reg_trie_state) );
+ PerlMemShared_realloc( trie->states,
+ next_alloc
+ * sizeof(reg_trie_state) );
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_list(trie, widecharmap,
- revcharmap, next_alloc,
- depth+1)
+ revcharmap, next_alloc,
+ depth+1)
);
trie->trans = (reg_trie_trans *)
- PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
+ PerlMemShared_calloc( transcount, sizeof(reg_trie_trans) );
{
U32 state;
U32 tp = 0;
@@ -3184,22 +3185,22 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
if (trie->states[state].trans.list) {
U16 minid=TRIE_LIST_ITEM( state, 1).forid;
U16 maxid=minid;
- U16 idx;
+ U16 idx;
for( idx = 2 ; idx <= TRIE_LIST_USED( state ) ; idx++ ) {
- const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
- if ( forid < minid ) {
- minid=forid;
- } else if ( forid > maxid ) {
- maxid=forid;
- }
+ const U16 forid = TRIE_LIST_ITEM( state, idx).forid;
+ if ( forid < minid ) {
+ minid=forid;
+ } else if ( forid > maxid ) {
+ maxid=forid;
+ }
}
if ( transcount < tp + maxid - minid + 1) {
transcount *= 2;
- trie->trans = (reg_trie_trans *)
- PerlMemShared_realloc( trie->trans,
- transcount
- * sizeof(reg_trie_trans) );
+ trie->trans = (reg_trie_trans *)
+ PerlMemShared_realloc( trie->trans,
+ transcount
+ * sizeof(reg_trie_trans) );
Zero( trie->trans + (transcount / 2),
transcount / 2,
reg_trie_trans );
@@ -3285,13 +3286,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
DEBUG_TRIE_COMPILE_MORE_r( Perl_re_indentf( aTHX_ "Compiling trie using table compiler\n",
depth+1));
- trie->trans = (reg_trie_trans *)
- PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
- * trie->uniquecharcount + 1,
- sizeof(reg_trie_trans) );
+ trie->trans = (reg_trie_trans *)
+ PerlMemShared_calloc( ( TRIE_CHARCOUNT(trie) + 1 )
+ * trie->uniquecharcount + 1,
+ sizeof(reg_trie_trans) );
trie->states = (reg_trie_state *)
- PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
- sizeof(reg_trie_state) );
+ PerlMemShared_calloc( TRIE_CHARCOUNT(trie) + 2,
+ sizeof(reg_trie_state) );
next_alloc = trie->uniquecharcount + 1;
@@ -3342,8 +3343,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
if ( !trie->trans[ state + charid ].next ) {
trie->trans[ state + charid ].next = next_alloc;
trie->trans[ state ].check++;
- prev_states[TRIE_NODENUM(next_alloc)]
- = TRIE_NODENUM(state);
+ prev_states[TRIE_NODENUM(next_alloc)]
+ = TRIE_NODENUM(state);
next_alloc += trie->uniquecharcount;
}
state = trie->trans[ state + charid ].next;
@@ -3367,8 +3368,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
/* and now dump it out before we compress it */
DEBUG_TRIE_COMPILE_MORE_r(dump_trie_interim_table(trie, widecharmap,
- revcharmap,
- next_alloc, depth+1));
+ revcharmap,
+ next_alloc, depth+1));
{
/*
@@ -3433,15 +3434,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
demq
*/
const U32 laststate = TRIE_NODENUM( next_alloc );
- U32 state, charid;
+ U32 state, charid;
U32 pos = 0, zp=0;
trie->statecount = laststate;
for ( state = 1 ; state < laststate ; state++ ) {
U8 flag = 0;
- const U32 stateidx = TRIE_NODEIDX( state );
- const U32 o_used = trie->trans[ stateidx ].check;
- U32 used = trie->trans[ stateidx ].check;
+ const U32 stateidx = TRIE_NODEIDX( state );
+ const U32 o_used = trie->trans[ stateidx ].check;
+ U32 used = trie->trans[ stateidx ].check;
trie->trans[ stateidx ].check = 0;
for ( charid = 0;
@@ -3484,8 +3485,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
}
trie->lasttrans = pos + 1;
trie->states = (reg_trie_state *)
- PerlMemShared_realloc( trie->states, laststate
- * sizeof(reg_trie_state) );
+ PerlMemShared_realloc( trie->states, laststate
+ * sizeof(reg_trie_state) );
DEBUG_TRIE_COMPILE_MORE_r(
Perl_re_indentf( aTHX_ "Alloc: %d Orig: %" IVdf " elements, Final:%" IVdf ". Savings of %%%5.2f\n",
depth+1,
@@ -3506,8 +3507,8 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
);
/* resize the trans array to remove unused space */
trie->trans = (reg_trie_trans *)
- PerlMemShared_realloc( trie->trans, trie->lasttrans
- * sizeof(reg_trie_trans) );
+ PerlMemShared_realloc( trie->trans, trie->lasttrans
+ * sizeof(reg_trie_trans) );
{ /* Modify the program and insert the new TRIE node */
U8 nodetype =(U8)(flags & 0xFF);
@@ -3602,20 +3603,20 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
(UV)state));
if (first_ofs >= 0) {
SV ** const tmp = av_fetch( revcharmap, first_ofs, 0);
- const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
+ const U8 * const ch = (U8*)SvPV_nolen_const( *tmp );
TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
DEBUG_OPTIMISE_r(
Perl_re_printf( aTHX_ "%s", (char*)ch)
);
- }
- }
+ }
+ }
/* store the current firstchar in the bitmap */
TRIE_BITMAP_SET_FOLDED(trie,*ch, folder);
DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "%s", ch));
- }
+ }
first_ofs = ofs;
- }
+ }
}
if ( count == 1 ) {
/* This state has only one transition, its transition is part
@@ -3630,9 +3631,9 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
depth+1,
(UV)state, (UV)first_ofs,
pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 6,
- PL_colors[0], PL_colors[1],
- (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
- PERL_PV_ESCAPE_FIRSTCHAR
+ PL_colors[0], PL_colors[1],
+ (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0) |
+ PERL_PV_ESCAPE_FIRSTCHAR
)
);
});
@@ -3645,15 +3646,15 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
setSTR_LEN(convert, (U8)(STR_LEN(convert) + len));
while (len--)
*str++ = *ch++;
- } else {
+ } else {
#ifdef DEBUGGING
- if (state>1)
+ if (state>1)
DEBUG_OPTIMISE_r(Perl_re_printf( aTHX_ "]\n"));
#endif
- break;
- }
- }
- trie->prefixlen = (state-1);
+ break;
+ }
+ }
+ trie->prefixlen = (state-1);
if (str) {
regnode *n = convert+NODE_SZ_STR(convert);
assert( NODE_SZ_STR(convert) <= U16_MAX );
@@ -3694,7 +3695,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
#endif
if (trie->maxlen) {
convert = n;
- } else {
+ } else {
NEXT_OFF(convert) = (U16)(tail - convert);
DEBUG_r(optimize= n);
}
@@ -3703,23 +3704,23 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
if (!jumper)
jumper = last;
if ( trie->maxlen ) {
- NEXT_OFF( convert ) = (U16)(tail - convert);
- ARG_SET( convert, data_slot );
- /* Store the offset to the first unabsorbed branch in
- jump[0], which is otherwise unused by the jump logic.
- We use this when dumping a trie and during optimisation. */
- if (trie->jump)
- trie->jump[0] = (U16)(nextbranch - convert);
+ NEXT_OFF( convert ) = (U16)(tail - convert);
+ ARG_SET( convert, data_slot );
+ /* Store the offset to the first unabsorbed branch in
+ jump[0], which is otherwise unused by the jump logic.
+ We use this when dumping a trie and during optimisation. */
+ if (trie->jump)
+ trie->jump[0] = (U16)(nextbranch - convert);
/* If the start state is not accepting (meaning there is no empty string/NOTHING)
- * and there is a bitmap
- * and the first "jump target" node we found leaves enough room
- * then convert the TRIE node into a TRIEC node, with the bitmap
- * embedded inline in the opcode - this is hypothetically faster.
- */
+ * and there is a bitmap
+ * and the first "jump target" node we found leaves enough room
+ * then convert the TRIE node into a TRIEC node, with the bitmap
+ * embedded inline in the opcode - this is hypothetically faster.
+ */
if ( !trie->states[trie->startstate].wordnum
- && trie->bitmap
- && ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) )
+ && trie->bitmap
+ && ( (char *)jumper - (char *)convert) >= (int)sizeof(struct regnode_charclass) )
{
OP( convert ) = TRIEC;
Copy(trie->bitmap, ((struct regnode_charclass *)convert)->bitmap, ANYOF_BITMAP_SIZE, char);
@@ -3768,26 +3769,26 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
* already linked up earlier.
*/
{
- U16 word;
- U32 state;
- U16 prev;
-
- for (word=1; word <= trie->wordcount; word++) {
- prev = 0;
- if (trie->wordinfo[word].prev)
- continue;
- state = trie->wordinfo[word].accept;
- while (state) {
- state = prev_states[state];
- if (!state)
- break;
- prev = trie->states[state].wordnum;
- if (prev)
- break;
- }
- trie->wordinfo[word].prev = prev;
- }
- Safefree(prev_states);
+ U16 word;
+ U32 state;
+ U16 prev;
+
+ for (word=1; word <= trie->wordcount; word++) {
+ prev = 0;
+ if (trie->wordinfo[word].prev)
+ continue;
+ state = trie->wordinfo[word].accept;
+ while (state) {
+ state = prev_states[state];
+ if (!state)
+ break;
+ prev = trie->states[state].wordnum;
+ if (prev)
+ break;
+ }
+ trie->wordinfo[word].prev = prev;
+ }
+ Safefree(prev_states);
}
@@ -3884,20 +3885,20 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
fail[ 0 ] = fail[ 1 ] = 1;
for ( charid = 0; charid < ucharcount ; charid++ ) {
- const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 );
- if ( newstate ) {
+ const U32 newstate = TRIE_TRANS_STATE( 1, base, ucharcount, charid, 0 );
+ if ( newstate ) {
q[ q_write ] = newstate;
/* set to point at the root */
fail[ q[ q_write++ ] ]=1;
}
}
while ( q_read < q_write) {
- const U32 cur = q[ q_read++ % numstates ];
+ const U32 cur = q[ q_read++ % numstates ];
base = trie->states[ cur ].trans.base;
for ( charid = 0 ; charid < ucharcount ; charid++ ) {
- const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 );
- if (ch_state) {
+ const U32 ch_state = TRIE_TRANS_STATE( cur, base, ucharcount, charid, 1 );
+ if (ch_state) {
U32 fail_state = cur;
U32 fail_base;
do {
@@ -4259,16 +4260,16 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
}
#ifdef EXPERIMENTAL_INPLACESCAN
- if (flags && !NEXT_OFF(n)) {
- DEBUG_PEEP("atch", val, depth, 0);
- if (reg_off_by_arg[OP(n)]) {
- ARG_SET(n, val - n);
- }
- else {
- NEXT_OFF(n) = val - n;
- }
- stopnow = 1;
- }
+ if (flags && !NEXT_OFF(n)) {
+ DEBUG_PEEP("atch", val, depth, 0);
+ if (reg_off_by_arg[OP(n)]) {
+ ARG_SET(n, val - n);
+ }
+ else {
+ NEXT_OFF(n) = val - n;
+ }
+ stopnow = 1;
+ }
#endif
}
@@ -4294,11 +4295,11 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
int total_count_delta = 0; /* Total delta number of characters that
multi-char folds expand to */
- /* One pass is made over the node's string looking for all the
- * possibilities. To avoid some tests in the loop, there are two main
- * cases, for UTF-8 patterns (which can't have EXACTF nodes) and
- * non-UTF-8 */
- if (UTF) {
+ /* One pass is made over the node's string looking for all the
+ * possibilities. To avoid some tests in the loop, there are two main
+ * cases, for UTF-8 patterns (which can't have EXACTF nodes) and
+ * non-UTF-8 */
+ if (UTF) {
U8* folded = NULL;
if (OP(scan) == EXACTFL) {
@@ -4355,7 +4356,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
* executed */
while (s < s_end - 1) /* Can stop 1 before the end, as minimum
length sequence we are looking for is 2 */
- {
+ {
int count = 0; /* How many characters in a multi-char fold */
int len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end);
if (! len) { /* Not a multi-char fold: get next char */
@@ -4391,7 +4392,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
* the character that folds to the sequence is) */
total_count_delta += count - 1;
next_iteration: ;
- }
+ }
/* We created a temporary folded copy of the string in EXACTFL
* nodes. Therefore we need to be sure it doesn't go below zero,
@@ -4406,8 +4407,8 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
*min_subtract += total_count_delta;
Safefree(folded);
- }
- else if (OP(scan) == EXACTFAA) {
+ }
+ else if (OP(scan) == EXACTFAA) {
/* Non-UTF-8 pattern, EXACTFAA node. There can't be a multi-char
* fold to the ASCII range (and there are no existing ones in the
@@ -4418,7 +4419,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
#if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
|| (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
|| UNICODE_DOT_DOT_VERSION > 0)
- while (s < s_end) {
+ while (s < s_end) {
if (*s == LATIN_SMALL_LETTER_SHARP_S) {
OP(scan) = EXACTFAA_NO_TRIE;
*unfolded_multi_char = TRUE;
@@ -4427,7 +4428,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
s++;
}
}
- else if (OP(scan) != EXACTFAA_NO_TRIE) {
+ else if (OP(scan) != EXACTFAA_NO_TRIE) {
/* Non-UTF-8 pattern, not EXACTFAA node. Look for the multi-char
* folds that are all Latin1. As explained in the comments
@@ -4435,11 +4436,11 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
* and EXACTFL nodes; it can be in the final position. Otherwise
* we can stop looking 1 byte earlier because have to find at least
* two characters for a multi-fold */
- const U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL)
+ const U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL)
? s_end
: s_end -1;
- while (s < upper) {
+ while (s < upper) {
int len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
if (! len) { /* Not a multi-char fold. */
if (*s == LATIN_SMALL_LETTER_SHARP_S
@@ -4465,13 +4466,13 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
if (OP(scan) != EXACTF && OP(scan) != EXACTFL) {
OP(scan) = EXACTFUP;
}
- }
+ }
*min_subtract += len - 1;
s += len;
- }
+ }
#endif
- }
+ }
}
#ifdef DEBUGGING
@@ -4479,9 +4480,9 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
* ops and/or strings with fake optimized ops */
n = scan + NODE_SZ_STR(scan);
while (n <= stop) {
- OP(n) = OPTIMIZED;
- FLAGS(n) = 0;
- NEXT_OFF(n) = 0;
+ OP(n) = OPTIMIZED;
+ FLAGS(n) = 0;
+ NEXT_OFF(n) = 0;
n++;
}
#endif
@@ -4552,19 +4553,19 @@ S_rck_elide_nothing(pTHX_ regnode *node)
STATIC SSize_t
S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
SSize_t *minlenp, SSize_t *deltap,
- regnode *last,
- scan_data_t *data,
- I32 stopparen,
+ regnode *last,
+ scan_data_t *data,
+ I32 stopparen,
U32 recursed_depth,
- regnode_ssc *and_withp,
- U32 flags, U32 depth, bool was_mutate_ok)
- /* scanp: Start here (read-write). */
- /* deltap: Write maxlen-minlen here. */
- /* last: Stop before this one. */
- /* data: string data about the pattern */
- /* stopparen: treat close N as END */
- /* recursed: which subroutines have we recursed into */
- /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
+ regnode_ssc *and_withp,
+ U32 flags, U32 depth, bool was_mutate_ok)
+ /* scanp: Start here (read-write). */
+ /* deltap: Write maxlen-minlen here. */
+ /* last: Stop before this one. */
+ /* data: string data about the pattern */
+ /* stopparen: treat close N as END */
+ /* recursed: which subroutines have we recursed into */
+ /* and_withp: Valid if flags & SCF_DO_STCLASS_OR */
{
SSize_t final_minlen;
/* There must be at least this number of characters to match */
@@ -4627,12 +4628,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
UV min_subtract = 0; /* How mmany chars to subtract from the minimum
node length to get a real minimum (because
the folded version may be shorter) */
- bool unfolded_multi_char = FALSE;
+ bool unfolded_multi_char = FALSE;
/* avoid mutating ops if we are anywhere within the recursed or
* enframed handling for a GOSUB: the outermost level will handle it.
*/
bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
- /* Peephole optimizer: */
+ /* Peephole optimizer: */
DEBUG_STUDYDATA("Peep", data, depth, is_inf);
DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4690,21 +4691,21 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
OP(scan) == BRANCHJ ||
OP(scan) == IFTHEN
) {
- next = regnext(scan);
- code = OP(scan);
+ next = regnext(scan);
+ code = OP(scan);
/* The op(next)==code check below is to see if we
* have "BRANCH-BRANCH", "BRANCHJ-BRANCHJ", "IFTHEN-IFTHEN"
* IFTHEN is special as it might not appear in pairs.
* Not sure whether BRANCH-BRANCHJ is possible, regardless
* we dont handle it cleanly. */
- if (OP(next) == code || code == IFTHEN) {
+ if (OP(next) == code || code == IFTHEN) {
/* NOTE - There is similar code to this block below for
* handling TRIE nodes on a re-study. If you change stuff here
* check there too. */
- SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
- regnode_ssc accum;
- regnode * const startbranch=scan;
+ SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
+ regnode_ssc accum;
+ regnode * const startbranch=scan;
if (flags & SCF_DO_SUBSTR) {
/* Cannot merge strings after this. */
@@ -4712,164 +4713,164 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
if (flags & SCF_DO_STCLASS)
- ssc_init_zero(pRExC_state, &accum);
+ ssc_init_zero(pRExC_state, &accum);
- while (OP(scan) == code) {
- SSize_t deltanext, minnext, fake;
- I32 f = 0;
- regnode_ssc this_class;
+ while (OP(scan) == code) {
+ SSize_t deltanext, minnext, fake;
+ I32 f = 0;
+ regnode_ssc this_class;
DEBUG_PEEP("Branch", scan, depth, flags);
- num++;
+ num++;
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
- if (data) {
- data_fake.whilem_c = data->whilem_c;
- data_fake.last_closep = data->last_closep;
- }
- else
- data_fake.last_closep = &fake;
+ if (data) {
+ data_fake.whilem_c = data->whilem_c;
+ data_fake.last_closep = data->last_closep;
+ }
+ else
+ data_fake.last_closep = &fake;
- data_fake.pos_delta = delta;
- next = regnext(scan);
+ data_fake.pos_delta = delta;
+ next = regnext(scan);
scan = NEXTOPER(scan); /* everything */
if (code != BRANCH) /* everything but BRANCH */
- scan = NEXTOPER(scan);
+ scan = NEXTOPER(scan);
- if (flags & SCF_DO_STCLASS) {
- ssc_init(pRExC_state, &this_class);
- data_fake.start_class = &this_class;
- f = SCF_DO_STCLASS_AND;
- }
- if (flags & SCF_WHILEM_VISITED_POS)
- f |= SCF_WHILEM_VISITED_POS;
+ if (flags & SCF_DO_STCLASS) {
+ ssc_init(pRExC_state, &this_class);
+ data_fake.start_class = &this_class;
+ f = SCF_DO_STCLASS_AND;
+ }
+ if (flags & SCF_WHILEM_VISITED_POS)
+ f |= SCF_WHILEM_VISITED_POS;
- /* we suppose the run is continuous, last=next...*/
+ /* we suppose the run is continuous, last=next...*/
/* recurse study_chunk() for each BRANCH in an alternation */
- minnext = study_chunk(pRExC_state, &scan, minlenp,
+ minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, next, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1,
mutate_ok);
- if (min1 > minnext)
- min1 = minnext;
- if (deltanext == OPTIMIZE_INFTY) {
- is_inf = is_inf_internal = 1;
- max1 = OPTIMIZE_INFTY;
- } else if (max1 < minnext + deltanext)
- max1 = minnext + deltanext;
- scan = next;
- if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
- pars++;
- if (data_fake.flags & SCF_SEEN_ACCEPT) {
- if ( stopmin > minnext)
- stopmin = min + min1;
- flags &= ~SCF_DO_SUBSTR;
- if (data)
- data->flags |= SCF_SEEN_ACCEPT;
- }
- if (data) {
- if (data_fake.flags & SF_HAS_EVAL)
- data->flags |= SF_HAS_EVAL;
- data->whilem_c = data_fake.whilem_c;
- }
- if (flags & SCF_DO_STCLASS)
- ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
- }
- if (code == IFTHEN && num < 2) /* Empty ELSE branch */
- min1 = 0;
- if (flags & SCF_DO_SUBSTR) {
- data->pos_min += min1;
- if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
- data->pos_delta = OPTIMIZE_INFTY;
- else
- data->pos_delta += max1 - min1;
- if (max1 != min1 || is_inf)
- data->cur_is_floating = 1;
- }
- min += min1;
- if (delta == OPTIMIZE_INFTY
- || OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
- delta = OPTIMIZE_INFTY;
- else
- delta += max1 - min1;
- if (flags & SCF_DO_STCLASS_OR) {
- ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum);
- if (min1) {
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
- flags &= ~SCF_DO_STCLASS;
- }
- }
- else if (flags & SCF_DO_STCLASS_AND) {
- if (min1) {
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
- flags &= ~SCF_DO_STCLASS;
- }
- else {
- /* Switch to OR mode: cache the old value of
- * data->start_class */
- INIT_AND_WITHP;
- StructCopy(data->start_class, and_withp, regnode_ssc);
- flags &= ~SCF_DO_STCLASS_AND;
- StructCopy(&accum, data->start_class, regnode_ssc);
- flags |= SCF_DO_STCLASS_OR;
- }
- }
+ if (min1 > minnext)
+ min1 = minnext;
+ if (deltanext == OPTIMIZE_INFTY) {
+ is_inf = is_inf_internal = 1;
+ max1 = OPTIMIZE_INFTY;
+ } else if (max1 < minnext + deltanext)
+ max1 = minnext + deltanext;
+ scan = next;
+ if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
+ pars++;
+ if (data_fake.flags & SCF_SEEN_ACCEPT) {
+ if ( stopmin > minnext)
+ stopmin = min + min1;
+ flags &= ~SCF_DO_SUBSTR;
+ if (data)
+ data->flags |= SCF_SEEN_ACCEPT;
+ }
+ if (data) {
+ if (data_fake.flags & SF_HAS_EVAL)
+ data->flags |= SF_HAS_EVAL;
+ data->whilem_c = data_fake.whilem_c;
+ }
+ if (flags & SCF_DO_STCLASS)
+ ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
+ }
+ if (code == IFTHEN && num < 2) /* Empty ELSE branch */
+ min1 = 0;
+ if (flags & SCF_DO_SUBSTR) {
+ data->pos_min += min1;
+ if (data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
+ data->pos_delta = OPTIMIZE_INFTY;
+ else
+ data->pos_delta += max1 - min1;
+ if (max1 != min1 || is_inf)
+ data->cur_is_floating = 1;
+ }
+ min += min1;
+ if (delta == OPTIMIZE_INFTY
+ || OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
+ delta = OPTIMIZE_INFTY;
+ else
+ delta += max1 - min1;
+ if (flags & SCF_DO_STCLASS_OR) {
+ ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum);
+ if (min1) {
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+ flags &= ~SCF_DO_STCLASS;
+ }
+ }
+ else if (flags & SCF_DO_STCLASS_AND) {
+ if (min1) {
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
+ flags &= ~SCF_DO_STCLASS;
+ }
+ else {
+ /* Switch to OR mode: cache the old value of
+ * data->start_class */
+ INIT_AND_WITHP;
+ StructCopy(data->start_class, and_withp, regnode_ssc);
+ flags &= ~SCF_DO_STCLASS_AND;
+ StructCopy(&accum, data->start_class, regnode_ssc);
+ flags |= SCF_DO_STCLASS_OR;
+ }
+ }
if (PERL_ENABLE_TRIE_OPTIMISATION
&& OP(startbranch) == BRANCH
&& mutate_ok
) {
- /* demq.
+ /* demq.
Assuming this was/is a branch we are dealing with: 'scan'
now points at the item that follows the branch sequence,
whatever it is. We now start at the beginning of the
sequence and look for subsequences of
- BRANCH->EXACT=>x1
- BRANCH->EXACT=>x2
- tail
+ BRANCH->EXACT=>x1
+ BRANCH->EXACT=>x2
+ tail
which would be constructed from a pattern like
/A|LIST|OF|WORDS/
- If we can find such a subsequence we need to turn the first
- element into a trie and then add the subsequent branch exact
- strings to the trie.
+ If we can find such a subsequence we need to turn the first
+ element into a trie and then add the subsequent branch exact
+ strings to the trie.
- We have two cases
+ We have two cases
1. patterns where the whole set of branches can be
converted.
- 2. patterns where only a subset can be converted.
+ 2. patterns where only a subset can be converted.
- In case 1 we can replace the whole set with a single regop
- for the trie. In case 2 we need to keep the start and end
- branches so
+ In case 1 we can replace the whole set with a single regop
+ for the trie. In case 2 we need to keep the start and end
+ branches so
- 'BRANCH EXACT; BRANCH EXACT; BRANCH X'
- becomes BRANCH TRIE; BRANCH X;
+ 'BRANCH EXACT; BRANCH EXACT; BRANCH X'
+ becomes BRANCH TRIE; BRANCH X;
- There is an additional case, that being where there is a
- common prefix, which gets split out into an EXACT like node
- preceding the TRIE node.
+ There is an additional case, that being where there is a
+ common prefix, which gets split out into an EXACT like node
+ preceding the TRIE node.
- If x(1..n)==tail then we can do a simple trie, if not we make
- a "jump" trie, such that when we match the appropriate word
- we "jump" to the appropriate tail node. Essentially we turn
- a nested if into a case structure of sorts.
+ If x(1..n)==tail then we can do a simple trie, if not we make
+ a "jump" trie, such that when we match the appropriate word
+ we "jump" to the appropriate tail node. Essentially we turn
+ a nested if into a case structure of sorts.
- */
+ */
- int made=0;
- if (!re_trie_maxbuff) {
- re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
- if (!SvIOK(re_trie_maxbuff))
- sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
- }
+ int made=0;
+ if (!re_trie_maxbuff) {
+ re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
+ if (!SvIOK(re_trie_maxbuff))
+ sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
+ }
if ( SvIV(re_trie_maxbuff)>=0 ) {
regnode *cur;
regnode *first = (regnode *)NULL;
@@ -5005,8 +5006,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
Perl_re_printf( aTHX_ "(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n",
REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
- PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype]
- );
+ PL_reg_name[trietype], PL_reg_name[noper_trietype], PL_reg_name[noper_next_trietype]
+ );
});
/* Is noper a trieable nodetype that can be merged
@@ -5029,15 +5030,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
* otherwise we update the end pointer. */
if ( !first ) {
first = cur;
- if ( noper_trietype == NOTHING ) {
+ if ( noper_trietype == NOTHING ) {
#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
- regnode * const noper_next = regnext( noper );
+ regnode * const noper_next = regnext( noper );
U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
- U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
+ U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
#endif
if ( noper_next_trietype ) {
- trietype = noper_next_trietype;
+ trietype = noper_next_trietype;
} else if (noper_next_type) {
/* a NOTHING regop is 1 regop wide.
* We need at least two for a trie
@@ -5052,8 +5053,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
trietype = noper_trietype;
prev = cur;
}
- if (first)
- count++;
+ if (first)
+ count++;
} /* end handle mergable triable node */
else {
/* handle unmergable node -
@@ -5156,12 +5157,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
} /* TRIE_MAXBUF is non zero */
} /* do trie */
- }
- else if ( code == BRANCHJ ) { /* single branch is optimized. */
- scan = NEXTOPER(NEXTOPER(scan));
- } else /* single branch is optimized. */
- scan = NEXTOPER(scan);
- continue;
+ }
+ else if ( code == BRANCHJ ) { /* single branch is optimized. */
+ scan = NEXTOPER(NEXTOPER(scan));
+ } else /* single branch is optimized. */
+ scan = NEXTOPER(scan);
+ continue;
} else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) {
I32 paren = 0;
regnode *start = NULL;
@@ -5249,12 +5250,12 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
flags &= ~SCF_DO_STCLASS;
start= NULL; /* reset start so we dont recurse later on. */
- }
+ }
} else {
- paren = stopparen;
+ paren = stopparen;
start = scan + 2;
- end = regnext(scan);
- }
+ end = regnext(scan);
+ }
if (start) {
scan_frame *newframe;
assert(end);
@@ -5285,73 +5286,73 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
DEBUG_PEEP("fnew", scan, depth, flags);
- frame = newframe;
- scan = start;
- stopparen = paren;
- last = end;
+ frame = newframe;
+ scan = start;
+ stopparen = paren;
+ last = end;
depth = depth + 1;
recursed_depth= my_recursed_depth;
- continue;
- }
- }
- else if (PL_regkind[OP(scan)] == EXACT && ! isEXACTFish(OP(scan))) {
- SSize_t bytelen = STR_LEN(scan), charlen;
- UV uc;
+ continue;
+ }
+ }
+ else if (PL_regkind[OP(scan)] == EXACT && ! isEXACTFish(OP(scan))) {
+ SSize_t bytelen = STR_LEN(scan), charlen;
+ UV uc;
assert(bytelen);
- if (UTF) {
- const U8 * const s = (U8*)STRING(scan);
- uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
- charlen = utf8_length(s, s + bytelen);
- } else {
- uc = *((U8*)STRING(scan));
+ if (UTF) {
+ const U8 * const s = (U8*)STRING(scan);
+ uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+ charlen = utf8_length(s, s + bytelen);
+ } else {
+ uc = *((U8*)STRING(scan));
charlen = bytelen;
- }
- min += charlen;
- if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
- /* The code below prefers earlier match for fixed
- offset, later match for variable offset. */
- if (data->last_end == -1) { /* Update the start info. */
- data->last_start_min = data->pos_min;
+ }
+ min += charlen;
+ if (flags & SCF_DO_SUBSTR) { /* Update longest substr. */
+ /* The code below prefers earlier match for fixed
+ offset, later match for variable offset. */
+ if (data->last_end == -1) { /* Update the start info. */
+ data->last_start_min = data->pos_min;
data->last_start_max =
is_inf ? OPTIMIZE_INFTY
: (data->pos_delta > OPTIMIZE_INFTY - data->pos_min)
? OPTIMIZE_INFTY : data->pos_min + data->pos_delta;
- }
- sv_catpvn(data->last_found, STRING(scan), bytelen);
- if (UTF)
- SvUTF8_on(data->last_found);
- {
- SV * const sv = data->last_found;
- MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
- mg_find(sv, PERL_MAGIC_utf8) : NULL;
- if (mg && mg->mg_len >= 0)
- mg->mg_len += charlen;
- }
- data->last_end = data->pos_min + charlen;
- data->pos_min += charlen; /* As in the first entry. */
- data->flags &= ~SF_BEFORE_EOL;
- }
+ }
+ sv_catpvn(data->last_found, STRING(scan), bytelen);
+ if (UTF)
+ SvUTF8_on(data->last_found);
+ {
+ SV * const sv = data->last_found;
+ MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+ mg_find(sv, PERL_MAGIC_utf8) : NULL;
+ if (mg && mg->mg_len >= 0)
+ mg->mg_len += charlen;
+ }
+ data->last_end = data->pos_min + charlen;
+ data->pos_min += charlen; /* As in the first entry. */
+ data->flags &= ~SF_BEFORE_EOL;
+ }
/* ANDing the code point leaves at most it, and not in locale, and
* can't match null string */
- if (flags & SCF_DO_STCLASS_AND) {
+ if (flags & SCF_DO_STCLASS_AND) {
ssc_cp_and(data->start_class, uc);
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
ssc_clear_locale(data->start_class);
- }
- else if (flags & SCF_DO_STCLASS_OR) {
+ }
+ else if (flags & SCF_DO_STCLASS_OR) {
ssc_add_cp(data->start_class, uc);
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
/* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
- }
- flags &= ~SCF_DO_STCLASS;
- }
+ }
+ flags &= ~SCF_DO_STCLASS;
+ }
else if (PL_regkind[OP(scan)] == EXACT) {
/* But OP != EXACT!, so is EXACTFish */
- SSize_t bytelen = STR_LEN(scan), charlen;
+ SSize_t bytelen = STR_LEN(scan), charlen;
const U8 * s = (U8*)STRING(scan);
/* Replace a length 1 ASCII fold pair node with an ANYOFM node,
@@ -5374,28 +5375,28 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
continue;
}
- /* Search for fixed substrings supports EXACT only. */
- if (flags & SCF_DO_SUBSTR) {
- assert(data);
+ /* Search for fixed substrings supports EXACT only. */
+ if (flags & SCF_DO_SUBSTR) {
+ assert(data);
scan_commit(pRExC_state, data, minlenp, is_inf);
- }
+ }
charlen = UTF ? (SSize_t) utf8_length(s, s + bytelen) : bytelen;
- if (unfolded_multi_char) {
+ if (unfolded_multi_char) {
RExC_seen |= REG_UNFOLDED_MULTI_SEEN;
- }
- min += charlen - min_subtract;
+ }
+ min += charlen - min_subtract;
assert (min >= 0);
delta += min_subtract;
- if (flags & SCF_DO_SUBSTR) {
- data->pos_min += charlen - min_subtract;
- if (data->pos_min < 0) {
+ if (flags & SCF_DO_SUBSTR) {
+ data->pos_min += charlen - min_subtract;
+ if (data->pos_min < 0) {
data->pos_min = 0;
}
data->pos_delta += min_subtract;
- if (min_subtract) {
- data->cur_is_floating = 1; /* float */
- }
- }
+ if (min_subtract) {
+ data->cur_is_floating = 1; /* float */
+ }
+ }
if (flags & SCF_DO_STCLASS) {
SV* EXACTF_invlist = make_exactf_invlist(pRExC_state, scan);
@@ -5418,41 +5419,41 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
flags &= ~SCF_DO_STCLASS;
SvREFCNT_dec(EXACTF_invlist);
}
- }
- else if (REGNODE_VARIES(OP(scan))) {
- SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
- I32 fl = 0, f = flags;
- regnode * const oscan = scan;
- regnode_ssc this_class;
- regnode_ssc *oclass = NULL;
- I32 next_is_eval = 0;
-
- switch (PL_regkind[OP(scan)]) {
- case WHILEM: /* End of (?:...)* . */
- scan = NEXTOPER(scan);
- goto finish;
- case PLUS:
- if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
- next = NEXTOPER(scan);
- if ( ( PL_regkind[OP(next)] == EXACT
+ }
+ else if (REGNODE_VARIES(OP(scan))) {
+ SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
+ I32 fl = 0, f = flags;
+ regnode * const oscan = scan;
+ regnode_ssc this_class;
+ regnode_ssc *oclass = NULL;
+ I32 next_is_eval = 0;
+
+ switch (PL_regkind[OP(scan)]) {
+ case WHILEM: /* End of (?:...)* . */
+ scan = NEXTOPER(scan);
+ goto finish;
+ case PLUS:
+ if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
+ next = NEXTOPER(scan);
+ if ( ( PL_regkind[OP(next)] == EXACT
&& ! isEXACTFish(OP(next)))
|| (flags & SCF_DO_STCLASS))
{
- mincount = 1;
- maxcount = REG_INFTY;
- next = regnext(scan);
- scan = NEXTOPER(scan);
- goto do_curly;
- }
- }
- if (flags & SCF_DO_SUBSTR)
- data->pos_min++;
+ mincount = 1;
+ maxcount = REG_INFTY;
+ next = regnext(scan);
+ scan = NEXTOPER(scan);
+ goto do_curly;
+ }
+ }
+ if (flags & SCF_DO_SUBSTR)
+ data->pos_min++;
/* This will bypass the formal 'min += minnext * mincount'
* calculation in the do_curly path, so assumes min width
* of the PLUS payload is exactly one. */
- min++;
- /* FALLTHROUGH */
- case STAR:
+ min++;
+ /* FALLTHROUGH */
+ case STAR:
next = NEXTOPER(scan);
/* This temporary node can now be turned into EXACTFU, and
@@ -5483,121 +5484,121 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
FLAGS(next) = mask;
}
- if (flags & SCF_DO_STCLASS) {
- mincount = 0;
- maxcount = REG_INFTY;
- next = regnext(scan);
- scan = NEXTOPER(scan);
- goto do_curly;
- }
- if (flags & SCF_DO_SUBSTR) {
+ if (flags & SCF_DO_STCLASS) {
+ mincount = 0;
+ maxcount = REG_INFTY;
+ next = regnext(scan);
+ scan = NEXTOPER(scan);
+ goto do_curly;
+ }
+ if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
/* Cannot extend fixed substrings */
- data->cur_is_floating = 1; /* float */
- }
+ data->cur_is_floating = 1; /* float */
+ }
is_inf = is_inf_internal = 1;
scan = regnext(scan);
- goto optimize_curly_tail;
- case CURLY:
- if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
- && (scan->flags == stopparen))
- {
- mincount = 1;
- maxcount = 1;
- } else {
- mincount = ARG1(scan);
- maxcount = ARG2(scan);
- }
- next = regnext(scan);
- if (OP(scan) == CURLYX) {
- I32 lp = (data ? *(data->last_closep) : 0);
- scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
- }
- scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
- next_is_eval = (OP(scan) == EVAL);
- do_curly:
- if (flags & SCF_DO_SUBSTR) {
+ goto optimize_curly_tail;
+ case CURLY:
+ if (stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
+ && (scan->flags == stopparen))
+ {
+ mincount = 1;
+ maxcount = 1;
+ } else {
+ mincount = ARG1(scan);
+ maxcount = ARG2(scan);
+ }
+ next = regnext(scan);
+ if (OP(scan) == CURLYX) {
+ I32 lp = (data ? *(data->last_closep) : 0);
+ scan->flags = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
+ }
+ scan = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
+ next_is_eval = (OP(scan) == EVAL);
+ do_curly:
+ if (flags & SCF_DO_SUBSTR) {
if (mincount == 0)
scan_commit(pRExC_state, data, minlenp, is_inf);
/* Cannot extend fixed substrings */
- pos_before = data->pos_min;
- }
- if (data) {
- fl = data->flags;
- data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
- if (is_inf)
- data->flags |= SF_IS_INF;
- }
- if (flags & SCF_DO_STCLASS) {
- ssc_init(pRExC_state, &this_class);
- oclass = data->start_class;
- data->start_class = &this_class;
- f |= SCF_DO_STCLASS_AND;
- f &= ~SCF_DO_STCLASS_OR;
- }
- /* Exclude from super-linear cache processing any {n,m}
- regops for which the combination of input pos and regex
- pos is not enough information to determine if a match
- will be possible.
-
- For example, in the regex /foo(bar\s*){4,8}baz/ with the
- regex pos at the \s*, the prospects for a match depend not
- only on the input position but also on how many (bar\s*)
- repeats into the {4,8} we are. */
+ pos_before = data->pos_min;
+ }
+ if (data) {
+ fl = data->flags;
+ data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
+ if (is_inf)
+ data->flags |= SF_IS_INF;
+ }
+ if (flags & SCF_DO_STCLASS) {
+ ssc_init(pRExC_state, &this_class);
+ oclass = data->start_class;
+ data->start_class = &this_class;
+ f |= SCF_DO_STCLASS_AND;
+ f &= ~SCF_DO_STCLASS_OR;
+ }
+ /* Exclude from super-linear cache processing any {n,m}
+ regops for which the combination of input pos and regex
+ pos is not enough information to determine if a match
+ will be possible.
+
+ For example, in the regex /foo(bar\s*){4,8}baz/ with the
+ regex pos at the \s*, the prospects for a match depend not
+ only on the input position but also on how many (bar\s*)
+ repeats into the {4,8} we are. */
if ((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY))
- f &= ~SCF_WHILEM_VISITED_POS;
+ f &= ~SCF_WHILEM_VISITED_POS;
- /* This will finish on WHILEM, setting scan, or on NULL: */
+ /* This will finish on WHILEM, setting scan, or on NULL: */
/* recurse study_chunk() on loop bodies */
- minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
+ minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
last, data, stopparen, recursed_depth, NULL,
(mincount == 0
? (f & ~SCF_DO_SUBSTR)
: f)
, depth+1, mutate_ok);
- if (flags & SCF_DO_STCLASS)
- data->start_class = oclass;
- if (mincount == 0 || minnext == 0) {
- if (flags & SCF_DO_STCLASS_OR) {
- ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
- }
- else if (flags & SCF_DO_STCLASS_AND) {
- /* Switch to OR mode: cache the old value of
- * data->start_class */
- INIT_AND_WITHP;
- StructCopy(data->start_class, and_withp, regnode_ssc);
- flags &= ~SCF_DO_STCLASS_AND;
- StructCopy(&this_class, data->start_class, regnode_ssc);
- flags |= SCF_DO_STCLASS_OR;
+ if (flags & SCF_DO_STCLASS)
+ data->start_class = oclass;
+ if (mincount == 0 || minnext == 0) {
+ if (flags & SCF_DO_STCLASS_OR) {
+ ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
+ }
+ else if (flags & SCF_DO_STCLASS_AND) {
+ /* Switch to OR mode: cache the old value of
+ * data->start_class */
+ INIT_AND_WITHP;
+ StructCopy(data->start_class, and_withp, regnode_ssc);
+ flags &= ~SCF_DO_STCLASS_AND;
+ StructCopy(&this_class, data->start_class, regnode_ssc);
+ flags |= SCF_DO_STCLASS_OR;
ANYOF_FLAGS(data->start_class)
|= SSC_MATCHES_EMPTY_STRING;
- }
- } else { /* Non-zero len */
- if (flags & SCF_DO_STCLASS_OR) {
- ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
- }
- else if (flags & SCF_DO_STCLASS_AND)
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
- flags &= ~SCF_DO_STCLASS;
- }
- if (!scan) /* It was not CURLYX, but CURLY. */
- scan = next;
- if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
- /* ? quantifier ok, except for (?{ ... }) */
- && (next_is_eval || !(mincount == 0 && maxcount == 1))
- && (minnext == 0) && (deltanext == 0)
- && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
+ }
+ } else { /* Non-zero len */
+ if (flags & SCF_DO_STCLASS_OR) {
+ ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+ }
+ else if (flags & SCF_DO_STCLASS_AND)
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
+ flags &= ~SCF_DO_STCLASS;
+ }
+ if (!scan) /* It was not CURLYX, but CURLY. */
+ scan = next;
+ if (((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
+ /* ? quantifier ok, except for (?{ ... }) */
+ && (next_is_eval || !(mincount == 0 && maxcount == 1))
+ && (minnext == 0) && (deltanext == 0)
+ && data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
&& maxcount <= REG_INFTY/3) /* Complement check for big
count */
- {
- _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP),
+ {
+ _WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP),
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
"Quantifier unexpected on zero-length expression "
"in regex m/%" UTF8f "/",
- UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
- RExC_precomp)));
+ UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
+ RExC_precomp)));
}
if ( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
@@ -5606,146 +5607,146 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
FAIL("Regexp out of space");
}
- min += minnext * mincount;
- is_inf_internal |= deltanext == OPTIMIZE_INFTY
+ min += minnext * mincount;
+ is_inf_internal |= deltanext == OPTIMIZE_INFTY
|| (maxcount == REG_INFTY && minnext + deltanext > 0);
- is_inf |= is_inf_internal;
+ is_inf |= is_inf_internal;
if (is_inf) {
- delta = OPTIMIZE_INFTY;
+ delta = OPTIMIZE_INFTY;
} else {
- delta += (minnext + deltanext) * maxcount
+ delta += (minnext + deltanext) * maxcount
- minnext * mincount;
}
- /* Try powerful optimization CURLYX => CURLYN. */
- if ( OP(oscan) == CURLYX && data
- && data->flags & SF_IN_PAR
- && !(data->flags & SF_HAS_EVAL)
- && !deltanext && minnext == 1
+ /* Try powerful optimization CURLYX => CURLYN. */
+ if ( OP(oscan) == CURLYX && data
+ && data->flags & SF_IN_PAR
+ && !(data->flags & SF_HAS_EVAL)
+ && !deltanext && minnext == 1
&& mutate_ok
) {
- /* Try to optimize to CURLYN. */
- regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
- regnode * const nxt1 = nxt;
+ /* Try to optimize to CURLYN. */
+ regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
+ regnode * const nxt1 = nxt;
#ifdef DEBUGGING
- regnode *nxt2;
+ regnode *nxt2;
#endif
- /* Skip open. */
- nxt = regnext(nxt);
- if (!REGNODE_SIMPLE(OP(nxt))
- && !(PL_regkind[OP(nxt)] == EXACT
- && STR_LEN(nxt) == 1))
- goto nogo;
+ /* Skip open. */
+ nxt = regnext(nxt);
+ if (!REGNODE_SIMPLE(OP(nxt))
+ && !(PL_regkind[OP(nxt)] == EXACT
+ && STR_LEN(nxt) == 1))
+ goto nogo;
#ifdef DEBUGGING
- nxt2 = nxt;
+ nxt2 = nxt;
#endif
- nxt = regnext(nxt);
- if (OP(nxt) != CLOSE)
- goto nogo;
- if (RExC_open_parens) {
+ nxt = regnext(nxt);
+ if (OP(nxt) != CLOSE)
+ goto nogo;
+ if (RExC_open_parens) {
/*open->CURLYM*/
RExC_open_parens[ARG(nxt1)] = REGNODE_OFFSET(oscan);
/*close->while*/
RExC_close_parens[ARG(nxt1)] = REGNODE_OFFSET(nxt) + 2;
- }
- /* Now we know that nxt2 is the only contents: */
- oscan->flags = (U8)ARG(nxt);
- OP(oscan) = CURLYN;
- OP(nxt1) = NOTHING; /* was OPEN. */
+ }
+ /* Now we know that nxt2 is the only contents: */
+ oscan->flags = (U8)ARG(nxt);
+ OP(oscan) = CURLYN;
+ OP(nxt1) = NOTHING; /* was OPEN. */
#ifdef DEBUGGING
- OP(nxt1 + 1) = OPTIMIZED; /* was count. */
- NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. */
- NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */
- OP(nxt) = OPTIMIZED; /* was CLOSE. */
- OP(nxt + 1) = OPTIMIZED; /* was count. */
- NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */
+ OP(nxt1 + 1) = OPTIMIZED; /* was count. */
+ NEXT_OFF(nxt1+ 1) = 0; /* just for consistency. */
+ NEXT_OFF(nxt2) = 0; /* just for consistency with CURLY. */
+ OP(nxt) = OPTIMIZED; /* was CLOSE. */
+ OP(nxt + 1) = OPTIMIZED; /* was count. */
+ NEXT_OFF(nxt+ 1) = 0; /* just for consistency. */
#endif
- }
- nogo:
-
- /* Try optimization CURLYX => CURLYM. */
- if ( OP(oscan) == CURLYX && data
- && !(data->flags & SF_HAS_PAR)
- && !(data->flags & SF_HAS_EVAL)
- && !deltanext /* atom is fixed width */
- && minnext != 0 /* CURLYM can't handle zero width */
+ }
+ nogo:
+
+ /* Try optimization CURLYX => CURLYM. */
+ if ( OP(oscan) == CURLYX && data
+ && !(data->flags & SF_HAS_PAR)
+ && !(data->flags & SF_HAS_EVAL)
+ && !deltanext /* atom is fixed width */
+ && minnext != 0 /* CURLYM can't handle zero width */
/* Nor characters whose fold at run-time may be
* multi-character */
&& ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
&& mutate_ok
- ) {
- /* XXXX How to optimize if data == 0? */
- /* Optimize to a simpler form. */
- regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
- regnode *nxt2;
-
- OP(oscan) = CURLYM;
- while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
- && (OP(nxt2) != WHILEM))
- nxt = nxt2;
- OP(nxt2) = SUCCEED; /* Whas WHILEM */
- /* Need to optimize away parenths. */
- if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) {
- /* Set the parenth number. */
- regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
-
- oscan->flags = (U8)ARG(nxt);
- if (RExC_open_parens) {
+ ) {
+ /* XXXX How to optimize if data == 0? */
+ /* Optimize to a simpler form. */
+ regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN */
+ regnode *nxt2;
+
+ OP(oscan) = CURLYM;
+ while ( (nxt2 = regnext(nxt)) /* skip over embedded stuff*/
+ && (OP(nxt2) != WHILEM))
+ nxt = nxt2;
+ OP(nxt2) = SUCCEED; /* Whas WHILEM */
+ /* Need to optimize away parenths. */
+ if ((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) {
+ /* Set the parenth number. */
+ regnode *nxt1 = NEXTOPER(oscan) + EXTRA_STEP_2ARGS; /* OPEN*/
+
+ oscan->flags = (U8)ARG(nxt);
+ if (RExC_open_parens) {
/*open->CURLYM*/
RExC_open_parens[ARG(nxt1)] = REGNODE_OFFSET(oscan);
/*close->NOTHING*/
RExC_close_parens[ARG(nxt1)] = REGNODE_OFFSET(nxt2)
+ 1;
- }
- OP(nxt1) = OPTIMIZED; /* was OPEN. */
- OP(nxt) = OPTIMIZED; /* was CLOSE. */
+ }
+ OP(nxt1) = OPTIMIZED; /* was OPEN. */
+ OP(nxt) = OPTIMIZED; /* was CLOSE. */
#ifdef DEBUGGING
- OP(nxt1 + 1) = OPTIMIZED; /* was count. */
- OP(nxt + 1) = OPTIMIZED; /* was count. */
- NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */
- NEXT_OFF(nxt + 1) = 0; /* just for consistency. */
+ OP(nxt1 + 1) = OPTIMIZED; /* was count. */
+ OP(nxt + 1) = OPTIMIZED; /* was count. */
+ NEXT_OFF(nxt1 + 1) = 0; /* just for consistency. */
+ NEXT_OFF(nxt + 1) = 0; /* just for consistency. */
#endif
#if 0
- while ( nxt1 && (OP(nxt1) != WHILEM)) {
- regnode *nnxt = regnext(nxt1);
- if (nnxt == nxt) {
- if (reg_off_by_arg[OP(nxt1)])
- ARG_SET(nxt1, nxt2 - nxt1);
- else if (nxt2 - nxt1 < U16_MAX)
- NEXT_OFF(nxt1) = nxt2 - nxt1;
- else
- OP(nxt) = NOTHING; /* Cannot beautify */
- }
- nxt1 = nnxt;
- }
+ while ( nxt1 && (OP(nxt1) != WHILEM)) {
+ regnode *nnxt = regnext(nxt1);
+ if (nnxt == nxt) {
+ if (reg_off_by_arg[OP(nxt1)])
+ ARG_SET(nxt1, nxt2 - nxt1);
+ else if (nxt2 - nxt1 < U16_MAX)
+ NEXT_OFF(nxt1) = nxt2 - nxt1;
+ else
+ OP(nxt) = NOTHING; /* Cannot beautify */
+ }
+ nxt1 = nnxt;
+ }
#endif
- /* Optimize again: */
+ /* Optimize again: */
/* recurse study_chunk() on optimised CURLYX => CURLYM */
- study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
+ study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
NULL, stopparen, recursed_depth, NULL, 0,
depth+1, mutate_ok);
- }
- else
- oscan->flags = 0;
- }
- else if ((OP(oscan) == CURLYX)
- && (flags & SCF_WHILEM_VISITED_POS)
- /* See the comment on a similar expression above.
- However, this time it's not a subexpression
- we care about, but the expression itself. */
- && (maxcount == REG_INFTY)
- && data) {
- /* This stays as CURLYX, we can put the count/of pair. */
- /* Find WHILEM (as in regexec.c) */
- regnode *nxt = oscan + NEXT_OFF(oscan);
-
- if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
- nxt += ARG(nxt);
+ }
+ else
+ oscan->flags = 0;
+ }
+ else if ((OP(oscan) == CURLYX)
+ && (flags & SCF_WHILEM_VISITED_POS)
+ /* See the comment on a similar expression above.
+ However, this time it's not a subexpression
+ we care about, but the expression itself. */
+ && (maxcount == REG_INFTY)
+ && data) {
+ /* This stays as CURLYX, we can put the count/of pair. */
+ /* Find WHILEM (as in regexec.c) */
+ regnode *nxt = oscan + NEXT_OFF(oscan);
+
+ if (OP(PREVOPER(nxt)) == NOTHING) /* LONGJMP */
+ nxt += ARG(nxt);
nxt = PREVOPER(nxt);
if (nxt->flags & 0xf) {
/* we've already set whilem count on this node */
@@ -5754,68 +5755,68 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
nxt->flags = (U8)(data->whilem_c
| (RExC_whilem_seen << 4)); /* On WHILEM */
}
- }
- if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
- pars++;
- if (flags & SCF_DO_SUBSTR) {
- SV *last_str = NULL;
+ }
+ if (data && fl & (SF_HAS_PAR|SF_IN_PAR))
+ pars++;
+ if (flags & SCF_DO_SUBSTR) {
+ SV *last_str = NULL;
STRLEN last_chrs = 0;
- int counted = mincount != 0;
+ int counted = mincount != 0;
if (data->last_end > 0 && mincount != 0) { /* Ends with a
string. */
- SSize_t b = pos_before >= data->last_start_min
- ? pos_before : data->last_start_min;
- STRLEN l;
- const char * const s = SvPV_const(data->last_found, l);
- SSize_t old = b - data->last_start_min;
+ SSize_t b = pos_before >= data->last_start_min
+ ? pos_before : data->last_start_min;
+ STRLEN l;
+ const char * const s = SvPV_const(data->last_found, l);
+ SSize_t old = b - data->last_start_min;
assert(old >= 0);
- if (UTF)
- old = utf8_hop_forward((U8*)s, old,
+ if (UTF)
+ old = utf8_hop_forward((U8*)s, old,
(U8 *) SvEND(data->last_found))
- (U8*)s;
- l -= old;
- /* Get the added string: */
- last_str = newSVpvn_utf8(s + old, l, UTF);
+ l -= old;
+ /* Get the added string: */
+ last_str = newSVpvn_utf8(s + old, l, UTF);
last_chrs = UTF ? utf8_length((U8*)(s + old),
(U8*)(s + old + l)) : l;
- if (deltanext == 0 && pos_before == b) {
- /* What was added is a constant string */
- if (mincount > 1) {
+ if (deltanext == 0 && pos_before == b) {
+ /* What was added is a constant string */
+ if (mincount > 1) {
- SvGROW(last_str, (mincount * l) + 1);
- repeatcpy(SvPVX(last_str) + l,
- SvPVX_const(last_str), l,
+ SvGROW(last_str, (mincount * l) + 1);
+ repeatcpy(SvPVX(last_str) + l,
+ SvPVX_const(last_str), l,
mincount - 1);
- SvCUR_set(last_str, SvCUR(last_str) * mincount);
- /* Add additional parts. */
- SvCUR_set(data->last_found,
- SvCUR(data->last_found) - l);
- sv_catsv(data->last_found, last_str);
- {
- SV * sv = data->last_found;
- MAGIC *mg =
- SvUTF8(sv) && SvMAGICAL(sv) ?
- mg_find(sv, PERL_MAGIC_utf8) : NULL;
- if (mg && mg->mg_len >= 0)
- mg->mg_len += last_chrs * (mincount-1);
- }
+ SvCUR_set(last_str, SvCUR(last_str) * mincount);
+ /* Add additional parts. */
+ SvCUR_set(data->last_found,
+ SvCUR(data->last_found) - l);
+ sv_catsv(data->last_found, last_str);
+ {
+ SV * sv = data->last_found;
+ MAGIC *mg =
+ SvUTF8(sv) && SvMAGICAL(sv) ?
+ mg_find(sv, PERL_MAGIC_utf8) : NULL;
+ if (mg && mg->mg_len >= 0)
+ mg->mg_len += last_chrs * (mincount-1);
+ }
last_chrs *= mincount;
- data->last_end += l * (mincount - 1);
- }
- } else {
- /* start offset must point into the last copy */
- data->last_start_min += minnext * (mincount - 1);
- data->last_start_max =
+ data->last_end += l * (mincount - 1);
+ }
+ } else {
+ /* start offset must point into the last copy */
+ data->last_start_min += minnext * (mincount - 1);
+ data->last_start_max =
is_inf
? OPTIMIZE_INFTY
- : data->last_start_max +
+ : data->last_start_max +
(maxcount - 1) * (minnext + data->pos_delta);
- }
- }
- /* It is counted once already... */
- data->pos_min += minnext * (mincount - counted);
+ }
+ }
+ /* It is counted once already... */
+ data->pos_min += minnext * (mincount - counted);
#if 0
Perl_re_printf( aTHX_ "counted=%" UVuf " deltanext=%" UVuf
" OPTIMIZE_INFTY=%" UVuf " minnext=%" UVuf
@@ -5827,52 +5828,52 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
(UV)(-counted * deltanext + (minnext + deltanext) * maxcount
- minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
#endif
- if (deltanext == OPTIMIZE_INFTY
+ if (deltanext == OPTIMIZE_INFTY
|| -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta)
- data->pos_delta = OPTIMIZE_INFTY;
- else
- data->pos_delta += - counted * deltanext +
- (minnext + deltanext) * maxcount - minnext * mincount;
- if (mincount != maxcount) {
- /* Cannot extend fixed substrings found inside
- the group. */
+ data->pos_delta = OPTIMIZE_INFTY;
+ else
+ data->pos_delta += - counted * deltanext +
+ (minnext + deltanext) * maxcount - minnext * mincount;
+ if (mincount != maxcount) {
+ /* Cannot extend fixed substrings found inside
+ the group. */
scan_commit(pRExC_state, data, minlenp, is_inf);
- if (mincount && last_str) {
- SV * const sv = data->last_found;
- MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
- mg_find(sv, PERL_MAGIC_utf8) : NULL;
-
- if (mg)
- mg->mg_len = -1;
- sv_setsv(sv, last_str);
- data->last_end = data->pos_min;
- data->last_start_min = data->pos_min - last_chrs;
- data->last_start_max = is_inf
- ? OPTIMIZE_INFTY
- : data->pos_min + data->pos_delta - last_chrs;
- }
- data->cur_is_floating = 1; /* float */
- }
- SvREFCNT_dec(last_str);
- }
- if (data && (fl & SF_HAS_EVAL))
- data->flags |= SF_HAS_EVAL;
- optimize_curly_tail:
- rck_elide_nothing(oscan);
- continue;
-
- default:
+ if (mincount && last_str) {
+ SV * const sv = data->last_found;
+ MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
+ mg_find(sv, PERL_MAGIC_utf8) : NULL;
+
+ if (mg)
+ mg->mg_len = -1;
+ sv_setsv(sv, last_str);
+ data->last_end = data->pos_min;
+ data->last_start_min = data->pos_min - last_chrs;
+ data->last_start_max = is_inf
+ ? OPTIMIZE_INFTY
+ : data->pos_min + data->pos_delta - last_chrs;
+ }
+ data->cur_is_floating = 1; /* float */
+ }
+ SvREFCNT_dec(last_str);
+ }
+ if (data && (fl & SF_HAS_EVAL))
+ data->flags |= SF_HAS_EVAL;
+ optimize_curly_tail:
+ rck_elide_nothing(oscan);
+ continue;
+
+ default:
Perl_croak(aTHX_ "panic: unexpected varying REx opcode %d",
OP(scan));
case REF:
case CLUMP:
- if (flags & SCF_DO_SUBSTR) {
+ if (flags & SCF_DO_SUBSTR) {
/* Cannot expect anything... */
scan_commit(pRExC_state, data, minlenp, is_inf);
- data->cur_is_floating = 1; /* float */
- }
- is_inf = is_inf_internal = 1;
- if (flags & SCF_DO_STCLASS_OR) {
+ data->cur_is_floating = 1; /* float */
+ }
+ is_inf = is_inf_internal = 1;
+ if (flags & SCF_DO_STCLASS_OR) {
if (OP(scan) == CLUMP) {
/* Actually is any start char, but very few code points
* aren't start characters */
@@ -5882,12 +5883,12 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
ssc_anything(data->start_class);
}
}
- flags &= ~SCF_DO_STCLASS;
- break;
- }
- }
- else if (OP(scan) == LNBREAK) {
- if (flags & SCF_DO_STCLASS) {
+ flags &= ~SCF_DO_STCLASS;
+ break;
+ }
+ }
+ else if (OP(scan) == LNBREAK) {
+ if (flags & SCF_DO_STCLASS) {
if (flags & SCF_DO_STCLASS_AND) {
ssc_intersection(data->start_class,
PL_XPosix_ptrs[_CC_VERTSPACE], FALSE);
@@ -5899,16 +5900,16 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
ssc_union(data->start_class,
PL_XPosix_ptrs[_CC_VERTSPACE],
FALSE);
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
/* See commit msg for
* 749e076fceedeb708a624933726e7989f2302f6a */
ANYOF_FLAGS(data->start_class)
&= ~SSC_MATCHES_EMPTY_STRING;
}
- flags &= ~SCF_DO_STCLASS;
+ flags &= ~SCF_DO_STCLASS;
}
- min++;
+ min++;
if (delta != OPTIMIZE_INFTY)
delta++; /* Because of the 2 char string cr-lf */
if (flags & SCF_DO_SUBSTR) {
@@ -5918,17 +5919,17 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
if (data->pos_delta != OPTIMIZE_INFTY) {
data->pos_delta += 1;
}
- data->cur_is_floating = 1; /* float */
+ data->cur_is_floating = 1; /* float */
}
- }
- else if (REGNODE_SIMPLE(OP(scan))) {
+ }
+ else if (REGNODE_SIMPLE(OP(scan))) {
- if (flags & SCF_DO_SUBSTR) {
+ if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
- data->pos_min++;
- }
- min++;
- if (flags & SCF_DO_STCLASS) {
+ data->pos_min++;
+ }
+ min++;
+ if (flags & SCF_DO_STCLASS) {
bool invert = 0;
SV* my_invlist = NULL;
U8 namedclass;
@@ -5936,21 +5937,21 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
/* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
- /* Some of the logic below assumes that switching
- locale on will only add false positives. */
- switch (OP(scan)) {
+ /* Some of the logic below assumes that switching
+ locale on will only add false positives. */
+ switch (OP(scan)) {
- default:
+ default:
#ifdef DEBUGGING
Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d",
OP(scan));
#endif
- case SANY:
- if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
- ssc_match_all_cp(data->start_class);
- break;
+ case SANY:
+ if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
+ ssc_match_all_cp(data->start_class);
+ break;
- case REG_ANY:
+ case REG_ANY:
{
SV* REG_ANY_invlist = _new_invlist(2);
REG_ANY_invlist = add_cp_to_invlist(REG_ANY_invlist,
@@ -5970,8 +5971,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
ssc_clear_locale(data->start_class);
}
SvREFCNT_dec_NN(REG_ANY_invlist);
- }
- break;
+ }
+ break;
case ANYOFD:
case ANYOFL:
@@ -5981,13 +5982,13 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
case ANYOFHr:
case ANYOFHs:
case ANYOF:
- if (flags & SCF_DO_STCLASS_AND)
- ssc_and(pRExC_state, data->start_class,
+ if (flags & SCF_DO_STCLASS_AND)
+ ssc_and(pRExC_state, data->start_class,
(regnode_charclass *) scan);
- else
- ssc_or(pRExC_state, data->start_class,
+ else
+ ssc_or(pRExC_state, data->start_class,
(regnode_charclass *) scan);
- break;
+ break;
case NANYOFM: /* NANYOFM already contains the inversion of the
input ANYOF data, so, unlike things like
@@ -6028,11 +6029,11 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
break;
}
- case NPOSIXL:
+ case NPOSIXL:
invert = 1;
/* FALLTHROUGH */
- case POSIXL:
+ case POSIXL:
namedclass = classnum_to_namedclass(FLAGS(scan)) + invert;
if (flags & SCF_DO_STCLASS_AND) {
bool was_there = cBOOL(
@@ -6072,16 +6073,16 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
what's matched */
invert = 1;
/* FALLTHROUGH */
- case POSIXA:
+ case POSIXA:
my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL);
goto join_posix_and_ascii;
- case NPOSIXD:
- case NPOSIXU:
+ case NPOSIXD:
+ case NPOSIXU:
invert = 1;
/* FALLTHROUGH */
- case POSIXD:
- case POSIXU:
+ case POSIXD:
+ case POSIXU:
my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)], NULL);
/* NPOSIXD matches all upper Latin1 code points unless the
@@ -6105,23 +6106,23 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
ssc_union(data->start_class, my_invlist, invert);
}
SvREFCNT_dec(my_invlist);
- }
- if (flags & SCF_DO_STCLASS_OR)
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
- flags &= ~SCF_DO_STCLASS;
- }
- }
- else if (PL_regkind[OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
- data->flags |= (OP(scan) == MEOL
- ? SF_BEFORE_MEOL
- : SF_BEFORE_SEOL);
+ }
+ if (flags & SCF_DO_STCLASS_OR)
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+ flags &= ~SCF_DO_STCLASS;
+ }
+ }
+ else if (PL_regkind[OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
+ data->flags |= (OP(scan) == MEOL
+ ? SF_BEFORE_MEOL
+ : SF_BEFORE_SEOL);
scan_commit(pRExC_state, data, minlenp, is_inf);
- }
- else if ( PL_regkind[OP(scan)] == BRANCHJ
- /* Lookbehind, or need to calculate parens/evals/stclass: */
- && (scan->flags || data || (flags & SCF_DO_STCLASS))
- && (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
+ }
+ else if ( PL_regkind[OP(scan)] == BRANCHJ
+ /* Lookbehind, or need to calculate parens/evals/stclass: */
+ && (scan->flags || data || (flags & SCF_DO_STCLASS))
+ && (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
{
if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
|| OP(scan) == UNLESSM )
@@ -6139,16 +6140,16 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
if (data) {
data_fake.whilem_c = data->whilem_c;
data_fake.last_closep = data->last_closep;
- }
+ }
else
data_fake.last_closep = &fake;
- data_fake.pos_delta = delta;
+ data_fake.pos_delta = delta;
if ( flags & SCF_DO_STCLASS && !scan->flags
&& OP(scan) == IFMATCH ) { /* Lookahead */
ssc_init(pRExC_state, &intrnl);
data_fake.start_class = &intrnl;
f |= SCF_DO_STCLASS_AND;
- }
+ }
if (flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
next = regnext(scan);
@@ -6165,7 +6166,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
|| minnext > (I32)U8_MAX
|| minnext + deltanext > (I32)U8_MAX)
{
- FAIL2("Lookbehind longer than %" UVuf " not implemented",
+ FAIL2("Lookbehind longer than %" UVuf " not implemented",
(UV)U8_MAX);
}
@@ -6190,24 +6191,24 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
data->whilem_c = data_fake.whilem_c;
}
if (f & SCF_DO_STCLASS_AND) {
- if (flags & SCF_DO_STCLASS_OR) {
- /* OR before, AND after: ideally we would recurse with
- * data_fake to get the AND applied by study of the
- * remainder of the pattern, and then derecurse;
- * *** HACK *** for now just treat as "no information".
- * See [perl #56690].
- */
- ssc_init(pRExC_state, data->start_class);
- } else {
+ if (flags & SCF_DO_STCLASS_OR) {
+ /* OR before, AND after: ideally we would recurse with
+ * data_fake to get the AND applied by study of the
+ * remainder of the pattern, and then derecurse;
+ * *** HACK *** for now just treat as "no information".
+ * See [perl #56690].
+ */
+ ssc_init(pRExC_state, data->start_class);
+ } else {
/* AND before and after: combine and continue. These
* assertions are zero-length, so can match an EMPTY
* string */
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
ANYOF_FLAGS(data->start_class)
|= SSC_MATCHES_EMPTY_STRING;
- }
+ }
}
- }
+ }
#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
else {
/* Positive Lookahead/lookbehind
@@ -6245,9 +6246,9 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
data_fake.flags = 0;
data_fake.substrs[0].flags = 0;
data_fake.substrs[1].flags = 0;
- data_fake.pos_delta = delta;
+ data_fake.pos_delta = delta;
if (is_inf)
- data_fake.flags |= SF_IS_INF;
+ data_fake.flags |= SF_IS_INF;
if ( flags & SCF_DO_STCLASS && !scan->flags
&& OP(scan) == IFMATCH ) { /* Lookahead */
ssc_init(pRExC_state, &intrnl);
@@ -6272,7 +6273,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
|| *minnextp > (I32)U8_MAX
|| *minnextp + deltanext > (I32)U8_MAX)
{
- FAIL2("Lookbehind longer than %" UVuf " not implemented",
+ FAIL2("Lookbehind longer than %" UVuf " not implemented",
(UV)U8_MAX);
}
@@ -6314,65 +6315,65 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
}
}
}
- }
+ }
#endif
- }
- else if (OP(scan) == OPEN) {
- if (stopparen != (I32)ARG(scan))
- pars++;
- }
- else if (OP(scan) == CLOSE) {
- if (stopparen == (I32)ARG(scan)) {
- break;
- }
- if ((I32)ARG(scan) == is_par) {
- next = regnext(scan);
-
- if ( next && (OP(next) != WHILEM) && next < last)
- is_par = 0; /* Disable optimization */
- }
- if (data)
- *(data->last_closep) = ARG(scan);
- }
- else if (OP(scan) == EVAL) {
- if (data)
- data->flags |= SF_HAS_EVAL;
- }
- else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
- if (flags & SCF_DO_SUBSTR) {
+ }
+ else if (OP(scan) == OPEN) {
+ if (stopparen != (I32)ARG(scan))
+ pars++;
+ }
+ else if (OP(scan) == CLOSE) {
+ if (stopparen == (I32)ARG(scan)) {
+ break;
+ }
+ if ((I32)ARG(scan) == is_par) {
+ next = regnext(scan);
+
+ if ( next && (OP(next) != WHILEM) && next < last)
+ is_par = 0; /* Disable optimization */
+ }
+ if (data)
+ *(data->last_closep) = ARG(scan);
+ }
+ else if (OP(scan) == EVAL) {
+ if (data)
+ data->flags |= SF_HAS_EVAL;
+ }
+ else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
+ if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
- flags &= ~SCF_DO_SUBSTR;
- }
- if (data && OP(scan)==ACCEPT) {
- data->flags |= SCF_SEEN_ACCEPT;
- if (stopmin > min)
- stopmin = min;
- }
- }
- else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
- {
- if (flags & SCF_DO_SUBSTR) {
+ flags &= ~SCF_DO_SUBSTR;
+ }
+ if (data && OP(scan)==ACCEPT) {
+ data->flags |= SCF_SEEN_ACCEPT;
+ if (stopmin > min)
+ stopmin = min;
+ }
+ }
+ else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
+ {
+ if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
- data->cur_is_floating = 1; /* float */
- }
- is_inf = is_inf_internal = 1;
- if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
- ssc_anything(data->start_class);
- flags &= ~SCF_DO_STCLASS;
- }
- else if (OP(scan) == GPOS) {
+ data->cur_is_floating = 1; /* float */
+ }
+ is_inf = is_inf_internal = 1;
+ if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
+ ssc_anything(data->start_class);
+ flags &= ~SCF_DO_STCLASS;
+ }
+ else if (OP(scan) == GPOS) {
if (!(RExC_rx->intflags & PREGf_GPOS_FLOAT) &&
- !(delta || is_inf || (data && data->pos_delta)))
- {
+ !(delta || is_inf || (data && data->pos_delta)))
+ {
if (!(RExC_rx->intflags & PREGf_ANCH) && (flags & SCF_DO_SUBSTR))
RExC_rx->intflags |= PREGf_ANCH_GPOS;
- if (RExC_rx->gofs < (STRLEN)min)
- RExC_rx->gofs = min;
+ if (RExC_rx->gofs < (STRLEN)min)
+ RExC_rx->gofs = min;
} else {
RExC_rx->intflags |= PREGf_GPOS_FLOAT;
RExC_rx->gofs = 0;
}
- }
+ }
#ifdef TRIE_STUDY_OPT
#ifdef FULL_TRIE_STUDY
else if (PL_regkind[OP(scan)] == TRIE) {
@@ -6411,7 +6412,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
}
else
data_fake.last_closep = &fake;
- data_fake.pos_delta = delta;
+ data_fake.pos_delta = delta;
if (flags & SCF_DO_STCLASS) {
ssc_init(pRExC_state, &this_class);
data_fake.start_class = &this_class;
@@ -6448,11 +6449,11 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
pars++;
if (data_fake.flags & SCF_SEEN_ACCEPT) {
if ( stopmin > min + min1)
- stopmin = min + min1;
- flags &= ~SCF_DO_SUBSTR;
- if (data)
- data->flags |= SCF_SEEN_ACCEPT;
- }
+ stopmin = min + min1;
+ flags &= ~SCF_DO_SUBSTR;
+ if (data)
+ data->flags |= SCF_SEEN_ACCEPT;
+ }
if (data) {
if (data_fake.flags & SF_HAS_EVAL)
data->flags |= SF_HAS_EVAL;
@@ -6490,7 +6491,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
else {
/* Switch to OR mode: cache the old value of
* data->start_class */
- INIT_AND_WITHP;
+ INIT_AND_WITHP;
StructCopy(data->start_class, and_withp, regnode_ssc);
flags &= ~SCF_DO_STCLASS_AND;
StructCopy(&accum, data->start_class, regnode_ssc);
@@ -6501,24 +6502,24 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
continue;
}
#else
- else if (PL_regkind[OP(scan)] == TRIE) {
- reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
- U8*bang=NULL;
+ else if (PL_regkind[OP(scan)] == TRIE) {
+ reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
+ U8*bang=NULL;
- min += trie->minlen;
- delta += (trie->maxlen - trie->minlen);
- flags &= ~SCF_DO_STCLASS; /* xxx */
+ min += trie->minlen;
+ delta += (trie->maxlen - trie->minlen);
+ flags &= ~SCF_DO_STCLASS; /* xxx */
if (flags & SCF_DO_SUBSTR) {
/* Cannot expect anything... */
scan_commit(pRExC_state, data, minlenp, is_inf);
data->pos_min += trie->minlen;
data->pos_delta += (trie->maxlen - trie->minlen);
- if (trie->maxlen != trie->minlen)
- data->cur_is_floating = 1; /* float */
+ if (trie->maxlen != trie->minlen)
+ data->cur_is_floating = 1; /* float */
}
if (trie->jump) /* no more substrings -- for now /grr*/
flags &= ~SCF_DO_SUBSTR;
- }
+ }
else if (OP(scan) == REGEX_SET) {
Perl_croak(aTHX_ "panic: %s regnode should be resolved"
" before optimization", reg_name[REGEX_SET]);
@@ -6527,8 +6528,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
#endif /* old or new */
#endif /* TRIE_STUDY_OPT */
- /* Else: zero-length, ignore. */
- scan = regnext(scan);
+ /* Else: zero-length, ignore. */
+ scan = regnext(scan);
}
finish:
@@ -6557,19 +6558,19 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
*deltap = is_inf_internal ? OPTIMIZE_INFTY : delta;
if (flags & SCF_DO_SUBSTR && is_inf)
- data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
+ data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
if (is_par > (I32)U8_MAX)
- is_par = 0;
+ is_par = 0;
if (is_par && pars==1 && data) {
- data->flags |= SF_IN_PAR;
- data->flags &= ~SF_HAS_PAR;
+ data->flags |= SF_IN_PAR;
+ data->flags &= ~SF_HAS_PAR;
}
else if (pars && data) {
- data->flags |= SF_HAS_PAR;
- data->flags &= ~SF_IN_PAR;
+ data->flags |= SF_HAS_PAR;
+ data->flags &= ~SF_IN_PAR;
}
if (flags & SCF_DO_STCLASS_OR)
- ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
+ ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
if (flags & SCF_TRIE_RESTUDY)
data->flags |= SCF_TRIE_RESTUDY;
@@ -6595,12 +6596,12 @@ S_add_data(RExC_state_t* const pRExC_state, const char* const s, const U32 n)
PERL_ARGS_ASSERT_ADD_DATA;
Renewc(RExC_rxi->data,
- sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1),
- char, struct reg_data);
+ sizeof(*RExC_rxi->data) + sizeof(void*) * (count + n - 1),
+ char, struct reg_data);
if(count)
- Renew(RExC_rxi->data->what, count + n, U8);
+ Renew(RExC_rxi->data->what, count + n, U8);
else
- Newx(RExC_rxi->data->what, n, U8);
+ Newx(RExC_rxi->data->what, n, U8);
RExC_rxi->data->count = count + n;
Copy(s, RExC_rxi->data->what + count, n, U8);
return count;
@@ -6614,22 +6615,22 @@ Perl_reginitcolors(pTHX)
{
const char * const s = PerlEnv_getenv("PERL_RE_COLORS");
if (s) {
- char *t = savepv(s);
- int i = 0;
- PL_colors[0] = t;
- while (++i < 6) {
- t = strchr(t, '\t');
- if (t) {
- *t = '\0';
- PL_colors[i] = ++t;
- }
- else
- PL_colors[i] = t = (char *)"";
- }
+ char *t = savepv(s);
+ int i = 0;
+ PL_colors[0] = t;
+ while (++i < 6) {
+ t = strchr(t, '\t');
+ if (t) {
+ *t = '\0';
+ PL_colors[i] = ++t;
+ }
+ else
+ PL_colors[i] = t = (char *)"";
+ }
} else {
- int i = 0;
- while (i < 6)
- PL_colors[i++] = (char *)"";
+ int i = 0;
+ while (i < 6)
+ PL_colors[i++] = (char *)"";
}
PL_colorset = 1;
}
@@ -6666,24 +6667,24 @@ regexp_engine const *
Perl_current_re_engine(pTHX)
{
if (IN_PERL_COMPILETIME) {
- HV * const table = GvHV(PL_hintgv);
- SV **ptr;
+ HV * const table = GvHV(PL_hintgv);
+ SV **ptr;
- if (!table || !(PL_hints & HINT_LOCALIZE_HH))
- return &PL_core_reg_engine;
- ptr = hv_fetchs(table, "regcomp", FALSE);
- if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr)))
- return &PL_core_reg_engine;
- return INT2PTR(regexp_engine*, SvIV(*ptr));
+ if (!table || !(PL_hints & HINT_LOCALIZE_HH))
+ return &PL_core_reg_engine;
+ ptr = hv_fetchs(table, "regcomp", FALSE);
+ if ( !(ptr && SvIOK(*ptr) && SvIV(*ptr)))
+ return &PL_core_reg_engine;
+ return INT2PTR(regexp_engine*, SvIV(*ptr));
}
else {
- SV *ptr;
- if (!PL_curcop->cop_hints_hash)
- return &PL_core_reg_engine;
- ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0);
- if ( !(ptr && SvIOK(ptr) && SvIV(ptr)))
- return &PL_core_reg_engine;
- return INT2PTR(regexp_engine*, SvIV(ptr));
+ SV *ptr;
+ if (!PL_curcop->cop_hints_hash)
+ return &PL_core_reg_engine;
+ ptr = cop_hints_fetch_pvs(PL_curcop, "regcomp", 0);
+ if ( !(ptr && SvIOK(ptr) && SvIV(ptr)))
+ return &PL_core_reg_engine;
+ return INT2PTR(regexp_engine*, SvIV(ptr));
}
}
@@ -6699,7 +6700,7 @@ Perl_pregcomp(pTHX_ SV * const pattern, const U32 flags)
/* Dispatch a request to compile a regexp to correct regexp engine. */
DEBUG_COMPILE_r({
Perl_re_printf( aTHX_ "Using engine %" UVxf "\n",
- PTR2UV(eng));
+ PTR2UV(eng));
});
return CALLREGCOMP_ENG(eng, pattern, flags);
}
@@ -6770,7 +6771,7 @@ S_alloc_code_blocks(pTHX_ int ncode)
static void
S_pat_upgrade_to_utf8(pTHX_ RExC_state_t * const pRExC_state,
- char **pat_p, STRLEN *plen_p, int num_code_blocks)
+ char **pat_p, STRLEN *plen_p, int num_code_blocks)
{
U8 *const src = (U8*)*pat_p;
U8 *dst, *d;
@@ -6929,7 +6930,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
oplist = OpSIBLING(oplist);;
}
- /* apply magic and QR overloading to arg */
+ /* apply magic and QR overloading to arg */
SvGETMAGIC(msv);
if (SvROK(msv) && SvAMAGIC(msv)) {
@@ -7061,7 +7062,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
static bool
S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
- char *pat, STRLEN plen)
+ char *pat, STRLEN plen)
{
int n = 0;
STRLEN s;
@@ -7069,21 +7070,21 @@ S_has_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
PERL_UNUSED_CONTEXT;
for (s = 0; s < plen; s++) {
- if ( pRExC_state->code_blocks
+ if ( pRExC_state->code_blocks
&& n < pRExC_state->code_blocks->count
- && s == pRExC_state->code_blocks->cb[n].start)
- {
- s = pRExC_state->code_blocks->cb[n].end;
- n++;
- continue;
- }
- /* TODO ideally should handle [..], (#..), /#.../x to reduce false
- * positives here */
- if (pat[s] == '(' && s+2 <= plen && pat[s+1] == '?' &&
- (pat[s+2] == '{'
+ && s == pRExC_state->code_blocks->cb[n].start)
+ {
+ s = pRExC_state->code_blocks->cb[n].end;
+ n++;
+ continue;
+ }
+ /* TODO ideally should handle [..], (#..), /#.../x to reduce false
+ * positives here */
+ if (pat[s] == '(' && s+2 <= plen && pat[s+1] == '?' &&
+ (pat[s+2] == '{'
|| (s + 2 <= plen && pat[s+2] == '?' && pat[s+3] == '{'))
- )
- return 1;
+ )
+ return 1;
}
return 0;
}
@@ -7120,39 +7121,39 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
DECLARE_AND_GET_RE_DEBUG_FLAGS;
if (pRExC_state->runtime_code_qr) {
- /* this is the second time we've been called; this should
- * only happen if the main pattern got upgraded to utf8
- * during compilation; re-use the qr we compiled first time
- * round (which should be utf8 too)
- */
- qr = pRExC_state->runtime_code_qr;
- pRExC_state->runtime_code_qr = NULL;
- assert(RExC_utf8 && SvUTF8(qr));
+ /* this is the second time we've been called; this should
+ * only happen if the main pattern got upgraded to utf8
+ * during compilation; re-use the qr we compiled first time
+ * round (which should be utf8 too)
+ */
+ qr = pRExC_state->runtime_code_qr;
+ pRExC_state->runtime_code_qr = NULL;
+ assert(RExC_utf8 && SvUTF8(qr));
}
else {
- int n = 0;
- STRLEN s;
- char *p, *newpat;
- int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */
- SV *sv, *qr_ref;
- dSP;
-
- /* determine how many extra chars we need for ' and \ escaping */
- for (s = 0; s < plen; s++) {
- if (pat[s] == '\'' || pat[s] == '\\')
- newlen++;
- }
-
- Newx(newpat, newlen, char);
- p = newpat;
- *p++ = 'q'; *p++ = 'r'; *p++ = '\'';
-
- for (s = 0; s < plen; s++) {
- if ( pRExC_state->code_blocks
- && n < pRExC_state->code_blocks->count
- && s == pRExC_state->code_blocks->cb[n].start)
- {
- /* blank out literal code block so that they aren't
+ int n = 0;
+ STRLEN s;
+ char *p, *newpat;
+ int newlen = plen + 7; /* allow for "qr''xx\0" extra chars */
+ SV *sv, *qr_ref;
+ dSP;
+
+ /* determine how many extra chars we need for ' and \ escaping */
+ for (s = 0; s < plen; s++) {
+ if (pat[s] == '\'' || pat[s] == '\\')
+ newlen++;
+ }
+
+ Newx(newpat, newlen, char);
+ p = newpat;
+ *p++ = 'q'; *p++ = 'r'; *p++ = '\'';
+
+ for (s = 0; s < plen; s++) {
+ if ( pRExC_state->code_blocks
+ && n < pRExC_state->code_blocks->count
+ && s == pRExC_state->code_blocks->cb[n].start)
+ {
+ /* blank out literal code block so that they aren't
* recompiled: eg change from/to:
* /(?{xyz})/
* /(?=====)/
@@ -7163,76 +7164,76 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
* /(?(?{xyz}))/
* /(?(?=====))/
*/
- assert(pat[s] == '(');
- assert(pat[s+1] == '?');
+ assert(pat[s] == '(');
+ assert(pat[s+1] == '?');
*p++ = '(';
*p++ = '?';
s += 2;
- while (s < pRExC_state->code_blocks->cb[n].end) {
- *p++ = '=';
- s++;
- }
+ while (s < pRExC_state->code_blocks->cb[n].end) {
+ *p++ = '=';
+ s++;
+ }
*p++ = ')';
- n++;
- continue;
- }
- if (pat[s] == '\'' || pat[s] == '\\')
- *p++ = '\\';
- *p++ = pat[s];
- }
- *p++ = '\'';
- if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) {
- *p++ = 'x';
+ n++;
+ continue;
+ }
+ if (pat[s] == '\'' || pat[s] == '\\')
+ *p++ = '\\';
+ *p++ = pat[s];
+ }
+ *p++ = '\'';
+ if (pRExC_state->pm_flags & RXf_PMf_EXTENDED) {
+ *p++ = 'x';
if (pRExC_state->pm_flags & RXf_PMf_EXTENDED_MORE) {
*p++ = 'x';
}
}
- *p++ = '\0';
- DEBUG_COMPILE_r({
+ *p++ = '\0';
+ DEBUG_COMPILE_r({
Perl_re_printf( aTHX_
- "%sre-parsing pattern for runtime code:%s %s\n",
- PL_colors[4], PL_colors[5], newpat);
- });
+ "%sre-parsing pattern for runtime code:%s %s\n",
+ PL_colors[4], PL_colors[5], newpat);
+ });
- sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0);
- Safefree(newpat);
+ sv = newSVpvn_flags(newpat, p-newpat-1, RExC_utf8 ? SVf_UTF8 : 0);
+ Safefree(newpat);
- ENTER;
- SAVETMPS;
- save_re_context();
- PUSHSTACKi(PERLSI_REQUIRE);
+ ENTER;
+ SAVETMPS;
+ save_re_context();
+ PUSHSTACKi(PERLSI_REQUIRE);
/* G_RE_REPARSING causes the toker to collapse \\ into \ when
* parsing qr''; normally only q'' does this. It also alters
* hints handling */
- eval_sv(sv, G_SCALAR|G_RE_REPARSING);
- SvREFCNT_dec_NN(sv);
- SPAGAIN;
- qr_ref = POPs;
- PUTBACK;
- {
- SV * const errsv = ERRSV;
- if (SvTRUE_NN(errsv))
+ eval_sv(sv, G_SCALAR|G_RE_REPARSING);
+ SvREFCNT_dec_NN(sv);
+ SPAGAIN;
+ qr_ref = POPs;
+ PUTBACK;
+ {
+ SV * const errsv = ERRSV;
+ if (SvTRUE_NN(errsv))
/* use croak_sv ? */
- Perl_croak_nocontext("%" SVf, SVfARG(errsv));
- }
- assert(SvROK(qr_ref));
- qr = SvRV(qr_ref);
- assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp);
- /* the leaving below frees the tmp qr_ref.
- * Give qr a life of its own */
- SvREFCNT_inc(qr);
- POPSTACK;
- FREETMPS;
- LEAVE;
+ Perl_croak_nocontext("%" SVf, SVfARG(errsv));
+ }
+ assert(SvROK(qr_ref));
+ qr = SvRV(qr_ref);
+ assert(SvTYPE(qr) == SVt_REGEXP && RX_ENGINE((REGEXP*)qr)->op_comp);
+ /* the leaving below frees the tmp qr_ref.
+ * Give qr a life of its own */
+ SvREFCNT_inc(qr);
+ POPSTACK;
+ FREETMPS;
+ LEAVE;
}
if (!RExC_utf8 && SvUTF8(qr)) {
- /* first time through; the pattern got upgraded; save the
- * qr for the next time through */
- assert(!pRExC_state->runtime_code_qr);
- pRExC_state->runtime_code_qr = qr;
- return 0;
+ /* first time through; the pattern got upgraded; save the
+ * qr for the next time through */
+ assert(!pRExC_state->runtime_code_qr);
+ pRExC_state->runtime_code_qr = qr;
+ return 0;
}
@@ -7241,17 +7242,17 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
/* merge the main (r1) and run-time (r2) code blocks into one */
{
- RXi_GET_DECL(ReANY((REGEXP *)qr), r2);
- struct reg_code_block *new_block, *dst;
- RExC_state_t * const r1 = pRExC_state; /* convenient alias */
- int i1 = 0, i2 = 0;
+ RXi_GET_DECL(ReANY((REGEXP *)qr), r2);
+ struct reg_code_block *new_block, *dst;
+ RExC_state_t * const r1 = pRExC_state; /* convenient alias */
+ int i1 = 0, i2 = 0;
int r1c, r2c;
- if (!r2->code_blocks || !r2->code_blocks->count) /* we guessed wrong */
- {
- SvREFCNT_dec_NN(qr);
- return 1;
- }
+ if (!r2->code_blocks || !r2->code_blocks->count) /* we guessed wrong */
+ {
+ SvREFCNT_dec_NN(qr);
+ return 1;
+ }
if (!r1->code_blocks)
r1->code_blocks = S_alloc_code_blocks(aTHX_ 0);
@@ -7259,46 +7260,46 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
r1c = r1->code_blocks->count;
r2c = r2->code_blocks->count;
- Newx(new_block, r1c + r2c, struct reg_code_block);
-
- dst = new_block;
-
- while (i1 < r1c || i2 < r2c) {
- struct reg_code_block *src;
- bool is_qr = 0;
-
- if (i1 == r1c) {
- src = &r2->code_blocks->cb[i2++];
- is_qr = 1;
- }
- else if (i2 == r2c)
- src = &r1->code_blocks->cb[i1++];
- else if ( r1->code_blocks->cb[i1].start
- < r2->code_blocks->cb[i2].start)
- {
- src = &r1->code_blocks->cb[i1++];
- assert(src->end < r2->code_blocks->cb[i2].start);
- }
- else {
- assert( r1->code_blocks->cb[i1].start
- > r2->code_blocks->cb[i2].start);
- src = &r2->code_blocks->cb[i2++];
- is_qr = 1;
- assert(src->end < r1->code_blocks->cb[i1].start);
- }
-
- assert(pat[src->start] == '(');
- assert(pat[src->end] == ')');
- dst->start = src->start;
- dst->end = src->end;
- dst->block = src->block;
- dst->src_regex = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr)
- : src->src_regex;
- dst++;
- }
- r1->code_blocks->count += r2c;
- Safefree(r1->code_blocks->cb);
- r1->code_blocks->cb = new_block;
+ Newx(new_block, r1c + r2c, struct reg_code_block);
+
+ dst = new_block;
+
+ while (i1 < r1c || i2 < r2c) {
+ struct reg_code_block *src;
+ bool is_qr = 0;
+
+ if (i1 == r1c) {
+ src = &r2->code_blocks->cb[i2++];
+ is_qr = 1;
+ }
+ else if (i2 == r2c)
+ src = &r1->code_blocks->cb[i1++];
+ else if ( r1->code_blocks->cb[i1].start
+ < r2->code_blocks->cb[i2].start)
+ {
+ src = &r1->code_blocks->cb[i1++];
+ assert(src->end < r2->code_blocks->cb[i2].start);
+ }
+ else {
+ assert( r1->code_blocks->cb[i1].start
+ > r2->code_blocks->cb[i2].start);
+ src = &r2->code_blocks->cb[i2++];
+ is_qr = 1;
+ assert(src->end < r1->code_blocks->cb[i1].start);
+ }
+
+ assert(pat[src->start] == '(');
+ assert(pat[src->end] == ')');
+ dst->start = src->start;
+ dst->end = src->end;
+ dst->block = src->block;
+ dst->src_regex = is_qr ? (REGEXP*) SvREFCNT_inc( (SV*) qr)
+ : src->src_regex;
+ dst++;
+ }
+ r1->code_blocks->count += r2c;
+ Safefree(r1->code_blocks->cb);
+ r1->code_blocks->cb = new_block;
}
SvREFCNT_dec_NN(qr);
@@ -7506,8 +7507,8 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
REGEXP *
Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
- OP *expr, const regexp_engine* eng, REGEXP *old_re,
- bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
+ OP *expr, const regexp_engine* eng, REGEXP *old_re,
+ bool *is_bare_re, const U32 orig_rx_flags, const U32 pm_flags)
{
REGEXP *Rx; /* Capital 'R' means points to a REGEXP */
STRLEN plen;
@@ -7548,19 +7549,19 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
pRExC_state->code_blocks = NULL;
if (is_bare_re)
- *is_bare_re = FALSE;
+ *is_bare_re = FALSE;
if (expr && (expr->op_type == OP_LIST ||
- (expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
- /* allocate code_blocks if needed */
- OP *o;
- int ncode = 0;
+ (expr->op_type == OP_NULL && expr->op_targ == OP_LIST))) {
+ /* allocate code_blocks if needed */
+ OP *o;
+ int ncode = 0;
- for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o))
- if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
- ncode++; /* count of DO blocks */
+ for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o))
+ if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+ ncode++; /* count of DO blocks */
- if (ncode)
+ if (ncode)
pRExC_state->code_blocks = S_alloc_code_blocks(aTHX_ ncode);
}
@@ -7638,15 +7639,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
exp = SvPV_nomg(pat, plen);
if (!eng->op_comp) {
- if ((SvUTF8(pat) && IN_BYTES)
- || SvGMAGICAL(pat) || SvAMAGIC(pat))
- {
- /* make a temporary copy; either to convert to bytes,
- * or to avoid repeating get-magic / overloaded stringify */
- pat = newSVpvn_flags(exp, plen, SVs_TEMP |
- (IN_BYTES ? 0 : SvUTF8(pat)));
- }
- return CALLREGCOMP_ENG(eng, pat, orig_rx_flags);
+ if ((SvUTF8(pat) && IN_BYTES)
+ || SvGMAGICAL(pat) || SvAMAGIC(pat))
+ {
+ /* make a temporary copy; either to convert to bytes,
+ * or to avoid repeating get-magic / overloaded stringify */
+ pat = newSVpvn_flags(exp, plen, SVs_TEMP |
+ (IN_BYTES ? 0 : SvUTF8(pat)));
+ }
+ return CALLREGCOMP_ENG(eng, pat, orig_rx_flags);
}
/* ignore the utf8ness if the pattern is 0 length */
@@ -7690,11 +7691,11 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
* to utf8 */
if ((pm_flags & PMf_USE_RE_EVAL)
- /* this second condition covers the non-regex literal case,
- * i.e. $foo =~ '(?{})'. */
- || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL))
+ /* this second condition covers the non-regex literal case,
+ * i.e. $foo =~ '(?{})'. */
+ || (IN_PERL_COMPILETIME && (PL_hints & HINT_RE_EVAL))
)
- runtime_code = S_has_runtime_code(aTHX_ pRExC_state, exp, plen);
+ runtime_code = S_has_runtime_code(aTHX_ pRExC_state, exp, plen);
redo_parse:
/* return old regex if pattern hasn't changed */
@@ -7708,10 +7709,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&& !recompile
&& !!RX_UTF8(old_re) == !!RExC_utf8
&& ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) )
- && RX_PRECOMP(old_re)
- && RX_PRELEN(old_re) == plen
+ && RX_PRECOMP(old_re)
+ && RX_PRELEN(old_re) == plen
&& memEQ(RX_PRECOMP(old_re), exp, plen)
- && !runtime_code /* with runtime code, always recompile */ )
+ && !runtime_code /* with runtime code, always recompile */ )
{
DEBUG_COMPILE_r({
SV *dsv= sv_newmortal();
@@ -7734,9 +7735,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&& initial_charset == REGEX_DEPENDS_CHARSET)
{
- /* Set to use unicode semantics if the pattern is in utf8 and has the
- * 'depends' charset specified, as it means unicode when utf8 */
- set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
+ /* Set to use unicode semantics if the pattern is in utf8 and has the
+ * 'depends' charset specified, as it means unicode when utf8 */
+ set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
RExC_uni_semantics = 1;
}
@@ -7744,16 +7745,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
if (runtime_code) {
assert(TAINTING_get || !TAINT_get);
- if (TAINT_get)
- Perl_croak(aTHX_ "Eval-group in insecure regular expression");
+ if (TAINT_get)
+ Perl_croak(aTHX_ "Eval-group in insecure regular expression");
- if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
- /* whoops, we have a non-utf8 pattern, whilst run-time code
- * got compiled as utf8. Try again with a utf8 pattern */
+ if (!S_compile_runtime_code(aTHX_ pRExC_state, exp, plen)) {
+ /* whoops, we have a non-utf8 pattern, whilst run-time code
+ * got compiled as utf8. Try again with a utf8 pattern */
S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &exp, &plen,
pRExC_state->code_blocks ? pRExC_state->code_blocks->count : 0);
goto redo_parse;
- }
+ }
}
assert(!pRExC_state->runtime_code_qr);
@@ -7828,7 +7829,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RXp_COMPFLAGS(RExC_rx) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK;
if (pm_flags & PMf_IS_QR) {
- RExC_rxi->code_blocks = pRExC_state->code_blocks;
+ RExC_rxi->code_blocks = pRExC_state->code_blocks;
if (RExC_rxi->code_blocks) {
RExC_rxi->code_blocks->refcnt++;
}
@@ -7870,7 +7871,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_total_parens = RExC_npar;
}
else if (! MUST_RESTART(flags)) {
- ReREFCNT_dec(Rx);
+ ReREFCNT_dec(Rx);
Perl_croak(aTHX_ "panic: reg returned failure to re_op_compile, flags=%#" UVxf, (UV) flags);
}
@@ -8032,7 +8033,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN;
else
RExC_seen &= ~REG_TOP_LEVEL_BRANCHES_SEEN;
- StructCopy(&zero_scan_data, &data, scan_data_t);
+ StructCopy(&zero_scan_data, &data, scan_data_t);
}
#else
StructCopy(&zero_scan_data, &data, scan_data_t);
@@ -8043,171 +8044,171 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/*dmq: removed as part of de-PMOP: pm->op_pmflags = RExC_flags; */
if (UTF)
- SvUTF8_on(Rx); /* Unicode in it? */
+ SvUTF8_on(Rx); /* Unicode in it? */
RExC_rxi->regstclass = NULL;
if (RExC_naughty >= TOO_NAUGHTY) /* Probably an expensive pattern. */
- RExC_rx->intflags |= PREGf_NAUGHTY;
+ RExC_rx->intflags |= PREGf_NAUGHTY;
scan = RExC_rxi->program + 1; /* First BRANCH. */
/* testing for BRANCH here tells us whether there is "must appear"
data in the pattern. If there is then we can use it for optimisations */
if (!(RExC_seen & REG_TOP_LEVEL_BRANCHES_SEEN)) { /* Only one top-level choice.
*/
- SSize_t fake;
- STRLEN longest_length[2];
- regnode_ssc ch_class; /* pointed to by data */
- int stclass_flag;
- SSize_t last_close = 0; /* pointed to by data */
+ SSize_t fake;
+ STRLEN longest_length[2];
+ regnode_ssc ch_class; /* pointed to by data */
+ int stclass_flag;
+ SSize_t last_close = 0; /* pointed to by data */
regnode *first= scan;
regnode *first_next= regnext(first);
int i;
- /*
- * Skip introductions and multiplicators >= 1
- * so that we can extract the 'meat' of the pattern that must
- * match in the large if() sequence following.
- * NOTE that EXACT is NOT covered here, as it is normally
- * picked up by the optimiser separately.
- *
- * This is unfortunate as the optimiser isnt handling lookahead
- * properly currently.
- *
- */
- while ((OP(first) == OPEN && (sawopen = 1)) ||
- /* An OR of *one* alternative - should not happen now. */
- (OP(first) == BRANCH && OP(first_next) != BRANCH) ||
- /* for now we can't handle lookbehind IFMATCH*/
- (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) ||
- (OP(first) == PLUS) ||
- (OP(first) == MINMOD) ||
- /* An {n,m} with n>0 */
- (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ||
- (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END ))
- {
- /*
- * the only op that could be a regnode is PLUS, all the rest
- * will be regnode_1 or regnode_2.
- *
+ /*
+ * Skip introductions and multiplicators >= 1
+ * so that we can extract the 'meat' of the pattern that must
+ * match in the large if() sequence following.
+ * NOTE that EXACT is NOT covered here, as it is normally
+ * picked up by the optimiser separately.
+ *
+ * This is unfortunate as the optimiser isnt handling lookahead
+ * properly currently.
+ *
+ */
+ while ((OP(first) == OPEN && (sawopen = 1)) ||
+ /* An OR of *one* alternative - should not happen now. */
+ (OP(first) == BRANCH && OP(first_next) != BRANCH) ||
+ /* for now we can't handle lookbehind IFMATCH*/
+ (OP(first) == IFMATCH && !first->flags && (sawlookahead = 1)) ||
+ (OP(first) == PLUS) ||
+ (OP(first) == MINMOD) ||
+ /* An {n,m} with n>0 */
+ (PL_regkind[OP(first)] == CURLY && ARG1(first) > 0) ||
+ (OP(first) == NOTHING && PL_regkind[OP(first_next)] != END ))
+ {
+ /*
+ * the only op that could be a regnode is PLUS, all the rest
+ * will be regnode_1 or regnode_2.
+ *
* (yves doesn't think this is true)
- */
- if (OP(first) == PLUS)
- sawplus = 1;
+ */
+ if (OP(first) == PLUS)
+ sawplus = 1;
else {
if (OP(first) == MINMOD)
sawminmod = 1;
- first += regarglen[OP(first)];
+ first += regarglen[OP(first)];
}
- first = NEXTOPER(first);
- first_next= regnext(first);
- }
+ first = NEXTOPER(first);
+ first_next= regnext(first);
+ }
- /* Starting-point info. */
+ /* Starting-point info. */
again:
DEBUG_PEEP("first:", first, 0, 0);
/* Ignore EXACT as we deal with it later. */
- if (PL_regkind[OP(first)] == EXACT) {
- if (! isEXACTFish(OP(first))) {
- NOOP; /* Empty, get anchored substr later. */
+ if (PL_regkind[OP(first)] == EXACT) {
+ if (! isEXACTFish(OP(first))) {
+ NOOP; /* Empty, get anchored substr later. */
}
- else
- RExC_rxi->regstclass = first;
- }
+ else
+ RExC_rxi->regstclass = first;
+ }
#ifdef TRIE_STCLASS
- else if (PL_regkind[OP(first)] == TRIE &&
- ((reg_trie_data *)RExC_rxi->data->data[ ARG(first) ])->minlen>0)
- {
+ else if (PL_regkind[OP(first)] == TRIE &&
+ ((reg_trie_data *)RExC_rxi->data->data[ ARG(first) ])->minlen>0)
+ {
/* this can happen only on restudy */
RExC_rxi->regstclass = construct_ahocorasick_from_trie(pRExC_state, (regnode *)first, 0);
- }
+ }
#endif
- else if (REGNODE_SIMPLE(OP(first)))
- RExC_rxi->regstclass = first;
- else if (PL_regkind[OP(first)] == BOUND ||
- PL_regkind[OP(first)] == NBOUND)
- RExC_rxi->regstclass = first;
- else if (PL_regkind[OP(first)] == BOL) {
+ else if (REGNODE_SIMPLE(OP(first)))
+ RExC_rxi->regstclass = first;
+ else if (PL_regkind[OP(first)] == BOUND ||
+ PL_regkind[OP(first)] == NBOUND)
+ RExC_rxi->regstclass = first;
+ else if (PL_regkind[OP(first)] == BOL) {
RExC_rx->intflags |= (OP(first) == MBOL
? PREGf_ANCH_MBOL
: PREGf_ANCH_SBOL);
- first = NEXTOPER(first);
- goto again;
- }
- else if (OP(first) == GPOS) {
+ first = NEXTOPER(first);
+ goto again;
+ }
+ else if (OP(first) == GPOS) {
RExC_rx->intflags |= PREGf_ANCH_GPOS;
- first = NEXTOPER(first);
- goto again;
- }
- else if ((!sawopen || !RExC_sawback) &&
+ first = NEXTOPER(first);
+ goto again;
+ }
+ else if ((!sawopen || !RExC_sawback) &&
!sawlookahead &&
- (OP(first) == STAR &&
- PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
+ (OP(first) == STAR &&
+ PL_regkind[OP(NEXTOPER(first))] == REG_ANY) &&
!(RExC_rx->intflags & PREGf_ANCH) && !pRExC_state->code_blocks)
- {
- /* turn .* into ^.* with an implied $*=1 */
- const int type =
- (OP(NEXTOPER(first)) == REG_ANY)
+ {
+ /* turn .* into ^.* with an implied $*=1 */
+ const int type =
+ (OP(NEXTOPER(first)) == REG_ANY)
? PREGf_ANCH_MBOL
: PREGf_ANCH_SBOL;
RExC_rx->intflags |= (type | PREGf_IMPLICIT);
- first = NEXTOPER(first);
- goto again;
- }
+ first = NEXTOPER(first);
+ goto again;
+ }
if (sawplus && !sawminmod && !sawlookahead
&& (!sawopen || !RExC_sawback)
- && !pRExC_state->code_blocks) /* May examine pos and $& */
- /* x+ must match at the 1st pos of run of x's */
- RExC_rx->intflags |= PREGf_SKIP;
+ && !pRExC_state->code_blocks) /* May examine pos and $& */
+ /* x+ must match at the 1st pos of run of x's */
+ RExC_rx->intflags |= PREGf_SKIP;
- /* Scan is after the zeroth branch, first is atomic matcher. */
+ /* Scan is after the zeroth branch, first is atomic matcher. */
#ifdef TRIE_STUDY_OPT
- DEBUG_PARSE_r(
- if (!restudied)
+ DEBUG_PARSE_r(
+ if (!restudied)
Perl_re_printf( aTHX_ "first at %" IVdf "\n",
- (IV)(first - scan + 1))
+ (IV)(first - scan + 1))
);
#else
- DEBUG_PARSE_r(
+ DEBUG_PARSE_r(
Perl_re_printf( aTHX_ "first at %" IVdf "\n",
- (IV)(first - scan + 1))
+ (IV)(first - scan + 1))
);
#endif
- /*
- * If there's something expensive in the r.e., find the
- * longest literal string that must appear and make it the
- * regmust. Resolve ties in favor of later strings, since
- * the regstart check works with the beginning of the r.e.
- * and avoiding duplication strengthens checking. Not a
- * strong reason, but sufficient in the absence of others.
- * [Now we resolve ties in favor of the earlier string if
- * it happens that c_offset_min has been invalidated, since the
- * earlier string may buy us something the later one won't.]
- */
-
- data.substrs[0].str = newSVpvs("");
- data.substrs[1].str = newSVpvs("");
- data.last_found = newSVpvs("");
- data.cur_is_floating = 0; /* initially any found substring is fixed */
- ENTER_with_name("study_chunk");
- SAVEFREESV(data.substrs[0].str);
- SAVEFREESV(data.substrs[1].str);
- SAVEFREESV(data.last_found);
- first = scan;
- if (!RExC_rxi->regstclass) {
- ssc_init(pRExC_state, &ch_class);
- data.start_class = &ch_class;
- stclass_flag = SCF_DO_STCLASS_AND;
- } else /* XXXX Check for BOUND? */
- stclass_flag = 0;
- data.last_closep = &last_close;
+ /*
+ * If there's something expensive in the r.e., find the
+ * longest literal string that must appear and make it the
+ * regmust. Resolve ties in favor of later strings, since
+ * the regstart check works with the beginning of the r.e.
+ * and avoiding duplication strengthens checking. Not a
+ * strong reason, but sufficient in the absence of others.
+ * [Now we resolve ties in favor of the earlier string if
+ * it happens that c_offset_min has been invalidated, since the
+ * earlier string may buy us something the later one won't.]
+ */
+
+ data.substrs[0].str = newSVpvs("");
+ data.substrs[1].str = newSVpvs("");
+ data.last_found = newSVpvs("");
+ data.cur_is_floating = 0; /* initially any found substring is fixed */
+ ENTER_with_name("study_chunk");
+ SAVEFREESV(data.substrs[0].str);
+ SAVEFREESV(data.substrs[1].str);
+ SAVEFREESV(data.last_found);
+ first = scan;
+ if (!RExC_rxi->regstclass) {
+ ssc_init(pRExC_state, &ch_class);
+ data.start_class = &ch_class;
+ stclass_flag = SCF_DO_STCLASS_AND;
+ } else /* XXXX Check for BOUND? */
+ stclass_flag = 0;
+ data.last_closep = &last_close;
DEBUG_RExC_seen();
/*
* MAIN ENTRY FOR study_chunk() FOR m/PATTERN/
* (NO top level branches)
*/
- minlen = study_chunk(pRExC_state, &first, &minlen, &fake,
+ minlen = study_chunk(pRExC_state, &first, &minlen, &fake,
scan + RExC_size, /* Up to end */
&data, -1, 0, NULL,
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
@@ -8218,15 +8219,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
- if ( RExC_total_parens == 1 && !data.cur_is_floating
- && data.last_start_min == 0 && data.last_end > 0
- && !RExC_seen_zerolen
+ if ( RExC_total_parens == 1 && !data.cur_is_floating
+ && data.last_start_min == 0 && data.last_end > 0
+ && !RExC_seen_zerolen
&& !(RExC_seen & REG_VERBARG_SEEN)
&& !(RExC_seen & REG_GPOS_SEEN)
){
- RExC_rx->extflags |= RXf_CHECK_ALL;
+ RExC_rx->extflags |= RXf_CHECK_ALL;
}
- scan_commit(pRExC_state, &data,&minlen, 0);
+ scan_commit(pRExC_state, &data,&minlen, 0);
/* XXX this is done in reverse order because that's the way the
@@ -8263,39 +8264,39 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
}
- LEAVE_with_name("study_chunk");
+ LEAVE_with_name("study_chunk");
- if (RExC_rxi->regstclass
- && (OP(RExC_rxi->regstclass) == REG_ANY || OP(RExC_rxi->regstclass) == SANY))
- RExC_rxi->regstclass = NULL;
+ if (RExC_rxi->regstclass
+ && (OP(RExC_rxi->regstclass) == REG_ANY || OP(RExC_rxi->regstclass) == SANY))
+ RExC_rxi->regstclass = NULL;
- if ((!(RExC_rx->substrs->data[0].substr || RExC_rx->substrs->data[0].utf8_substr)
+ if ((!(RExC_rx->substrs->data[0].substr || RExC_rx->substrs->data[0].utf8_substr)
|| RExC_rx->substrs->data[0].min_offset)
- && stclass_flag
+ && stclass_flag
&& ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
- && is_ssc_worth_it(pRExC_state, data.start_class))
- {
- const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
+ && is_ssc_worth_it(pRExC_state, data.start_class))
+ {
+ const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
ssc_finalize(pRExC_state, data.start_class);
- Newx(RExC_rxi->data->data[n], 1, regnode_ssc);
- StructCopy(data.start_class,
- (regnode_ssc*)RExC_rxi->data->data[n],
- regnode_ssc);
- RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
- RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
- DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
+ Newx(RExC_rxi->data->data[n], 1, regnode_ssc);
+ StructCopy(data.start_class,
+ (regnode_ssc*)RExC_rxi->data->data[n],
+ regnode_ssc);
+ RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
+ RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
+ DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state);
Perl_re_printf( aTHX_
- "synthetic stclass \"%s\".\n",
- SvPVX_const(sv));});
+ "synthetic stclass \"%s\".\n",
+ SvPVX_const(sv));});
data.start_class = NULL;
- }
+ }
/* A temporary algorithm prefers floated substr to fixed one of
* same length to dig more info. */
- i = (longest_length[0] <= longest_length[1]);
+ i = (longest_length[0] <= longest_length[1]);
RExC_rx->substrs->check_ix = i;
RExC_rx->check_end_shift = RExC_rx->substrs->data[i].end_shift;
RExC_rx->check_substr = RExC_rx->substrs->data[i].substr;
@@ -8305,38 +8306,38 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
if (!i && (RExC_rx->intflags & (PREGf_ANCH_SBOL|PREGf_ANCH_GPOS)))
RExC_rx->intflags |= PREGf_NOSCAN;
- if ((RExC_rx->check_substr || RExC_rx->check_utf8) ) {
- RExC_rx->extflags |= RXf_USE_INTUIT;
- if (SvTAIL(RExC_rx->check_substr ? RExC_rx->check_substr : RExC_rx->check_utf8))
- RExC_rx->extflags |= RXf_INTUIT_TAIL;
- }
+ if ((RExC_rx->check_substr || RExC_rx->check_utf8) ) {
+ RExC_rx->extflags |= RXf_USE_INTUIT;
+ if (SvTAIL(RExC_rx->check_substr ? RExC_rx->check_substr : RExC_rx->check_utf8))
+ RExC_rx->extflags |= RXf_INTUIT_TAIL;
+ }
- /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
- if ( (STRLEN)minlen < longest_length[1] )
+ /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
+ if ( (STRLEN)minlen < longest_length[1] )
minlen= longest_length[1];
if ( (STRLEN)minlen < longest_length[0] )
minlen= longest_length[0];
*/
}
else {
- /* Several toplevels. Best we can is to set minlen. */
- SSize_t fake;
- regnode_ssc ch_class;
- SSize_t last_close = 0;
+ /* Several toplevels. Best we can is to set minlen. */
+ SSize_t fake;
+ regnode_ssc ch_class;
+ SSize_t last_close = 0;
DEBUG_PARSE_r(Perl_re_printf( aTHX_ "\nMulti Top Level\n"));
- scan = RExC_rxi->program + 1;
- ssc_init(pRExC_state, &ch_class);
- data.start_class = &ch_class;
- data.last_closep = &last_close;
+ scan = RExC_rxi->program + 1;
+ ssc_init(pRExC_state, &ch_class);
+ data.start_class = &ch_class;
+ data.last_closep = &last_close;
DEBUG_RExC_seen();
/*
* MAIN ENTRY FOR study_chunk() FOR m/P1|P2|.../
* (patterns WITH top level branches)
*/
- minlen = study_chunk(pRExC_state,
+ minlen = study_chunk(pRExC_state,
&scan, &minlen, &fake, scan + RExC_size, &data, -1, 0, NULL,
SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
? SCF_TRIE_DOING_RESTUDY
@@ -8345,7 +8346,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
CHECK_RESTUDY_GOTO_butfirst(NOOP);
- RExC_rx->check_substr = NULL;
+ RExC_rx->check_substr = NULL;
RExC_rx->check_utf8 = NULL;
RExC_rx->substrs->data[0].substr = NULL;
RExC_rx->substrs->data[0].utf8_substr = NULL;
@@ -8353,25 +8354,25 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_rx->substrs->data[1].utf8_substr = NULL;
if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
- && is_ssc_worth_it(pRExC_state, data.start_class))
+ && is_ssc_worth_it(pRExC_state, data.start_class))
{
- const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
+ const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
ssc_finalize(pRExC_state, data.start_class);
- Newx(RExC_rxi->data->data[n], 1, regnode_ssc);
- StructCopy(data.start_class,
- (regnode_ssc*)RExC_rxi->data->data[n],
- regnode_ssc);
- RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
- RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
- DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
+ Newx(RExC_rxi->data->data[n], 1, regnode_ssc);
+ StructCopy(data.start_class,
+ (regnode_ssc*)RExC_rxi->data->data[n],
+ regnode_ssc);
+ RExC_rxi->regstclass = (regnode*)RExC_rxi->data->data[n];
+ RExC_rx->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
+ DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
regprop(RExC_rx, sv, (regnode*)data.start_class, NULL, pRExC_state);
Perl_re_printf( aTHX_
- "synthetic stclass \"%s\".\n",
- SvPVX_const(sv));});
+ "synthetic stclass \"%s\".\n",
+ SvPVX_const(sv));});
data.start_class = NULL;
- }
+ }
}
if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN) {
@@ -8402,16 +8403,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RExC_rx->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the
lookbehind */
if (pRExC_state->code_blocks)
- RExC_rx->extflags |= RXf_EVAL_SEEN;
+ RExC_rx->extflags |= RXf_EVAL_SEEN;
if (RExC_seen & REG_VERBARG_SEEN)
{
- RExC_rx->intflags |= PREGf_VERBARG_SEEN;
+ RExC_rx->intflags |= PREGf_VERBARG_SEEN;
RExC_rx->extflags |= RXf_NO_INPLACE_SUBST; /* don't understand this! Yves */
}
if (RExC_seen & REG_CUTGROUP_SEEN)
- RExC_rx->intflags |= PREGf_CUTGROUP_SEEN;
+ RExC_rx->intflags |= PREGf_CUTGROUP_SEEN;
if (pm_flags & PMf_USE_RE_EVAL)
- RExC_rx->intflags |= PREGf_USE_RE_EVAL;
+ RExC_rx->intflags |= PREGf_USE_RE_EVAL;
if (RExC_paren_names)
RXp_PAREN_NAMES(RExC_rx) = MUTABLE_HV(SvREFCNT_inc(RExC_paren_names));
else
@@ -8567,7 +8568,7 @@ Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
SV*
Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
- const U32 flags)
+ const U32 flags)
{
SV *ret;
struct regexp *const rx = ReANY(r);
@@ -8616,9 +8617,9 @@ Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
if (flags & RXapif_ALL) {
return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);
} else {
- SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
+ SV *sv = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
if (sv) {
- SvREFCNT_dec_NN(sv);
+ SvREFCNT_dec_NN(sv);
return TRUE;
} else {
return FALSE;
@@ -8637,11 +8638,11 @@ Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;
if ( rx && RXp_PAREN_NAMES(rx) ) {
- (void)hv_iterinit(RXp_PAREN_NAMES(rx));
+ (void)hv_iterinit(RXp_PAREN_NAMES(rx));
- return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
+ return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
} else {
- return FALSE;
+ return FALSE;
}
}
@@ -8671,7 +8672,7 @@ Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
}
}
if (parno || flags & RXapif_ALL) {
- return newSVhek(HeKEY_hek(temphe));
+ return newSVhek(HeKEY_hek(temphe));
}
}
}
@@ -8695,7 +8696,7 @@ Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
ret = CALLREG_NAMED_BUFF_ALL(r, (flags | RXapif_REGNAMES));
av = MUTABLE_AV(SvRV(ret));
length = av_count(av);
- SvREFCNT_dec_NN(ret);
+ SvREFCNT_dec_NN(ret);
return newSViv(length);
} else {
Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
@@ -8743,7 +8744,7 @@ Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
void
Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
- SV * const sv)
+ SV * const sv)
{
struct regexp *const rx = ReANY(r);
char *s = NULL;
@@ -8782,16 +8783,16 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
&& rx->offs[0].start != -1)
{
/* $`, ${^PREMATCH} */
- i = rx->offs[0].start;
- s = rx->subbeg;
+ i = rx->offs[0].start;
+ s = rx->subbeg;
}
else
if ((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
&& rx->offs[0].end != -1)
{
/* $', ${^POSTMATCH} */
- s = rx->subbeg - rx->suboffset + rx->offs[0].end;
- i = rx->sublen + rx->suboffset - rx->offs[0].end;
+ s = rx->subbeg - rx->suboffset + rx->offs[0].end;
+ i = rx->sublen + rx->suboffset - rx->offs[0].end;
}
else
if (inRANGE(n, 0, (I32)rx->nparens) &&
@@ -8848,7 +8849,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
void
Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
- SV const * const value)
+ SV const * const value)
{
PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE;
@@ -8893,32 +8894,32 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
case RX_BUFF_IDX_PREMATCH: /* $` */
if (rx->offs[0].start != -1) {
- i = rx->offs[0].start;
- if (i > 0) {
- s1 = 0;
- t1 = i;
- goto getlen;
- }
- }
+ i = rx->offs[0].start;
+ if (i > 0) {
+ s1 = 0;
+ t1 = i;
+ goto getlen;
+ }
+ }
return 0;
case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
case RX_BUFF_IDX_POSTMATCH: /* $' */
- if (rx->offs[0].end != -1) {
- i = rx->sublen - rx->offs[0].end;
- if (i > 0) {
- s1 = rx->offs[0].end;
- t1 = rx->sublen;
- goto getlen;
- }
- }
+ if (rx->offs[0].end != -1) {
+ i = rx->sublen - rx->offs[0].end;
+ if (i > 0) {
+ s1 = rx->offs[0].end;
+ t1 = rx->sublen;
+ goto getlen;
+ }
+ }
return 0;
default: /* $& / ${^MATCH}, $1, $2, ... */
- if (paren <= (I32)rx->nparens &&
+ if (paren <= (I32)rx->nparens &&
(s1 = rx->offs[paren].start) != -1 &&
(t1 = rx->offs[paren].end) != -1)
- {
+ {
i = t1 - s1;
goto getlen;
} else {
@@ -8945,11 +8946,11 @@ SV*
Perl_reg_qr_package(pTHX_ REGEXP * const rx)
{
PERL_ARGS_ASSERT_REG_QR_PACKAGE;
- PERL_UNUSED_ARG(rx);
- if (0)
- return NULL;
- else
- return newSVpvs("Regexp");
+ PERL_UNUSED_ARG(rx);
+ if (0)
+ return NULL;
+ else
+ return newSVpvs("Regexp");
}
/* Scans the name of a named buffer from the pattern.
@@ -8977,22 +8978,22 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
else if (isIDFIRST_lazy_if_safe(RExC_parse, RExC_end, UTF)) {
/* Note that the code here assumes well-formed UTF-8. Skip IDFIRST by
* using do...while */
- if (UTF)
- do {
- RExC_parse += UTF8SKIP(RExC_parse);
- } while ( RExC_parse < RExC_end
+ if (UTF)
+ do {
+ RExC_parse += UTF8SKIP(RExC_parse);
+ } while ( RExC_parse < RExC_end
&& isWORDCHAR_utf8_safe((U8*)RExC_parse, (U8*) RExC_end));
- else
- do {
- RExC_parse++;
- } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse));
+ else
+ do {
+ RExC_parse++;
+ } while (RExC_parse < RExC_end && isWORDCHAR(*RExC_parse));
} else {
RExC_parse++; /* so the <- from the vFAIL is after the offending
character */
vFAIL("Group name must start with a non-digit word character");
}
sv_name = newSVpvn_flags(name_start, (int)(RExC_parse - name_start),
- SVs_TEMP | (UTF ? SVf_UTF8 : 0));
+ SVs_TEMP | (UTF ? SVf_UTF8 : 0));
if ( flags == REG_RSN_RETURN_NAME)
return sv_name;
else if (flags==REG_RSN_RETURN_DATA) {
@@ -9312,7 +9313,7 @@ Perl__new_invlist(pTHX_ IV initial_size)
SV* new_list;
if (initial_size < 0) {
- initial_size = 10;
+ initial_size = 10;
}
new_list = newSV_type(SVt_INVLIST);
@@ -9358,7 +9359,7 @@ Perl__new_invlist_C_array(pTHX_ const UV* const list)
SvPV_set(invlist, (char *) (list + HEADER_LENGTH));
SvLEN_set(invlist, 0); /* Means we own the contents, and the system
- shouldn't touch it */
+ shouldn't touch it */
*(get_invlist_offset_addr(invlist)) = offset;
@@ -9398,39 +9399,39 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
array = _invlist_array_init(invlist, ! offset);
}
else {
- /* Here, the existing list is non-empty. The current max entry in the
- * list is generally the first value not in the set, except when the
- * set extends to the end of permissible values, in which case it is
- * the first entry in that final set, and so this call is an attempt to
- * append out-of-order */
-
- UV final_element = len - 1;
- array = invlist_array(invlist);
- if ( array[final_element] > start
- || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
- {
- Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
- array[final_element], start,
- ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
- }
+ /* Here, the existing list is non-empty. The current max entry in the
+ * list is generally the first value not in the set, except when the
+ * set extends to the end of permissible values, in which case it is
+ * the first entry in that final set, and so this call is an attempt to
+ * append out-of-order */
+
+ UV final_element = len - 1;
+ array = invlist_array(invlist);
+ if ( array[final_element] > start
+ || ELEMENT_RANGE_MATCHES_INVLIST(final_element))
+ {
+ Perl_croak(aTHX_ "panic: attempting to append to an inversion list, but wasn't at the end of the list, final=%" UVuf ", start=%" UVuf ", match=%c",
+ array[final_element], start,
+ ELEMENT_RANGE_MATCHES_INVLIST(final_element) ? 't' : 'f');
+ }
/* Here, it is a legal append. If the new range begins 1 above the end
* of the range below it, it is extending the range below it, so the
* new first value not in the set is one greater than the newly
* extended range. */
offset = *get_invlist_offset_addr(invlist);
- if (array[final_element] == start) {
- if (end != UV_MAX) {
- array[final_element] = end + 1;
- }
- else {
- /* But if the end is the maximum representable on the machine,
+ if (array[final_element] == start) {
+ if (end != UV_MAX) {
+ array[final_element] = end + 1;
+ }
+ else {
+ /* But if the end is the maximum representable on the machine,
* assume that infinity was actually what was meant. Just let
* the range that this would extend to have no end */
- invlist_set_len(invlist, len - 1, offset);
- }
- return;
- }
+ invlist_set_len(invlist, len - 1, offset);
+ }
+ return;
+ }
}
/* Here the new range doesn't extend any existing set. Add it */
@@ -9440,27 +9441,27 @@ S__append_range_to_invlist(pTHX_ SV* const invlist,
/* If wll overflow the existing space, extend, which may cause the array to
* be moved */
if (max < len) {
- invlist_extend(invlist, len);
+ invlist_extend(invlist, len);
/* Have to set len here to avoid assert failure in invlist_array() */
invlist_set_len(invlist, len, offset);
- array = invlist_array(invlist);
+ array = invlist_array(invlist);
}
else {
- invlist_set_len(invlist, len, offset);
+ invlist_set_len(invlist, len, offset);
}
/* The next item on the list starts the range, the one after that is
* one past the new range. */
array[len - 2] = start;
if (end != UV_MAX) {
- array[len - 1] = end + 1;
+ array[len - 1] = end + 1;
}
else {
- /* But if the end is the maximum representable on the machine, just let
- * the range have no end */
- invlist_set_len(invlist, len - 1, offset);
+ /* But if the end is the maximum representable on the machine, just let
+ * the range have no end */
+ invlist_set_len(invlist, len - 1, offset);
}
}
@@ -9484,7 +9485,7 @@ Perl__invlist_search(SV* const invlist, const UV cp)
/* If list is empty, return failure. */
if (high == 0) {
- return -1;
+ return -1;
}
/* (We can't get the array unless we know the list is non-empty) */
@@ -9535,20 +9536,20 @@ Perl__invlist_search(SV* const invlist, const UV cp)
* The loop below converges on the i+1. Note that there may not be an
* (i+1)th element in the array, and things work nonetheless */
while (low < high) {
- mid = (low + high) / 2;
+ mid = (low + high) / 2;
assert(mid <= highest_element);
- if (array[mid] <= cp) { /* cp >= array[mid] */
- low = mid + 1;
+ if (array[mid] <= cp) { /* cp >= array[mid] */
+ low = mid + 1;
- /* We could do this extra test to exit the loop early.
- if (cp < array[low]) {
- return mid;
- }
- */
- }
- else { /* cp < array[mid] */
- high = mid;
- }
+ /* We could do this extra test to exit the loop early.
+ if (cp < array[low]) {
+ return mid;
+ }
+ */
+ }
+ else { /* cp < array[mid] */
+ high = mid;
+ }
}
found_entry:
@@ -9681,7 +9682,7 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
SvREFCNT_dec_NN(u);
}
- return;
+ return;
}
/* Here both lists exist and are non-empty */
@@ -9692,8 +9693,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* up so are looking at b's complement. */
if (complement_b) {
- /* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later */
+ /* To complement, we invert: if the first element is 0, remove it. To
+ * do this, we just pretend the array starts one later */
if (array_b[0] == 0) {
array_b++;
len_b--;
@@ -9718,11 +9719,11 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
/* Go through each input list item by item, stopping when have exhausted
* one of them */
while (i_a < len_a && i_b < len_b) {
- UV cp; /* The element to potentially add to the union's array */
- bool cp_in_set; /* is it in the input list's set or not */
+ UV cp; /* The element to potentially add to the union's array */
+ bool cp_in_set; /* is it in the input list's set or not */
- /* We need to take one or the other of the two inputs for the union.
- * Since we are merging two sorted lists, we take the smaller of the
+ /* We need to take one or the other of the two inputs for the union.
+ * Since we are merging two sorted lists, we take the smaller of the
* next items. In case of a tie, we take first the one that is in its
* set. If we first took the one not in its set, it would decrement
* the count, possibly to 0 which would cause it to be output as ending
@@ -9732,33 +9733,33 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* momentarily decremented to 0, and thus the two adjoining ranges will
* be seamlessly merged. (In a tie and both are in the set or both not
* in the set, it doesn't matter which we take first.) */
- if ( array_a[i_a] < array_b[i_b]
- || ( array_a[i_a] == array_b[i_b]
- && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
- {
- cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
- cp = array_a[i_a++];
- }
- else {
- cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
- cp = array_b[i_b++];
- }
-
- /* Here, have chosen which of the two inputs to look at. Only output
- * if the running count changes to/from 0, which marks the
- * beginning/end of a range that's in the set */
- if (cp_in_set) {
- if (count == 0) {
- array_u[i_u++] = cp;
- }
- count++;
- }
- else {
- count--;
- if (count == 0) {
- array_u[i_u++] = cp;
- }
- }
+ if ( array_a[i_a] < array_b[i_b]
+ || ( array_a[i_a] == array_b[i_b]
+ && ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
+ {
+ cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
+ cp = array_a[i_a++];
+ }
+ else {
+ cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
+ cp = array_b[i_b++];
+ }
+
+ /* Here, have chosen which of the two inputs to look at. Only output
+ * if the running count changes to/from 0, which marks the
+ * beginning/end of a range that's in the set */
+ if (cp_in_set) {
+ if (count == 0) {
+ array_u[i_u++] = cp;
+ }
+ count++;
+ }
+ else {
+ count--;
+ if (count == 0) {
+ array_u[i_u++] = cp;
+ }
+ }
}
@@ -9769,9 +9770,9 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* that list is in its set. (i_a and i_b each currently index the element
* beyond the one we care about.) */
if ( (i_a != len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
- || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
+ || (i_b != len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
{
- count--;
+ count--;
}
/* Above we decremented 'count' if the list that had unexamined elements in
@@ -9801,11 +9802,11 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
else {
IV copy_count = len_a - i_a;
if (copy_count > 0) { /* The non-exhausted input is 'a' */
- Copy(array_a + i_a, array_u + i_u, copy_count, UV);
+ Copy(array_a + i_a, array_u + i_u, copy_count, UV);
}
else { /* The non-exhausted input is b */
copy_count = len_b - i_b;
- Copy(array_b + i_b, array_u + i_u, copy_count, UV);
+ Copy(array_b + i_b, array_u + i_u, copy_count, UV);
}
len_u = i_u + copy_count;
}
@@ -9814,9 +9815,9 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* array_u, so re-find it. (Note that it is unlikely that this will
* change, as we are shrinking the space, not enlarging it) */
if (len_u != _invlist_len(u)) {
- invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
- invlist_trim(u);
- array_u = invlist_array(u);
+ invlist_set_len(u, len_u, *get_invlist_offset_addr(u));
+ invlist_trim(u);
+ array_u = invlist_array(u);
}
if (*output == NULL) { /* Simply return the new inversion list */
@@ -9914,7 +9915,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
}
invlist_clear(*i);
- return;
+ return;
}
/* Here both lists exist and are non-empty */
@@ -9925,8 +9926,8 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* up so are looking at b's complement. */
if (complement_b) {
- /* To complement, we invert: if the first element is 0, remove it. To
- * do this, we just pretend the array starts one later */
+ /* To complement, we invert: if the first element is 0, remove it. To
+ * do this, we just pretend the array starts one later */
if (array_b[0] == 0) {
array_b++;
len_b--;
@@ -9951,12 +9952,12 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
/* Go through each list item by item, stopping when have exhausted one of
* them */
while (i_a < len_a && i_b < len_b) {
- UV cp; /* The element to potentially add to the intersection's
- array */
- bool cp_in_set; /* Is it in the input list's set or not */
+ UV cp; /* The element to potentially add to the intersection's
+ array */
+ bool cp_in_set; /* Is it in the input list's set or not */
- /* We need to take one or the other of the two inputs for the
- * intersection. Since we are merging two sorted lists, we take the
+ /* We need to take one or the other of the two inputs for the
+ * intersection. Since we are merging two sorted lists, we take the
* smaller of the next items. In case of a tie, we take first the one
* that is not in its set (a difference from the union algorithm). If
* we first took the one in its set, it would increment the count,
@@ -9966,33 +9967,33 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* opposite of this, there is no possibility that the count will be
* momentarily incremented to 2. (In a tie and both are in the set or
* both not in the set, it doesn't matter which we take first.) */
- if ( array_a[i_a] < array_b[i_b]
- || ( array_a[i_a] == array_b[i_b]
- && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
- {
- cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
- cp = array_a[i_a++];
- }
- else {
- cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
- cp= array_b[i_b++];
- }
-
- /* Here, have chosen which of the two inputs to look at. Only output
- * if the running count changes to/from 2, which marks the
- * beginning/end of a range that's in the intersection */
- if (cp_in_set) {
- count++;
- if (count == 2) {
- array_r[i_r++] = cp;
- }
- }
- else {
- if (count == 2) {
- array_r[i_r++] = cp;
- }
- count--;
- }
+ if ( array_a[i_a] < array_b[i_b]
+ || ( array_a[i_a] == array_b[i_b]
+ && ! ELEMENT_RANGE_MATCHES_INVLIST(i_a)))
+ {
+ cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_a);
+ cp = array_a[i_a++];
+ }
+ else {
+ cp_in_set = ELEMENT_RANGE_MATCHES_INVLIST(i_b);
+ cp= array_b[i_b++];
+ }
+
+ /* Here, have chosen which of the two inputs to look at. Only output
+ * if the running count changes to/from 2, which marks the
+ * beginning/end of a range that's in the intersection */
+ if (cp_in_set) {
+ count++;
+ if (count == 2) {
+ array_r[i_r++] = cp;
+ }
+ }
+ else {
+ if (count == 2) {
+ array_r[i_r++] = cp;
+ }
+ count--;
+ }
}
@@ -10005,7 +10006,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
if ( (i_a == len_a && PREV_RANGE_MATCHES_INVLIST(i_a))
|| (i_b == len_b && PREV_RANGE_MATCHES_INVLIST(i_b)))
{
- count++;
+ count++;
}
/* Above we incremented 'count' if the exhausted list was in its set. This
@@ -10035,11 +10036,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
else { /* copy the non-exhausted list, unchanged. */
IV copy_count = len_a - i_a;
if (copy_count > 0) { /* a is the one with stuff left */
- Copy(array_a + i_a, array_r + i_r, copy_count, UV);
+ Copy(array_a + i_a, array_r + i_r, copy_count, UV);
}
else { /* b is the one with stuff left */
copy_count = len_b - i_b;
- Copy(array_b + i_b, array_r + i_r, copy_count, UV);
+ Copy(array_b + i_b, array_r + i_r, copy_count, UV);
}
len_r = i_r + copy_count;
}
@@ -10048,9 +10049,9 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
* array_r, so re-find it. (Note that it is unlikely that this will
* change, as we are shrinking the space, not enlarging it) */
if (len_r != _invlist_len(r)) {
- invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
- invlist_trim(r);
- array_r = invlist_array(r);
+ invlist_set_len(r, len_r, *get_invlist_offset_addr(r));
+ invlist_trim(r);
+ array_r = invlist_array(r);
}
if (*i == NULL) { /* Simply return the calculated intersection */
@@ -10099,7 +10100,7 @@ Perl__add_range_to_invlist(pTHX_ SV* invlist, UV start, UV end)
/* This range becomes the whole inversion list if none already existed */
if (invlist == NULL) {
- invlist = _new_invlist(2);
+ invlist = _new_invlist(2);
_append_range_to_invlist(invlist, start, end);
return invlist;
}
@@ -10378,8 +10379,8 @@ Perl__invlist_invert(pTHX_ SV* const invlist)
/* The inverse of matching nothing is matching everything */
if (_invlist_len(invlist) == 0) {
- _append_range_to_invlist(invlist, 0, UV_MAX);
- return;
+ _append_range_to_invlist(invlist, 0, UV_MAX);
+ return;
}
*get_invlist_offset_addr(invlist) = ! *get_invlist_offset_addr(invlist);
@@ -10463,21 +10464,21 @@ S_invlist_contents(pTHX_ SV* const invlist, const bool traditional_style)
invlist_iterinit(invlist);
while (invlist_iternext(invlist, &start, &end)) {
- if (end == UV_MAX) {
- Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
+ if (end == UV_MAX) {
+ Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%cINFTY%c",
start, intra_range_delimiter,
inter_range_delimiter);
- }
- else if (end != start) {
- Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
- start,
+ }
+ else if (end != start) {
+ Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c%04" UVXf "%c",
+ start,
intra_range_delimiter,
end, inter_range_delimiter);
- }
- else {
- Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
+ }
+ else {
+ Perl_sv_catpvf(aTHX_ output, "%04" UVXf "%c",
start, inter_range_delimiter);
- }
+ }
}
if (SvCUR(output) && ! traditional_style) {/* Get rid of trailing blank */
@@ -10520,20 +10521,20 @@ Perl__invlist_dump(pTHX_ PerlIO *file, I32 level,
invlist_iterinit(invlist);
while (invlist_iternext(invlist, &start, &end)) {
- if (end == UV_MAX) {
- Perl_dump_indent(aTHX_ level, file,
+ if (end == UV_MAX) {
+ Perl_dump_indent(aTHX_ level, file,
"%s[%" UVuf "] 0x%04" UVXf " .. INFTY\n",
indent, (UV)count, start);
- }
- else if (end != start) {
- Perl_dump_indent(aTHX_ level, file,
+ }
+ else if (end != start) {
+ Perl_dump_indent(aTHX_ level, file,
"%s[%" UVuf "] 0x%04" UVXf " .. 0x%04" UVXf "\n",
- indent, (UV)count, start, end);
- }
- else {
- Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
+ indent, (UV)count, start, end);
+ }
+ else {
+ Perl_dump_indent(aTHX_ level, file, "%s[%" UVuf "] 0x%04" UVXf "\n",
indent, (UV)count, start);
- }
+ }
count += 2;
}
}
@@ -10939,7 +10940,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
: WASTED_G;
if (! (wastedflags & wflagbit) ) {
wastedflags |= wflagbit;
- /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
+ /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
vWARN5(
RExC_parse + 1,
"Useless (%s%c) - %suse /%c modifier",
@@ -10959,7 +10960,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
if (ckWARN(WARN_REGEXP)) {
if (! (wastedflags & WASTED_C) ) {
wastedflags |= WASTED_GC;
- /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
+ /* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
vWARN3(
RExC_parse + 1,
"Useless (%sc) - %suse /gc modifier",
@@ -11020,7 +11021,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
default:
fail_modifiers:
RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
- /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
+ /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
vFAIL2utf8f("Sequence (%" UTF8f "...) not recognized",
UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
NOT_REACHED; /*NOTREACHED*/
@@ -11171,7 +11172,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
&& *(RExC_parse - 1) != '(';
if (RExC_parse >= RExC_end) {
- vFAIL("Unmatched (");
+ vFAIL("Unmatched (");
}
if (paren == 'r') { /* Atomic script run */
@@ -11179,10 +11180,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
goto parse_rest;
}
else if ( *RExC_parse == '*') { /* (*VERB:ARG), (*construct:...) */
- char *start_verb = RExC_parse + 1;
- STRLEN verb_len;
- char *start_arg = NULL;
- unsigned char op = 0;
+ char *start_verb = RExC_parse + 1;
+ STRLEN verb_len;
+ char *start_arg = NULL;
+ unsigned char op = 0;
int arg_required = 0;
int internal_argval = -1; /* if >-1 we are not allowed an argument*/
bool has_upper = FALSE;
@@ -11199,11 +11200,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL("In '(*...)', the '(' and '*' must be adjacent");
}
}
- while (RExC_parse < RExC_end && *RExC_parse != ')' ) {
- if ( *RExC_parse == ':' ) {
- start_arg = RExC_parse + 1;
- break;
- }
+ while (RExC_parse < RExC_end && *RExC_parse != ')' ) {
+ if ( *RExC_parse == ':' ) {
+ start_arg = RExC_parse + 1;
+ break;
+ }
else if (! UTF) {
if (isUPPER(*RExC_parse)) {
has_upper = TRUE;
@@ -11213,18 +11214,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
else {
RExC_parse += UTF8SKIP(RExC_parse);
}
- }
- verb_len = RExC_parse - start_verb;
- if ( start_arg ) {
+ }
+ verb_len = RExC_parse - start_verb;
+ if ( start_arg ) {
if (RExC_parse >= RExC_end) {
goto unterminated_verb_pattern;
}
- RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
- while ( RExC_parse < RExC_end && *RExC_parse != ')' ) {
+ RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+ while ( RExC_parse < RExC_end && *RExC_parse != ')' ) {
RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
}
- if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
+ if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
unterminated_verb_pattern:
if (has_upper) {
vFAIL("Unterminated verb pattern argument");
@@ -11233,8 +11234,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL("Unterminated '(*...' argument");
}
}
- } else {
- if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
+ } else {
+ if ( RExC_parse >= RExC_end || *RExC_parse != ')' ) {
if (has_upper) {
vFAIL("Unterminated verb pattern");
}
@@ -11242,29 +11243,29 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL("Unterminated '(*...' construct");
}
}
- }
+ }
/* Here, we know that RExC_parse < RExC_end */
- switch ( *start_verb ) {
+ switch ( *start_verb ) {
case 'A': /* (*ACCEPT) */
if ( memEQs(start_verb, verb_len,"ACCEPT") ) {
- op = ACCEPT;
- internal_argval = RExC_nestroot;
- }
- break;
+ op = ACCEPT;
+ internal_argval = RExC_nestroot;
+ }
+ break;
case 'C': /* (*COMMIT) */
if ( memEQs(start_verb, verb_len,"COMMIT") )
op = COMMIT;
break;
case 'F': /* (*FAIL) */
if ( verb_len==1 || memEQs(start_verb, verb_len,"FAIL") ) {
- op = OPFAIL;
- }
- break;
+ op = OPFAIL;
+ }
+ break;
case ':': /* (*:NAME) */
- case 'M': /* (*MARK:NAME) */
- if ( verb_len==0 || memEQs(start_verb, verb_len,"MARK") ) {
+ case 'M': /* (*MARK:NAME) */
+ if ( verb_len==0 || memEQs(start_verb, verb_len,"MARK") ) {
op = MARKPOINT;
arg_required = 1;
}
@@ -11421,7 +11422,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
ret=reganode(pRExC_state, OPFAIL, 0);
nextchar(pRExC_state);
return ret;
- }
+ }
RExC_parse = start_arg;
goto parse_rest;
@@ -11430,11 +11431,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL2utf8f(
"'(*%" UTF8f "' requires a terminating ':'",
UTF8fARG(UTF, verb_len, start_verb));
- NOT_REACHED; /*NOTREACHED*/
+ NOT_REACHED; /*NOTREACHED*/
- } /* End of switch */
- if ( ! op ) {
- RExC_parse += UTF
+ } /* End of switch */
+ if ( ! op ) {
+ RExC_parse += UTF
? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
: 1;
if (has_upper || verb_len == 0) {
@@ -11447,7 +11448,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
"Unknown '(*...)' construct '%" UTF8f "'",
UTF8fARG(UTF, verb_len, start_verb));
}
- }
+ }
if ( RExC_parse == start_arg ) {
start_arg = NULL;
}
@@ -11473,12 +11474,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
if ( internal_argval != -1 )
ARG2L_SET(REGNODE_p(ret), internal_argval);
- nextchar(pRExC_state);
- return ret;
+ nextchar(pRExC_state);
+ return ret;
}
else if (*RExC_parse == '?') { /* (?...) */
- bool is_logical = 0;
- const char * const seqstart = RExC_parse;
+ bool is_logical = 0;
+ const char * const seqstart = RExC_parse;
const char * endptr;
const char non_existent_group_msg[]
= "Reference to nonexistent group";
@@ -11489,24 +11490,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
vFAIL("In '(?...)', the '(' and '?' must be adjacent");
}
- RExC_parse++; /* past the '?' */
+ RExC_parse++; /* past the '?' */
paren = *RExC_parse; /* might be a trailing NUL, if not
well-formed */
RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
if (RExC_parse > RExC_end) {
paren = '\0';
}
- ret = 0; /* For look-ahead/behind. */
- switch (paren) {
+ ret = 0; /* For look-ahead/behind. */
+ switch (paren) {
- case 'P': /* (?P...) variants for those used to PCRE/Python */
- paren = *RExC_parse;
- if ( paren == '<') { /* (?P<...>) named capture */
+ case 'P': /* (?P...) variants for those used to PCRE/Python */
+ paren = *RExC_parse;
+ if ( paren == '<') { /* (?P<...>) named capture */
RExC_parse++;
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?P<... not terminated");
}
- goto named_capture;
+ goto named_capture;
}
else if (paren == '>') { /* (?P>name) named recursion */
RExC_parse++;
@@ -11522,33 +11523,33 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
/* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
- vFAIL3("Sequence (%.*s...) not recognized",
+ vFAIL3("Sequence (%.*s...) not recognized",
(int) (RExC_parse - seqstart), seqstart);
- NOT_REACHED; /*NOTREACHED*/
+ NOT_REACHED; /*NOTREACHED*/
case '<': /* (?<...) */
/* If you want to support (?<*...), first reconcile with GH #17363 */
- if (*RExC_parse == '!')
- paren = ',';
- else if (*RExC_parse != '=')
+ if (*RExC_parse == '!')
+ paren = ',';
+ else if (*RExC_parse != '=')
named_capture:
- { /* (?<...>) */
- char *name_start;
- SV *svname;
- paren= '>';
+ { /* (?<...>) */
+ char *name_start;
+ SV *svname;
+ paren= '>';
/* FALLTHROUGH */
case '\'': /* (?'...') */
name_start = RExC_parse;
svname = reg_scan_name(pRExC_state, REG_RSN_RETURN_NAME);
- if ( RExC_parse == name_start
+ if ( RExC_parse == name_start
|| RExC_parse >= RExC_end
|| *RExC_parse != paren)
{
- vFAIL2("Sequence (?%c... not terminated",
- paren=='>' ? '<' : (char) paren);
+ vFAIL2("Sequence (?%c... not terminated",
+ paren=='>' ? '<' : (char) paren);
}
- {
- HE *he_str;
- SV *sv_dat = NULL;
+ {
+ HE *he_str;
+ SV *sv_dat = NULL;
if (!svname) /* shouldn't happen */
Perl_croak(aTHX_
"panic: reg_scan_name returned NULL");
@@ -11607,56 +11608,56 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
/*sv_dump(sv_dat);*/
}
nextchar(pRExC_state);
- paren = 1;
- goto capturing_parens;
- }
+ paren = 1;
+ goto capturing_parens;
+ }
RExC_seen |= REG_LOOKBEHIND_SEEN;
- RExC_in_lookaround++;
- RExC_parse++;
+ RExC_in_lookaround++;
+ RExC_parse++;
if (RExC_parse >= RExC_end) {
vFAIL("Sequence (?... not terminated");
}
RExC_seen_zerolen++;
break;
- case '=': /* (?=...) */
- RExC_seen_zerolen++;
+ case '=': /* (?=...) */
+ RExC_seen_zerolen++;
RExC_in_lookaround++;
break;
- case '!': /* (?!...) */
- RExC_seen_zerolen++;
- /* check if we're really just a "FAIL" assertion */
+ case '!': /* (?!...) */
+ RExC_seen_zerolen++;
+ /* check if we're really just a "FAIL" assertion */
skip_to_be_ignored_text(pRExC_state, &RExC_parse,
FALSE /* Don't force to /x */ );
- if (*RExC_parse == ')') {
+ if (*RExC_parse == ')') {
ret=reganode(pRExC_state, OPFAIL, 0);
- nextchar(pRExC_state);
- return ret;
- }
+ nextchar(pRExC_state);
+ return ret;
+ }
RExC_in_lookaround++;
- break;
- case '|': /* (?|...) */
- /* branch reset, behave like a (?:...) except that
- buffers in alternations share the same numbers */
- paren = ':';
- after_freeze = freeze_paren = RExC_npar;
+ break;
+ case '|': /* (?|...) */
+ /* branch reset, behave like a (?:...) except that
+ buffers in alternations share the same numbers */
+ paren = ':';
+ after_freeze = freeze_paren = RExC_npar;
/* XXX This construct currently requires an extra pass.
* Investigation would be required to see if that could be
* changed */
REQUIRE_PARENS_PASS;
- break;
- case ':': /* (?:...) */
- case '>': /* (?>...) */
- break;
- case '$': /* (?$...) */
- case '@': /* (?@...) */
- vFAIL2("Sequence (?%c...) not implemented", (int)paren);
- break;
- case '0' : /* (?0) */
- case 'R' : /* (?R) */
+ break;
+ case ':': /* (?:...) */
+ case '>': /* (?>...) */
+ break;
+ case '$': /* (?$...) */
+ case '@': /* (?@...) */
+ vFAIL2("Sequence (?%c...) not implemented", (int)paren);
+ break;
+ case '0' : /* (?0) */
+ case 'R' : /* (?R) */
if (RExC_parse == RExC_end || *RExC_parse != ')')
- FAIL("Sequence (?R) not terminated");
+ FAIL("Sequence (?R) not terminated");
num = 0;
RExC_seen |= REG_RECURSE_SEEN;
@@ -11664,9 +11665,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
* It probably could be changed */
REQUIRE_PARENS_PASS;
- *flagp |= POSTPONED;
+ *flagp |= POSTPONED;
goto gen_recurse_regop;
- /*notreached*/
+ /*notreached*/
/* named and numeric backreferences */
case '&': /* (?&NAME) */
parse_start = RExC_parse - 1;
@@ -11694,8 +11695,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
/* FALLTHROUGH */
case '1': case '2': case '3': case '4': /* (?1) */
- case '5': case '6': case '7': case '8': case '9':
- RExC_parse = (char *) seqstart + 1; /* Point to the digit */
+ case '5': case '6': case '7': case '8': case '9':
+ RExC_parse = (char *) seqstart + 1; /* Point to the digit */
parse_recursion:
{
bool is_neg = FALSE;
@@ -11725,8 +11726,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
num = -num;
}
}
- if (*RExC_parse!=')')
- vFAIL("Expecting close bracket");
+ if (*RExC_parse!=')')
+ vFAIL("Expecting close bracket");
gen_recurse_regop:
if (paren == '-' || paren == '+') {
@@ -11801,7 +11802,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
Set_Node_Length(REGNODE_p(ret),
1 + regarglen[OP(REGNODE_p(ret))]); /* MJD */
- Set_Node_Offset(REGNODE_p(ret), parse_start); /* MJD */
+ Set_Node_Offset(REGNODE_p(ret), parse_start); /* MJD */
*flagp |= POSTPONED;
assert(*RExC_parse == ')');
@@ -11810,43 +11811,43 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
/* NOTREACHED */
- case '?': /* (??...) */
- is_logical = 1;
- if (*RExC_parse != '{') {
+ case '?': /* (??...) */
+ is_logical = 1;
+ if (*RExC_parse != '{') {
RExC_parse += SKIP_IF_CHAR(RExC_parse, RExC_end);
/* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
vFAIL2utf8f(
"Sequence (%" UTF8f "...) not recognized",
UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
- NOT_REACHED; /*NOTREACHED*/
- }
- *flagp |= POSTPONED;
- paren = '{';
+ NOT_REACHED; /*NOTREACHED*/
+ }
+ *flagp |= POSTPONED;
+ paren = '{';
RExC_parse++;
- /* FALLTHROUGH */
- case '{': /* (?{...}) */
- {
- U32 n = 0;
- struct reg_code_block *cb;
+ /* FALLTHROUGH */
+ case '{': /* (?{...}) */
+ {
+ U32 n = 0;
+ struct reg_code_block *cb;
OP * o;
- RExC_seen_zerolen++;
+ RExC_seen_zerolen++;
- if ( !pRExC_state->code_blocks
- || pRExC_state->code_index
+ if ( !pRExC_state->code_blocks
+ || pRExC_state->code_index
>= pRExC_state->code_blocks->count
- || pRExC_state->code_blocks->cb[pRExC_state->code_index].start
- != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
- - RExC_start)
- ) {
- if (RExC_pm_flags & PMf_USE_RE_EVAL)
- FAIL("panic: Sequence (?{...}): no code block found\n");
- FAIL("Eval-group not allowed at runtime, use re 'eval'");
- }
- /* this is a pre-compiled code block (?{...}) */
- cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
- RExC_parse = RExC_start + cb->end;
- o = cb->block;
+ || pRExC_state->code_blocks->cb[pRExC_state->code_index].start
+ != (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+ - RExC_start)
+ ) {
+ if (RExC_pm_flags & PMf_USE_RE_EVAL)
+ FAIL("panic: Sequence (?{...}): no code block found\n");
+ FAIL("Eval-group not allowed at runtime, use re 'eval'");
+ }
+ /* this is a pre-compiled code block (?{...}) */
+ cb = &pRExC_state->code_blocks->cb[pRExC_state->code_index];
+ RExC_parse = RExC_start + cb->end;
+ o = cb->block;
if (cb->src_regex) {
n = add_data(pRExC_state, STR_WITH_LEN("rl"));
RExC_rxi->data->data[n] =
@@ -11858,12 +11859,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
(RExC_pm_flags & PMf_HAS_CV) ? "L" : "l", 1);
RExC_rxi->data->data[n] = (void*)o;
}
- pRExC_state->code_index++;
- nextchar(pRExC_state);
+ pRExC_state->code_index++;
+ nextchar(pRExC_state);
- if (is_logical) {
+ if (is_logical) {
regnode_offset eval;
- ret = reg_node(pRExC_state, LOGICAL);
+ ret = reg_node(pRExC_state, LOGICAL);
eval = reg2Lanode(pRExC_state, EVAL,
n,
@@ -11877,24 +11878,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
REQUIRE_BRANCHJ(flagp, 0);
}
/* deal with the length of this later - MJD */
- return ret;
- }
- ret = reg2Lanode(pRExC_state, EVAL, n, 0);
- Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1);
- Set_Node_Offset(REGNODE_p(ret), parse_start);
- return ret;
- }
- case '(': /* (?(?{...})...) and (?(?=...)...) */
- {
- int is_define= 0;
+ return ret;
+ }
+ ret = reg2Lanode(pRExC_state, EVAL, n, 0);
+ Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1);
+ Set_Node_Offset(REGNODE_p(ret), parse_start);
+ return ret;
+ }
+ case '(': /* (?(?{...})...) and (?(?=...)...) */
+ {
+ int is_define= 0;
const int DEFINE_len = sizeof("DEFINE") - 1;
- if ( RExC_parse < RExC_end - 1
+ if ( RExC_parse < RExC_end - 1
&& ( ( RExC_parse[0] == '?' /* (?(?...)) */
&& ( RExC_parse[1] == '='
|| RExC_parse[1] == '!'
|| RExC_parse[1] == '<'
|| RExC_parse[1] == '{'))
- || ( RExC_parse[0] == '*' /* (?(*...)) */
+ || ( RExC_parse[0] == '*' /* (?(*...)) */
&& ( memBEGINs(RExC_parse + 1,
(Size_t) (RExC_end - (RExC_parse + 1)),
"pla:")
@@ -11933,14 +11934,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
goto insert_if;
}
- else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */
- || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
- {
- char ch = RExC_parse[0] == '<' ? '>' : '\'';
- char *name_start= RExC_parse++;
- U32 num = 0;
- SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
- if ( RExC_parse == name_start
+ else if ( RExC_parse[0] == '<' /* (?(<NAME>)...) */
+ || RExC_parse[0] == '\'' ) /* (?('NAME')...) */
+ {
+ char ch = RExC_parse[0] == '<' ? '>' : '\'';
+ char *name_start= RExC_parse++;
+ U32 num = 0;
+ SV *sv_dat=reg_scan_name(pRExC_state, REG_RSN_RETURN_DATA);
+ if ( RExC_parse == name_start
|| RExC_parse >= RExC_end
|| *RExC_parse != ch)
{
@@ -11955,23 +11956,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
ret = reganode(pRExC_state, GROUPPN, num);
goto insert_if_check_paren;
- }
- else if (memBEGINs(RExC_parse,
+ }
+ else if (memBEGINs(RExC_parse,
(STRLEN) (RExC_end - RExC_parse),
"DEFINE"))
{
- ret = reganode(pRExC_state, DEFINEP, 0);
- RExC_parse += DEFINE_len;
- is_define = 1;
- goto insert_if_check_paren;
- }
- else if (RExC_parse[0] == 'R') {
- RExC_parse++;
+ ret = reganode(pRExC_state, DEFINEP, 0);
+ RExC_parse += DEFINE_len;
+ is_define = 1;
+ goto insert_if_check_paren;
+ }
+ else if (RExC_parse[0] == 'R') {
+ RExC_parse++;
/* parno == 0 => /(?(R)YES|NO)/ "in any form of recursion OR eval"
* parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)"
* parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)"
*/
- parno = 0;
+ parno = 0;
if (RExC_parse[0] == '0') {
parno = 1;
RExC_parse++;
@@ -11986,20 +11987,20 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
RExC_parse = (char*)endptr;
}
/* else "Switch condition not recognized" below */
- } else if (RExC_parse[0] == '&') {
- SV *sv_dat;
- RExC_parse++;
- sv_dat = reg_scan_name(pRExC_state,
+ } else if (RExC_parse[0] == '&') {
+ SV *sv_dat;
+ RExC_parse++;
+ sv_dat = reg_scan_name(pRExC_state,
REG_RSN_RETURN_DATA);
if (sv_dat)
parno = 1 + *((I32 *)SvPVX(sv_dat));
- }
- ret = reganode(pRExC_state, INSUBP, parno);
- goto insert_if_check_paren;
- }
+ }
+ ret = reganode(pRExC_state, INSUBP, parno);
+ goto insert_if_check_paren;
+ }
else if (inRANGE(RExC_parse[0], '1', '9')) {
/* (?(1)...) */
- char c;
+ char c;
UV uv;
endptr = RExC_end;
if (grok_atoUV(RExC_parse, &uv, &endptr)
@@ -12014,21 +12015,21 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
ret = reganode(pRExC_state, GROUPP, parno);
insert_if_check_paren:
- if (UCHARAT(RExC_parse) != ')') {
+ if (UCHARAT(RExC_parse) != ')') {
RExC_parse += UTF
? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
: 1;
- vFAIL("Switch condition not recognized");
- }
- nextchar(pRExC_state);
- insert_if:
+ vFAIL("Switch condition not recognized");
+ }
+ nextchar(pRExC_state);
+ insert_if:
if (! REGTAIL(pRExC_state, ret, reganode(pRExC_state,
IFTHEN, 0)))
{
REQUIRE_BRANCHJ(flagp, 0);
}
br = regbranch(pRExC_state, &flags, 1, depth+1);
- if (br == 0) {
+ if (br == 0) {
RETURN_FAIL_ON_RESTART(flags,flagp);
FAIL2("panic: regbranch returned failure, flags=%#" UVxf,
(UV) flags);
@@ -12038,13 +12039,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
{
REQUIRE_BRANCHJ(flagp, 0);
}
- c = UCHARAT(RExC_parse);
+ c = UCHARAT(RExC_parse);
nextchar(pRExC_state);
- if (flags&HASWIDTH)
- *flagp |= HASWIDTH;
- if (c == '|') {
- if (is_define)
- vFAIL("(?(DEFINE)....) does not allow branches");
+ if (flags&HASWIDTH)
+ *flagp |= HASWIDTH;
+ if (c == '|') {
+ if (is_define)
+ vFAIL("(?(DEFINE)....) does not allow branches");
/* Fake one for optimizer. */
lastbr = reganode(pRExC_state, IFTHEN, 0);
@@ -12058,23 +12059,23 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
REQUIRE_BRANCHJ(flagp, 0);
}
if (flags&HASWIDTH)
- *flagp |= HASWIDTH;
+ *flagp |= HASWIDTH;
c = UCHARAT(RExC_parse);
nextchar(pRExC_state);
- }
- else
- lastbr = 0;
+ }
+ else
+ lastbr = 0;
if (c != ')') {
if (RExC_parse >= RExC_end)
vFAIL("Switch (?(condition)... not terminated");
else
vFAIL("Switch (?(condition)... contains too many branches");
}
- ender = reg_node(pRExC_state, TAIL);
+ ender = reg_node(pRExC_state, TAIL);
if (! REGTAIL(pRExC_state, br, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
- if (lastbr) {
+ if (lastbr) {
if (! REGTAIL(pRExC_state, lastbr, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
@@ -12086,8 +12087,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
{
REQUIRE_BRANCHJ(flagp, 0);
}
- }
- else
+ }
+ else
if (! REGTAIL(pRExC_state, ret, ender)) {
REQUIRE_BRANCHJ(flagp, 0);
}
@@ -12096,18 +12097,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
For large programs it seems to be required
but I can't figure out why. -- dmq*/
#endif
- return ret;
- }
+ return ret;
+ }
RExC_parse += UTF
? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
: 1;
vFAIL("Unknown switch condition (?(...))");
- }
- case '[': /* (?[ ... ]) */
+ }
+ case '[': /* (?[ ... ]) */
return handle_regex_sets(pRExC_state, NULL, flagp, depth+1,
oregcomp_parse);
case 0: /* A NUL */
- RExC_parse--; /* for vFAIL to print correctly */
+ RExC_parse--; /* for vFAIL to print correctly */
vFAIL("Sequence (? incomplete");
break;
@@ -12117,11 +12118,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
/* FALLTHROUGH */
case '*': /* If you want to support (?*...), first reconcile with GH #17363 */
- /* FALLTHROUGH */
- default: /* e.g., (?i) */
- RExC_parse = (char *) seqstart + 1;
+ /* FALLTHROUGH */
+ default: /* e.g., (?i) */
+ RExC_parse = (char *) seqstart + 1;
parse_flags:
- parse_lparen_question_flags(pRExC_state);
+ parse_lparen_question_flags(pRExC_state);
if (UCHARAT(RExC_parse) != ':') {
if (RExC_parse < RExC_end)
nextchar(pRExC_state);
@@ -12133,11 +12134,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
ret = 0;
goto parse_rest;
} /* end switch */
- }
+ }
else if (!(RExC_flags & RXf_PMf_NOCAPTURE)) { /* (...) */
- capturing_parens:
- parno = RExC_npar;
- RExC_npar++;
+ capturing_parens:
+ parno = RExC_npar;
+ RExC_npar++;
if (! ALL_PARENS_COUNTED) {
/* If we are in our first pass through (and maybe only pass),
* we need to allocate memory for the capturing parentheses
@@ -12182,7 +12183,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
}
- ret = reganode(pRExC_state, OPEN, parno);
+ ret = reganode(pRExC_state, OPEN, parno);
if (!RExC_nestroot)
RExC_nestroot = parno;
if (RExC_open_parens && !RExC_open_parens[parno])
@@ -12196,15 +12197,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
Set_Node_Offset(REGNODE_p(ret), RExC_parse); /* MJD */
- is_open = 1;
- } else {
+ is_open = 1;
+ } else {
/* with RXf_PMf_NOCAPTURE treat (...) as (?:...) */
paren = ':';
- ret = 0;
+ ret = 0;
}
}
else /* ! paren */
- ret = 0;
+ ret = 0;
parse_rest:
/* Pick up the branches, linking them together. */
@@ -12218,18 +12219,18 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
}
if (*RExC_parse == '|') {
- if (RExC_use_BRANCHJ) {
- reginsert(pRExC_state, BRANCHJ, br, depth+1);
- }
- else { /* MJD */
- reginsert(pRExC_state, BRANCH, br, depth+1);
+ if (RExC_use_BRANCHJ) {
+ reginsert(pRExC_state, BRANCHJ, br, depth+1);
+ }
+ else { /* MJD */
+ reginsert(pRExC_state, BRANCH, br, depth+1);
Set_Node_Length(REGNODE_p(br), paren != 0);
Set_Node_Offset_To_R(br, parse_start-RExC_start);
}
- have_branch = 1;
+ have_branch = 1;
}
else if (paren == ':') {
- *flagp |= flags&SIMPLE;
+ *flagp |= flags&SIMPLE;
}
if (is_open) { /* Starts with OPEN. */
if (! REGTAIL(pRExC_state, ret, br)) { /* OPEN -> first. */
@@ -12237,82 +12238,82 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
}
}
else if (paren != '?') /* Not Conditional */
- ret = br;
+ ret = br;
*flagp |= flags & (HASWIDTH | POSTPONED);
lastbr = br;
while (*RExC_parse == '|') {
- if (RExC_use_BRANCHJ) {
+ if (RExC_use_BRANCHJ) {
bool shut_gcc_up;
- ender = reganode(pRExC_state, LONGJMP, 0);
+ ender = reganode(pRExC_state, LONGJMP, 0);
/* Append to the previous. */
shut_gcc_up = REGTAIL(pRExC_state,
REGNODE_OFFSET(NEXTOPER(NEXTOPER(REGNODE_p(lastbr)))),
ender);
PERL_UNUSED_VAR(shut_gcc_up);
- }
- nextchar(pRExC_state);
- if (freeze_paren) {
- if (RExC_npar > after_freeze)
- after_freeze = RExC_npar;
+ }
+ nextchar(pRExC_state);
+ if (freeze_paren) {
+ if (RExC_npar > after_freeze)
+ after_freeze = RExC_npar;
RExC_npar = freeze_paren;
}
br = regbranch(pRExC_state, &flags, 0, depth+1);
- if (br == 0) {
+ if (br == 0) {
RETURN_FAIL_ON_RESTART(flags, flagp);
FAIL2("panic: regbranch returned failure, flags=%#" UVxf, (UV) flags);
}
if (! REGTAIL(pRExC_state, lastbr, br)) { /* BRANCH -> BRANCH. */
REQUIRE_BRANCHJ(flagp, 0);
}
- lastbr = br;
- *flagp |= flags & (HASWIDTH | POSTPONED);
+ lastbr = br;
+ *flagp |= flags & (HASWIDTH | POSTPONED);
}
if (have_branch || paren != ':') {
regnode * br;
- /* Make a closing node, and hook it on the end. */
- switch (paren) {
- case ':':
- ender = reg_node(pRExC_state, TAIL);
- break;
- case 1: case 2:
- ender = reganode(pRExC_state, CLOSE, parno);
+ /* Make a closing node, and hook it on the end. */
+ switch (paren) {
+ case ':':
+ ender = reg_node(pRExC_state, TAIL);
+ break;
+ case 1: case 2:
+ ender = reganode(pRExC_state, CLOSE, parno);
if ( RExC_close_parens ) {
DEBUG_OPTIMISE_MORE_r(Perl_re_printf( aTHX_
"%*s%*s Setting close paren #%" IVdf " to %zu\n",
22, "| |", (int)(depth * 2 + 1), "",
(IV)parno, ender));
RExC_close_parens[parno]= ender;
- if (RExC_nestroot == parno)
- RExC_nestroot = 0;
- }
+ if (RExC_nestroot == parno)
+ RExC_nestroot = 0;
+ }
Set_Node_Offset(REGNODE_p(ender), RExC_parse+1); /* MJD */
Set_Node_Length(REGNODE_p(ender), 1); /* MJD */
- break;
- case 's':
- ender = reg_node(pRExC_state, SRCLOSE);
+ break;
+ case 's':
+ ender = reg_node(pRExC_state, SRCLOSE);
RExC_in_script_run = 0;
- break;
- case '<':
+ break;
+ case '<':
case 'a':
case 'A':
case 'b':
case 'B':
- case ',':
- case '=':
- case '!':
- *flagp &= ~HASWIDTH;
- /* FALLTHROUGH */
+ case ',':
+ case '=':
+ case '!':
+ *flagp &= ~HASWIDTH;
+ /* FALLTHROUGH */
case 't': /* aTomic */
- case '>':
- ender = reg_node(pRExC_state, SUCCEED);
- break;
- case 0:
- ender = reg_node(pRExC_state, END);
+ case '>':
+ ender = reg_node(pRExC_state, SUCCEED);
+ break;
+ case 0:
+ ender = reg_node(pRExC_state, END);
assert(!RExC_end_op); /* there can only be one! */
RExC_end_op = REGNODE_p(ender);
if (RExC_close_parens) {
@@ -12323,8 +12324,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
RExC_close_parens[0]= ender;
}
- break;
- }
+ break;
+ }
DEBUG_PARSE_r({
DEBUG_PARSE_MSG("lsbr");
regprop(RExC_rx, RExC_mysv1, REGNODE_p(lastbr), NULL, pRExC_state);
@@ -12341,15 +12342,15 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
REQUIRE_BRANCHJ(flagp, 0);
}
- if (have_branch) {
+ if (have_branch) {
char is_nothing= 1;
- if (depth==1)
+ if (depth==1)
RExC_seen |= REG_TOP_LEVEL_BRANCHES_SEEN;
- /* Hook the tails of the branches to the closing node. */
- for (br = REGNODE_p(ret); br; br = regnext(br)) {
- const U8 op = PL_regkind[OP(br)];
- if (op == BRANCH) {
+ /* Hook the tails of the branches to the closing node. */
+ for (br = REGNODE_p(ret); br; br = regnext(br)) {
+ const U8 op = PL_regkind[OP(br)];
+ if (op == BRANCH) {
if (! REGTAIL_STUDY(pRExC_state,
REGNODE_OFFSET(NEXTOPER(br)),
ender))
@@ -12359,8 +12360,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
if ( OP(NEXTOPER(br)) != NOTHING
|| regnext(NEXTOPER(br)) != REGNODE_p(ender))
is_nothing= 0;
- }
- else if (op == BRANCHJ) {
+ }
+ else if (op == BRANCHJ) {
bool shut_gcc_up = REGTAIL_STUDY(pRExC_state,
REGNODE_OFFSET(NEXTOPER(NEXTOPER(br))),
ender);
@@ -12370,8 +12371,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
|| regnext(NEXTOPER(NEXTOPER(br))) != REGNODE_p(ender))
*/
is_nothing= 0;
- }
- }
+ }
+ }
if (is_nothing) {
regnode * ret_as_regnode = REGNODE_p(ret);
br= PL_regkind[OP(ret_as_regnode)] != BRANCH
@@ -12402,7 +12403,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
NEXT_OFF(br)= REGNODE_p(ender) - br;
}
}
- }
+ }
}
{
@@ -12411,47 +12412,47 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp, U32 depth)
static const char parens[] = "=!aA<,>Bbt";
/* flag below is set to 0 up through 'A'; 1 for larger */
- if (paren && (p = strchr(parens, paren))) {
- U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
- int flag = (p - parens) > 3;
+ if (paren && (p = strchr(parens, paren))) {
+ U8 node = ((p - parens) % 2) ? UNLESSM : IFMATCH;
+ int flag = (p - parens) > 3;
- if (paren == '>' || paren == 't') {
- node = SUSPEND, flag = 0;
+ if (paren == '>' || paren == 't') {
+ node = SUSPEND, flag = 0;
}
- reginsert(pRExC_state, node, ret, depth+1);
+ reginsert(pRExC_state, node, ret, depth+1);
Set_Node_Cur_Length(REGNODE_p(ret), parse_start);
- Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
- FLAGS(REGNODE_p(ret)) = flag;
+ Set_Node_Offset(REGNODE_p(ret), parse_start + 1);
+ FLAGS(REGNODE_p(ret)) = flag;
if (! REGTAIL_STUDY(pRExC_state, ret, reg_node(pRExC_state, TAIL)))
{
REQUIRE_BRANCHJ(flagp, 0);
}
- }
+ }
}
/* Check for proper termination. */
if (paren) {
/* restore original flags, but keep (?p) and, if we've encountered
* something in the parse that changes /d rules into /u, keep the /u */
- RExC_flags = oregflags | (RExC_flags & RXf_PMf_KEEPCOPY);
+ RExC_flags = oregflags | (RExC_flags & RXf_PMf_KEEPCOPY);
if (DEPENDS_SEMANTICS && toUSE_UNI_CHARSET_NOT_DEPENDS) {
set_regex_charset(&RExC_flags, REGEX_UNICODE_CHARSET);
}
- if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
- RExC_parse = oregcomp_parse;
- vFAIL("Unmatched (");
- }
- nextchar(pRExC_state);
+ if (RExC_parse >= RExC_end || UCHARAT(RExC_parse) != ')') {
+ RExC_parse = oregcomp_parse;
+ vFAIL("Unmatched (");
+ }
+ nextchar(pRExC_state);
}
else if (!paren && RExC_parse < RExC_end) {
- if (*RExC_parse == ')') {
- RExC_parse++;
- vFAIL("Unmatched )");
- }
- else
- FAIL("Junk on end of regexp"); /* "Can't happen". */
- NOT_REACHED; /* NOTREACHED */
+ if (*RExC_parse == ')') {
+ RExC_parse++;
+ vFAIL("Unmatched )");
+ }
+ else
+ FAIL("Junk on end of regexp"); /* "Can't happen". */
+ NOT_REACHED; /* NOTREACHED */
}
if (after_freeze > RExC_npar)
@@ -12488,12 +12489,12 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
DEBUG_PARSE("brnc");
if (first)
- ret = 0;
+ ret = 0;
else {
- if (RExC_use_BRANCHJ)
- ret = reganode(pRExC_state, BRANCHJ, 0);
- else {
- ret = reg_node(pRExC_state, BRANCH);
+ if (RExC_use_BRANCHJ)
+ ret = reganode(pRExC_state, BRANCHJ, 0);
+ else {
+ ret = reg_node(pRExC_state, BRANCH);
Set_Node_Length(REGNODE_p(ret), 1);
}
}
@@ -12503,38 +12504,38 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
skip_to_be_ignored_text(pRExC_state, &RExC_parse,
FALSE /* Don't force to /x */ );
while (RExC_parse < RExC_end && *RExC_parse != '|' && *RExC_parse != ')') {
- flags &= ~TRYAGAIN;
+ flags &= ~TRYAGAIN;
latest = regpiece(pRExC_state, &flags, depth+1);
- if (latest == 0) {
- if (flags & TRYAGAIN)
- continue;
+ if (latest == 0) {
+ if (flags & TRYAGAIN)
+ continue;
RETURN_FAIL_ON_RESTART(flags, flagp);
FAIL2("panic: regpiece returned failure, flags=%#" UVxf, (UV) flags);
- }
- else if (ret == 0)
+ }
+ else if (ret == 0)
ret = latest;
- *flagp |= flags&(HASWIDTH|POSTPONED);
- if (chain != 0) {
- /* FIXME adding one for every branch after the first is probably
- * excessive now we have TRIE support. (hv) */
- MARK_NAUGHTY(1);
+ *flagp |= flags&(HASWIDTH|POSTPONED);
+ if (chain != 0) {
+ /* FIXME adding one for every branch after the first is probably
+ * excessive now we have TRIE support. (hv) */
+ MARK_NAUGHTY(1);
if (! REGTAIL(pRExC_state, chain, latest)) {
/* XXX We could just redo this branch, but figuring out what
* bookkeeping needs to be reset is a pain, and it's likely
* that other branches that goto END will also be too large */
REQUIRE_BRANCHJ(flagp, 0);
}
- }
- chain = latest;
- c++;
+ }
+ chain = latest;
+ c++;
}
if (chain == 0) { /* Loop ran zero times. */
- chain = reg_node(pRExC_state, NOTHING);
- if (ret == 0)
- ret = chain;
+ chain = reg_node(pRExC_state, NOTHING);
+ if (ret == 0)
+ ret = chain;
}
if (c == 1) {
- *flagp |= flags&SIMPLE;
+ *flagp |= flags&SIMPLE;
}
return ret;
@@ -12551,15 +12552,15 @@ Perl_regcurly(const char *s)
PERL_ARGS_ASSERT_REGCURLY;
if (*s++ != '{')
- return FALSE;
+ return FALSE;
if (!isDIGIT(*s))
- return FALSE;
+ return FALSE;
while (isDIGIT(*s))
- s++;
+ s++;
if (*s == ',') {
- s++;
- while (isDIGIT(*s))
- s++;
+ s++;
+ while (isDIGIT(*s))
+ s++;
}
return *s == '}';
@@ -12842,7 +12843,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
/* Forbid extra quantifiers */
- if (ISMULT2(RExC_parse)) {
+ if (isQUANTIFIER(RExC_parse, RExC_end)) {
RExC_parse++;
vFAIL("Nested quantifiers");
}
@@ -13351,7 +13352,7 @@ S_new_regcurly(const char *s, const char *e)
PERL_ARGS_ASSERT_NEW_REGCURLY;
if (s >= e || *s++ != '{')
- return FALSE;
+ return FALSE;
while (s < e && isSPACE(*s)) {
s++;
@@ -13365,7 +13366,7 @@ S_new_regcurly(const char *s, const char *e)
}
if (*s == ',') {
- s++;
+ s++;
while (s < e && isSPACE(*s)) {
s++;
}
@@ -13423,36 +13424,36 @@ S_backref_value(char *p, char *e)
A summary of the code structure is:
switch (first_byte) {
- cases for each special:
- handle this special;
- break;
- case '\\':
- switch (2nd byte) {
- cases for each unambiguous special:
- handle this special;
- break;
- cases for each ambigous special/literal:
- disambiguate;
- if (special) handle here
- else goto defchar;
- default: // unambiguously literal:
- goto defchar;
- }
- default: // is a literal char
- // FALL THROUGH
- defchar:
- create EXACTish node for literal;
- while (more input and node isn't full) {
- switch (input_byte) {
- cases for each special;
+ cases for each special:
+ handle this special;
+ break;
+ case '\\':
+ switch (2nd byte) {
+ cases for each unambiguous special:
+ handle this special;
+ break;
+ cases for each ambigous special/literal:
+ disambiguate;
+ if (special) handle here
+ else goto defchar;
+ default: // unambiguously literal:
+ goto defchar;
+ }
+ default: // is a literal char
+ // FALL THROUGH
+ defchar:
+ create EXACTish node for literal;
+ while (more input and node isn't full) {
+ switch (input_byte) {
+ cases for each special;
make sure parse pointer is set so that the next call to
regatom will see this special first
goto loopdone; // EXACTish node terminated by prev. char
- default:
- append char to EXACTISH node;
- }
- get next input byte;
- }
+ default:
+ append char to EXACTISH node;
+ }
+ get next input byte;
+ }
loopdone:
}
return the generated node;
@@ -13486,37 +13487,37 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
assert(RExC_parse < RExC_end);
switch ((U8)*RExC_parse) {
case '^':
- RExC_seen_zerolen++;
- nextchar(pRExC_state);
- if (RExC_flags & RXf_PMf_MULTILINE)
- ret = reg_node(pRExC_state, MBOL);
- else
- ret = reg_node(pRExC_state, SBOL);
+ RExC_seen_zerolen++;
+ nextchar(pRExC_state);
+ if (RExC_flags & RXf_PMf_MULTILINE)
+ ret = reg_node(pRExC_state, MBOL);
+ else
+ ret = reg_node(pRExC_state, SBOL);
Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
- break;
+ break;
case '$':
- nextchar(pRExC_state);
- if (*RExC_parse)
- RExC_seen_zerolen++;
- if (RExC_flags & RXf_PMf_MULTILINE)
- ret = reg_node(pRExC_state, MEOL);
- else
- ret = reg_node(pRExC_state, SEOL);
+ nextchar(pRExC_state);
+ if (*RExC_parse)
+ RExC_seen_zerolen++;
+ if (RExC_flags & RXf_PMf_MULTILINE)
+ ret = reg_node(pRExC_state, MEOL);
+ else
+ ret = reg_node(pRExC_state, SEOL);
Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
- break;
+ break;
case '.':
- nextchar(pRExC_state);
- if (RExC_flags & RXf_PMf_SINGLELINE)
- ret = reg_node(pRExC_state, SANY);
- else
- ret = reg_node(pRExC_state, REG_ANY);
- *flagp |= HASWIDTH|SIMPLE;
- MARK_NAUGHTY(1);
+ nextchar(pRExC_state);
+ if (RExC_flags & RXf_PMf_SINGLELINE)
+ ret = reg_node(pRExC_state, SANY);
+ else
+ ret = reg_node(pRExC_state, REG_ANY);
+ *flagp |= HASWIDTH|SIMPLE;
+ MARK_NAUGHTY(1);
Set_Node_Length(REGNODE_p(ret), 1); /* MJD */
- break;
+ break;
case '[':
{
- char * const oregcomp_parse = ++RExC_parse;
+ char * const oregcomp_parse = ++RExC_parse;
ret = regclass(pRExC_state, flagp, depth+1,
FALSE, /* means parse the whole char class */
TRUE, /* allow multi-char folds */
@@ -13529,65 +13530,65 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
FAIL2("panic: regclass returned failure to regatom, flags=%#" UVxf,
(UV) *flagp);
}
- if (*RExC_parse != ']') {
- RExC_parse = oregcomp_parse;
- vFAIL("Unmatched [");
- }
- nextchar(pRExC_state);
+ if (*RExC_parse != ']') {
+ RExC_parse = oregcomp_parse;
+ vFAIL("Unmatched [");
+ }
+ nextchar(pRExC_state);
Set_Node_Length(REGNODE_p(ret), RExC_parse - oregcomp_parse + 1); /* MJD */
- break;
+ break;
}
case '(':
- nextchar(pRExC_state);
+ nextchar(pRExC_state);
ret = reg(pRExC_state, 2, &flags, depth+1);
- if (ret == 0) {
- if (flags & TRYAGAIN) {
- if (RExC_parse >= RExC_end) {
- /* Make parent create an empty node if needed. */
- *flagp |= TRYAGAIN;
- return(0);
- }
- goto tryagain;
- }
+ if (ret == 0) {
+ if (flags & TRYAGAIN) {
+ if (RExC_parse >= RExC_end) {
+ /* Make parent create an empty node if needed. */
+ *flagp |= TRYAGAIN;
+ return(0);
+ }
+ goto tryagain;
+ }
RETURN_FAIL_ON_RESTART(flags, flagp);
FAIL2("panic: reg returned failure to regatom, flags=%#" UVxf,
(UV) flags);
- }
- *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
- break;
+ }
+ *flagp |= flags&(HASWIDTH|SIMPLE|POSTPONED);
+ break;
case '|':
case ')':
- if (flags & TRYAGAIN) {
- *flagp |= TRYAGAIN;
- return 0;
- }
- vFAIL("Internal urp");
- /* Supposed to be caught earlier. */
- break;
+ if (flags & TRYAGAIN) {
+ *flagp |= TRYAGAIN;
+ return 0;
+ }
+ vFAIL("Internal urp");
+ /* Supposed to be caught earlier. */
+ break;
case '?':
case '+':
case '*':
- RExC_parse++;
- vFAIL("Quantifier follows nothing");
- break;
+ RExC_parse++;
+ vFAIL("Quantifier follows nothing");
+ break;
case '\\':
- /* Special Escapes
-
- This switch handles escape sequences that resolve to some kind
- of special regop and not to literal text. Escape sequences that
- resolve to literal text are handled below in the switch marked
- "Literal Escapes".
-
- Every entry in this switch *must* have a corresponding entry
- in the literal escape switch. However, the opposite is not
- required, as the default for this switch is to jump to the
- literal text handling code.
- */
- RExC_parse++;
- switch ((U8)*RExC_parse) {
- /* Special Escapes */
- case 'A':
- RExC_seen_zerolen++;
+ /* Special Escapes
+
+ This switch handles escape sequences that resolve to some kind
+ of special regop and not to literal text. Escape sequences that
+ resolve to literal text are handled below in the switch marked
+ "Literal Escapes".
+
+ Every entry in this switch *must* have a corresponding entry
+ in the literal escape switch. However, the opposite is not
+ required, as the default for this switch is to jump to the
+ literal text handling code.
+ */
+ RExC_parse++;
+ switch ((U8)*RExC_parse) {
+ /* Special Escapes */
+ case 'A':
+ RExC_seen_zerolen++;
/* Under wildcards, this is changed to match \n; should be
* invisible to the user, as they have to compile under /m */
if (RExC_pm_flags & PMf_WILDCARD) {
@@ -13599,8 +13600,8 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* /\A/ from /^/ in split. */
FLAGS(REGNODE_p(ret)) = 1;
}
- goto finish_meta_pat;
- case 'G':
+ goto finish_meta_pat;
+ case 'G':
if (RExC_pm_flags & PMf_WILDCARD) {
RExC_parse++;
/* diag_listed_as: Use of %s is not allowed in Unicode property
@@ -13609,10 +13610,10 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
vFAIL("Use of '\\G' is not allowed in Unicode property"
" wildcard subpatterns");
}
- ret = reg_node(pRExC_state, GPOS);
+ ret = reg_node(pRExC_state, GPOS);
RExC_seen |= REG_GPOS_SEEN;
- goto finish_meta_pat;
- case 'K':
+ goto finish_meta_pat;
+ case 'K':
if (!RExC_in_lookaround) {
RExC_seen_zerolen++;
ret = reg_node(pRExC_state, KEEPS);
@@ -13627,7 +13628,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
++RExC_parse; /* advance past the 'K' */
vFAIL("\\K not permitted in lookahead/lookbehind");
}
- case 'Z':
+ case 'Z':
if (RExC_pm_flags & PMf_WILDCARD) {
/* See comment under \A above */
ret = reg_node(pRExC_state, MEOL);
@@ -13635,9 +13636,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
else {
ret = reg_node(pRExC_state, SEOL);
}
- RExC_seen_zerolen++; /* Do not optimize RE away */
- goto finish_meta_pat;
- case 'z':
+ RExC_seen_zerolen++; /* Do not optimize RE away */
+ goto finish_meta_pat;
+ case 'z':
if (RExC_pm_flags & PMf_WILDCARD) {
/* See comment under \A above */
ret = reg_node(pRExC_state, MEOL);
@@ -13645,28 +13646,28 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
else {
ret = reg_node(pRExC_state, EOS);
}
- RExC_seen_zerolen++; /* Do not optimize RE away */
- goto finish_meta_pat;
- case 'C':
- vFAIL("\\C no longer supported");
- case 'X':
- ret = reg_node(pRExC_state, CLUMP);
- *flagp |= HASWIDTH;
- goto finish_meta_pat;
+ RExC_seen_zerolen++; /* Do not optimize RE away */
+ goto finish_meta_pat;
+ case 'C':
+ vFAIL("\\C no longer supported");
+ case 'X':
+ ret = reg_node(pRExC_state, CLUMP);
+ *flagp |= HASWIDTH;
+ goto finish_meta_pat;
- case 'B':
+ case 'B':
invert = 1;
/* FALLTHROUGH */
- case 'b':
+ case 'b':
{
U8 flags = 0;
- regex_charset charset = get_regex_charset(RExC_flags);
+ regex_charset charset = get_regex_charset(RExC_flags);
- RExC_seen_zerolen++;
+ RExC_seen_zerolen++;
RExC_seen |= REG_LOOKBEHIND_SEEN;
- op = BOUND + charset;
+ op = BOUND + charset;
- if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
+ if (RExC_parse >= RExC_end || *(RExC_parse + 1) != '{') {
flags = TRADITIONAL_BOUND;
if (op > BOUNDA) { /* /aa is same as /a */
op = BOUNDA;
@@ -13726,9 +13727,9 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
default:
bad_bound_type:
RExC_parse = endbrace;
- vFAIL2utf8f(
+ vFAIL2utf8f(
"'%" UTF8f "' is an unknown bound type",
- UTF8fARG(UTF, length, endbrace - length));
+ UTF8fARG(UTF, length, endbrace - length));
NOT_REACHED; /*NOTREACHED*/
}
RExC_parse = endbrace;
@@ -13751,7 +13752,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
? ASCII_RESTRICT_PAT_MODS
: ASCII_MORE_RESTRICT_PAT_MODS);
}
- }
+ }
if (op == BOUND) {
RExC_seen_d_op = TRUE;
@@ -13764,29 +13765,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
op += NBOUND - BOUND;
}
- ret = reg_node(pRExC_state, op);
+ ret = reg_node(pRExC_state, op);
FLAGS(REGNODE_p(ret)) = flags;
- goto finish_meta_pat;
+ goto finish_meta_pat;
}
- case 'R':
- ret = reg_node(pRExC_state, LNBREAK);
- *flagp |= HASWIDTH|SIMPLE;
- goto finish_meta_pat;
-
- case 'd':
- case 'D':
- case 'h':
- case 'H':
- case 'p':
- case 'P':
- case 's':
- case 'S':
- case 'v':
- case 'V':
- case 'w':
- case 'W':
+ case 'R':
+ ret = reg_node(pRExC_state, LNBREAK);
+ *flagp |= HASWIDTH|SIMPLE;
+ goto finish_meta_pat;
+
+ case 'd':
+ case 'D':
+ case 'h':
+ case 'H':
+ case 'p':
+ case 'P':
+ case 's':
+ case 'S':
+ case 'v':
+ case 'V':
+ case 'w':
+ case 'W':
/* These all have the same meaning inside [brackets], and it knows
* how to do the best optimizations for them. So, pretend we found
* these within brackets, and let it do the work */
@@ -13824,7 +13825,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
Set_Node_Offset(REGNODE_p(ret), parse_start);
Set_Node_Length(REGNODE_p(ret), RExC_parse - parse_start + 1); /* MJD */
nextchar(pRExC_state);
- break;
+ break;
case 'N':
/* Handle \N, \N{} and \N{NAMED SEQUENCE} (the latter meaning the
* \N{...} evaluates to a sequence of more than one code points).
@@ -13857,7 +13858,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
RExC_parse = parse_start;
goto defchar;
- case 'k': /* Handle \k<NAME> and \k'NAME' */
+ case 'k': /* Handle \k<NAME> and \k'NAME' */
parse_named_seq:
{
char ch;
@@ -13866,11 +13867,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
&& ch != '\''
&& ch != '{'))
{
- RExC_parse++;
- /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
- vFAIL2("Sequence %.2s... not terminated", parse_start);
- } else {
- RExC_parse += 2;
+ RExC_parse++;
+ /* diag_listed_as: Sequence \%s... not terminated in regex; marked by <-- HERE in m/%s/ */
+ vFAIL2("Sequence %.2s... not terminated", parse_start);
+ } else {
+ RExC_parse += 2;
ret = handle_named_backref(pRExC_state,
flagp,
parse_start,
@@ -13881,30 +13882,30 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
: '\'');
}
break;
- }
- case 'g':
- case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- {
- I32 num;
- bool hasbrace = 0;
-
- if (*RExC_parse == 'g') {
+ }
+ case 'g':
+ case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ {
+ I32 num;
+ bool hasbrace = 0;
+
+ if (*RExC_parse == 'g') {
bool isrel = 0;
- RExC_parse++;
- if (*RExC_parse == '{') {
- RExC_parse++;
- hasbrace = 1;
- }
- if (*RExC_parse == '-') {
- RExC_parse++;
- isrel = 1;
- }
- if (hasbrace && !isDIGIT(*RExC_parse)) {
- if (isrel) RExC_parse--;
+ RExC_parse++;
+ if (*RExC_parse == '{') {
+ RExC_parse++;
+ hasbrace = 1;
+ }
+ if (*RExC_parse == '-') {
+ RExC_parse++;
+ isrel = 1;
+ }
+ if (hasbrace && !isDIGIT(*RExC_parse)) {
+ if (isrel) RExC_parse--;
RExC_parse -= 2;
- goto parse_named_seq;
+ goto parse_named_seq;
}
if (RExC_parse >= RExC_end) {
@@ -13915,7 +13916,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
vFAIL("Reference to invalid group 0");
else if (num == I32_MAX) {
if (isDIGIT(*RExC_parse))
- vFAIL("Reference to nonexistent group");
+ vFAIL("Reference to nonexistent group");
else
unterminated_g:
vFAIL("Unterminated \\g... pattern");
@@ -14001,48 +14002,48 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
Set_Node_Cur_Length(REGNODE_p(ret), parse_start-1);
skip_to_be_ignored_text(pRExC_state, &RExC_parse,
FALSE /* Don't force to /x */ );
- }
- break;
- case '\0':
- if (RExC_parse >= RExC_end)
- FAIL("Trailing \\");
- /* FALLTHROUGH */
- default:
- /* Do not generate "unrecognized" warnings here, we fall
- back into the quick-grab loop below */
+ }
+ break;
+ case '\0':
+ if (RExC_parse >= RExC_end)
+ FAIL("Trailing \\");
+ /* FALLTHROUGH */
+ default:
+ /* Do not generate "unrecognized" warnings here, we fall
+ back into the quick-grab loop below */
RExC_parse = parse_start;
- goto defchar;
- } /* end of switch on a \foo sequence */
- break;
+ goto defchar;
+ } /* end of switch on a \foo sequence */
+ break;
case '#':
/* '#' comments should have been spaced over before this function was
* called */
assert((RExC_flags & RXf_PMf_EXTENDED) == 0);
- /*
+ /*
if (RExC_flags & RXf_PMf_EXTENDED) {
- RExC_parse = reg_skipcomment( pRExC_state, RExC_parse );
- if (RExC_parse < RExC_end)
- goto tryagain;
- }
+ RExC_parse = reg_skipcomment( pRExC_state, RExC_parse );
+ if (RExC_parse < RExC_end)
+ goto tryagain;
+ }
*/
- /* FALLTHROUGH */
+ /* FALLTHROUGH */
default:
- defchar: {
+ defchar: {
/* Here, we have determined that the next thing is probably a
* literal character. RExC_parse points to the first byte of its
* definition. (It still may be an escape sequence that evaluates
* to a single character) */
- STRLEN len = 0;
- UV ender = 0;
- char *p;
- char *s, *old_s = NULL, *old_old_s = NULL;
- char *s0;
+ STRLEN len = 0;
+ UV ender = 0;
+ char *p;
+ char *s, *old_s = NULL, *old_old_s = NULL;
+ char *s0;
U32 max_string_len = 255;
/* We may have to reparse the node, artificially stopping filling
@@ -14116,11 +14117,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
FILL_NODE(ret, node_type);
RExC_emit++;
- s = STRING(REGNODE_p(ret));
+ s = STRING(REGNODE_p(ret));
s0 = s;
- reparse:
+ reparse:
p = RExC_parse;
len = 0;
@@ -14162,7 +14163,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* The exceptions override this */
Size_t added_len = 1;
- oldp = p;
+ oldp = p;
old_old_s = old_s;
old_s = s;
@@ -14170,62 +14171,62 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
assert( (RExC_flags & RXf_PMf_EXTENDED) == 0
|| ! is_PATWS_safe((p), RExC_end, UTF));
- switch ((U8)*p) {
+ switch ((U8)*p) {
const char* message;
U32 packed_warn;
U8 grok_c_char;
- case '^':
- case '$':
- case '.':
- case '[':
- case '(':
- case ')':
- case '|':
- goto loopdone;
- case '\\':
- /* Literal Escapes Switch
-
- This switch is meant to handle escape sequences that
- resolve to a literal character.
-
- Every escape sequence that represents something
- else, like an assertion or a char class, is handled
- in the switch marked 'Special Escapes' above in this
- routine, but also has an entry here as anything that
- isn't explicitly mentioned here will be treated as
- an unescaped equivalent literal.
- */
-
- switch ((U8)*++p) {
-
- /* These are all the special escapes. */
- case 'A': /* Start assertion */
- case 'b': case 'B': /* Word-boundary assertion*/
- case 'C': /* Single char !DANGEROUS! */
- case 'd': case 'D': /* digit class */
- case 'g': case 'G': /* generic-backref, pos assertion */
- case 'h': case 'H': /* HORIZWS */
- case 'k': case 'K': /* named backref, keep marker */
- case 'p': case 'P': /* Unicode property */
- case 'R': /* LNBREAK */
- case 's': case 'S': /* space class */
- case 'v': case 'V': /* VERTWS */
- case 'w': case 'W': /* word class */
+ case '^':
+ case '$':
+ case '.':
+ case '[':
+ case '(':
+ case ')':
+ case '|':
+ goto loopdone;
+ case '\\':
+ /* Literal Escapes Switch
+
+ This switch is meant to handle escape sequences that
+ resolve to a literal character.
+
+ Every escape sequence that represents something
+ else, like an assertion or a char class, is handled
+ in the switch marked 'Special Escapes' above in this
+ routine, but also has an entry here as anything that
+ isn't explicitly mentioned here will be treated as
+ an unescaped equivalent literal.
+ */
+
+ switch ((U8)*++p) {
+
+ /* These are all the special escapes. */
+ case 'A': /* Start assertion */
+ case 'b': case 'B': /* Word-boundary assertion*/
+ case 'C': /* Single char !DANGEROUS! */
+ case 'd': case 'D': /* digit class */
+ case 'g': case 'G': /* generic-backref, pos assertion */
+ case 'h': case 'H': /* HORIZWS */
+ case 'k': case 'K': /* named backref, keep marker */
+ case 'p': case 'P': /* Unicode property */
+ case 'R': /* LNBREAK */
+ case 's': case 'S': /* space class */
+ case 'v': case 'V': /* VERTWS */
+ case 'w': case 'W': /* word class */
case 'X': /* eXtended Unicode "combining
character sequence" */
- case 'z': case 'Z': /* End of line/string assertion */
- --p;
- goto loopdone;
-
- /* Anything after here is an escape that resolves to a
- literal. (Except digits, which may or may not)
- */
- case 'n':
- ender = '\n';
- p++;
- break;
- case 'N': /* Handle a single-code point named character. */
+ case 'z': case 'Z': /* End of line/string assertion */
+ --p;
+ goto loopdone;
+
+ /* Anything after here is an escape that resolves to a
+ literal. (Except digits, which may or may not)
+ */
+ case 'n':
+ ender = '\n';
+ p++;
+ break;
+ case 'N': /* Handle a single-code point named character. */
RExC_parse = p + 1;
if (! grok_bslash_N(pRExC_state,
NULL, /* Fail if evaluates to
@@ -14269,27 +14270,27 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
break;
- case 'r':
- ender = '\r';
- p++;
- break;
- case 't':
- ender = '\t';
- p++;
- break;
- case 'f':
- ender = '\f';
- p++;
- break;
- case 'e':
- ender = ESC_NATIVE;
- p++;
- break;
- case 'a':
- ender = '\a';
- p++;
- break;
- case 'o':
+ case 'r':
+ ender = '\r';
+ p++;
+ break;
+ case 't':
+ ender = '\t';
+ p++;
+ break;
+ case 'f':
+ ender = '\f';
+ p++;
+ break;
+ case 'e':
+ ender = ESC_NATIVE;
+ p++;
+ break;
+ case 'a':
+ ender = '\a';
+ p++;
+ break;
+ case 'o':
if (! grok_bslash_o(&p,
RExC_end,
&ender,
@@ -14308,7 +14309,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
warn_non_literal_string(p, packed_warn, message);
}
break;
- case 'x':
+ case 'x':
if (! grok_bslash_x(&p,
RExC_end,
&ender,
@@ -14335,7 +14336,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
#endif
break;
- case 'c':
+ case 'c':
p++;
if (! grok_bslash_c(*p, &grok_c_char,
&message, &packed_warn))
@@ -14354,7 +14355,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
warn_non_literal_string(p, packed_warn, message);
}
- break;
+ break;
case '8': case '9': /* must be a backreference */
--p;
/* we have an escape like \8 which cannot be an octal escape
@@ -14362,7 +14363,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* escape which may or may not be a legitimate backref. */
goto loopdone;
case '1': case '2': case '3':case '4':
- case '5': case '6': case '7':
+ case '5': case '6': case '7':
/* When we parse backslash escapes there is ambiguity
* between backreferences and octal escapes. Any escape
* from \1 - \9 is a backreference, any multi-digit
@@ -14387,29 +14388,29 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
/* FALLTHROUGH */
case '0':
- {
- I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+ {
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT
| PERL_SCAN_NOTIFY_ILLDIGIT;
- STRLEN numlen = 3;
- ender = grok_oct(p, &numlen, &flags, NULL);
- p += numlen;
+ STRLEN numlen = 3;
+ ender = grok_oct(p, &numlen, &flags, NULL);
+ p += numlen;
if ( (flags & PERL_SCAN_NOTIFY_ILLDIGIT)
&& isDIGIT(*p) /* like \08, \178 */
&& ckWARN(WARN_REGEXP))
{
- reg_warn_non_literal_string(
+ reg_warn_non_literal_string(
p + 1,
form_alien_digit_msg(8, numlen, p,
RExC_end, UTF, FALSE));
}
- }
- break;
- case '\0':
- if (p >= RExC_end)
- FAIL("Trailing \\");
- /* FALLTHROUGH */
- default:
- if (isALPHANUMERIC(*p)) {
+ }
+ break;
+ case '\0':
+ if (p >= RExC_end)
+ FAIL("Trailing \\");
+ /* FALLTHROUGH */
+ default:
+ if (isALPHANUMERIC(*p)) {
/* An alpha followed by '{' is going to fail next
* iteration, so don't output this warning in that
* case */
@@ -14417,11 +14418,11 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
ckWARN2reg(p + 1, "Unrecognized escape \\%.1s"
" passed through", p);
}
- }
- goto normal_default;
- } /* End of switch on '\' */
- break;
- case '{':
+ }
+ goto normal_default;
+ } /* End of switch on '\' */
+ break;
+ case '{':
/* Trying to gain new uses for '{' without breaking too
* much existing code is hard. The solution currently
* adopted is:
@@ -14437,7 +14438,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* misspelled the quantifier. Without this warning,
* the quantifier would silently be taken as a literal
* string of characters instead of a meta construct */
- if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
+ if (len || (p > RExC_start && isALPHA_A(*(p - 1)))) {
if ( RExC_strict
|| ( p > parse_start + 1
&& isALPHA_A(*(p - 1))
@@ -14450,28 +14451,28 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
}
ckWARNreg(p + 1, "Unescaped left brace in regex is"
" passed through");
- }
- goto normal_default;
+ }
+ goto normal_default;
case '}':
case ']':
if (p > RExC_parse && RExC_strict) {
ckWARN2reg(p + 1, "Unescaped literal '%c'", *p);
}
- /*FALLTHROUGH*/
- default: /* A literal character */
- normal_default:
- if (! UTF8_IS_INVARIANT(*p) && UTF) {
- STRLEN numlen;
- ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
- &numlen, UTF8_ALLOW_DEFAULT);
- p += numlen;
- }
- else
- ender = (U8) *p++;
- break;
- } /* End of switch on the literal */
-
- /* Here, have looked at the literal character, and <ender>
+ /*FALLTHROUGH*/
+ default: /* A literal character */
+ normal_default:
+ if (! UTF8_IS_INVARIANT(*p) && UTF) {
+ STRLEN numlen;
+ ender = utf8n_to_uvchr((U8*)p, RExC_end - p,
+ &numlen, UTF8_ALLOW_DEFAULT);
+ p += numlen;
+ }
+ else
+ ender = (U8) *p++;
+ break;
+ } /* End of switch on the literal */
+
+ /* Here, have looked at the literal character, and <ender>
* contains its ordinal; <p> points to the character after it.
* */
@@ -14503,7 +14504,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* only thing in its new node */
next_is_quantifier = LIKELY(p < RExC_end)
- && UNLIKELY(ISMULT2(p));
+ && UNLIKELY(isQUANTIFIER(p, RExC_end));
if (next_is_quantifier && LIKELY(len)) {
p = oldp;
@@ -14733,20 +14734,20 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
* requires UTF-8 to represent. */
: (char) toLOWER_L1(ender);
}
- } /* End of adding current character to the node */
+ } /* End of adding current character to the node */
done_with_this_char:
len += added_len;
- if (next_is_quantifier) {
+ if (next_is_quantifier) {
/* Here, the next input is a quantifier, and to get here,
* the current character is the only one in the node. */
goto loopdone;
- }
+ }
- } /* End of loop through literal characters */
+ } /* End of loop through literal characters */
/* Here we have either exhausted the input or run out of room in
* the node. If the former, we are done. (If we encountered a
@@ -15236,7 +15237,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
Safefree(locfold_buf);
Safefree(loc_correspondence);
}
- } /* End of verifying node ends with an appropriate char */
+ } /* End of verifying node ends with an appropriate char */
/* We need to start the next node at the character that didn't fit
* in this one */
@@ -15360,15 +15361,15 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
Set_Node_Length(REGNODE_p(ret), p - parse_start - 1);
RExC_parse = p;
- {
- /* len is STRLEN which is unsigned, need to copy to signed */
- IV iv = len;
- if (iv < 0)
- vFAIL("Internal disaster");
- }
+ {
+ /* len is STRLEN which is unsigned, need to copy to signed */
+ IV iv = len;
+ if (iv < 0)
+ vFAIL("Internal disaster");
+ }
- } /* End of label 'defchar:' */
- break;
+ } /* End of label 'defchar:' */
+ break;
} /* End of giant switch on input character */
/* Position parse to next real character */
@@ -15408,53 +15409,53 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, SV** invlist_ptr)
ANYOF_BITMAP_ZERO(node);
if (*invlist_ptr) {
- /* This gets set if we actually need to modify things */
- bool change_invlist = FALSE;
+ /* This gets set if we actually need to modify things */
+ bool change_invlist = FALSE;
- UV start, end;
+ UV start, end;
- /* Start looking through *invlist_ptr */
- invlist_iterinit(*invlist_ptr);
- while (invlist_iternext(*invlist_ptr, &start, &end)) {
- UV high;
- int i;
+ /* Start looking through *invlist_ptr */
+ invlist_iterinit(*invlist_ptr);
+ while (invlist_iternext(*invlist_ptr, &start, &end)) {
+ UV high;
+ int i;
if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) {
ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP;
}
- /* Quit if are above what we should change */
- if (start >= NUM_ANYOF_CODE_POINTS) {
- break;
- }
+ /* Quit if are above what we should change */
+ if (start >= NUM_ANYOF_CODE_POINTS) {
+ break;
+ }
- change_invlist = TRUE;
+ change_invlist = TRUE;
- /* Set all the bits in the range, up to the max that we are doing */
- high = (end < NUM_ANYOF_CODE_POINTS - 1)
+ /* Set all the bits in the range, up to the max that we are doing */
+ high = (end < NUM_ANYOF_CODE_POINTS - 1)
? end
: NUM_ANYOF_CODE_POINTS - 1;
- for (i = start; i <= (int) high; i++) {
+ for (i = start; i <= (int) high; i++) {
ANYOF_BITMAP_SET(node, i);
- }
- }
- invlist_iterfinish(*invlist_ptr);
+ }
+ }
+ invlist_iterfinish(*invlist_ptr);
/* Done with loop; remove any code points that are in the bitmap from
* *invlist_ptr; similarly for code points above the bitmap if we have
* a flag to match all of them anyways */
- if (change_invlist) {
- _invlist_subtract(*invlist_ptr, PL_InBitmap, invlist_ptr);
- }
+ if (change_invlist) {
+ _invlist_subtract(*invlist_ptr, PL_InBitmap, invlist_ptr);
+ }
if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
- _invlist_intersection(*invlist_ptr, PL_InBitmap, invlist_ptr);
- }
+ _invlist_intersection(*invlist_ptr, PL_InBitmap, invlist_ptr);
+ }
- /* If have completely emptied it, remove it completely */
- if (_invlist_len(*invlist_ptr) == 0) {
- SvREFCNT_dec_NN(*invlist_ptr);
- *invlist_ptr = NULL;
- }
+ /* If have completely emptied it, remove it completely */
+ if (_invlist_len(*invlist_ptr) == 0) {
+ SvREFCNT_dec_NN(*invlist_ptr);
+ *invlist_ptr = NULL;
+ }
}
}
@@ -16495,7 +16496,7 @@ redo_curchar:
RExC_parse++;
RExC_sets_depth++;
- node = reg(pRExC_state, 2, flagp, depth+1);
+ node = reg(pRExC_state, 2, flagp, depth+1);
RETURN_FAIL_ON_RESTART(*flagp, flagp);
if ( OP(REGNODE_p(node)) != REGEX_SET
@@ -17316,7 +17317,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
SV *listsv = NULL; /* List of \p{user-defined} whose definitions
aren't available at the time this was called */
STRLEN initial_listsv_len = 0; /* Kind of a kludge to see if it is more
- than just initialized. */
+ than just initialized. */
SV* properties = NULL; /* Code points that match \p{} \P{} */
SV* posixes = NULL; /* Code points that match classes like [:word:],
extended beyond the Latin1 range. These have to
@@ -17333,7 +17334,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
leading to less compilation and execution
work */
UV element_count = 0; /* Number of distinct elements in the class.
- Optimizations may be possible if this is tiny */
+ Optimizations may be possible if this is tiny */
AV * multi_char_matches = NULL; /* Code points that fold to more than one
character; used under /i */
UV n;
@@ -17436,7 +17437,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
assert(RExC_parse <= RExC_end);
if (UCHARAT(RExC_parse) == '^') { /* Complement the class */
- RExC_parse++;
+ RExC_parse++;
invert = TRUE;
allow_mutiple_chars = FALSE;
MARK_NAUGHTY(1);
@@ -17471,7 +17472,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* allow 1st char to be ']' (allowing it to be '-' is dealt with later) */
if (UCHARAT(RExC_parse) == ']')
- goto charclassloop;
+ goto charclassloop;
while (1) {
@@ -17499,23 +17500,23 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
charclassloop:
- namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
+ namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
save_value = value;
save_prevvalue = prevvalue;
- if (!range) {
- rangebegin = RExC_parse;
- element_count++;
+ if (!range) {
+ rangebegin = RExC_parse;
+ element_count++;
non_portable_endpoint = 0;
- }
- if (UTF && ! UTF8_IS_INVARIANT(* RExC_parse)) {
- value = utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse,
- &numlen, UTF8_ALLOW_DEFAULT);
- RExC_parse += numlen;
- }
- else
- value = UCHARAT(RExC_parse++);
+ }
+ if (UTF && ! UTF8_IS_INVARIANT(* RExC_parse)) {
+ value = utf8n_to_uvchr((U8*)RExC_parse,
+ RExC_end - RExC_parse,
+ &numlen, UTF8_ALLOW_DEFAULT);
+ RExC_parse += numlen;
+ }
+ else
+ value = UCHARAT(RExC_parse++);
if (value == '[') {
char * posix_class_end;
@@ -17570,20 +17571,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
vFAIL("Unmatched [");
}
- if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
- value = utf8n_to_uvchr((U8*)RExC_parse,
- RExC_end - RExC_parse,
- &numlen, UTF8_ALLOW_DEFAULT);
- RExC_parse += numlen;
- }
- else
- value = UCHARAT(RExC_parse++);
+ if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
+ value = utf8n_to_uvchr((U8*)RExC_parse,
+ RExC_end - RExC_parse,
+ &numlen, UTF8_ALLOW_DEFAULT);
+ RExC_parse += numlen;
+ }
+ else
+ value = UCHARAT(RExC_parse++);
- /* Some compilers cannot handle switching on 64-bit integer
- * values, therefore value cannot be an UV. Yes, this will
- * be a problem later if we want switch on Unicode.
- * A similar issue a little bit later when switching on
- * namedclass. --jhi */
+ /* Some compilers cannot handle switching on 64-bit integer
+ * values, therefore value cannot be an UV. Yes, this will
+ * be a problem later if we want switch on Unicode.
+ * A similar issue a little bit later when switching on
+ * namedclass. --jhi */
/* If the \ is escaping white space when white space is being
* skipped, it means that that white space is wanted literally, and
@@ -17594,16 +17595,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
U32 packed_warn;
U8 grok_c_char;
- case 'w': namedclass = ANYOF_WORDCHAR; break;
- case 'W': namedclass = ANYOF_NWORDCHAR; break;
- case 's': namedclass = ANYOF_SPACE; break;
- case 'S': namedclass = ANYOF_NSPACE; break;
- case 'd': namedclass = ANYOF_DIGIT; break;
- case 'D': namedclass = ANYOF_NDIGIT; break;
- case 'v': namedclass = ANYOF_VERTWS; break;
- case 'V': namedclass = ANYOF_NVERTWS; break;
- case 'h': namedclass = ANYOF_HORIZWS; break;
- case 'H': namedclass = ANYOF_NHORIZWS; break;
+ case 'w': namedclass = ANYOF_WORDCHAR; break;
+ case 'W': namedclass = ANYOF_NWORDCHAR; break;
+ case 's': namedclass = ANYOF_SPACE; break;
+ case 'S': namedclass = ANYOF_NSPACE; break;
+ case 'd': namedclass = ANYOF_DIGIT; break;
+ case 'D': namedclass = ANYOF_NDIGIT; break;
+ case 'v': namedclass = ANYOF_VERTWS; break;
+ case 'V': namedclass = ANYOF_NVERTWS; break;
+ case 'h': namedclass = ANYOF_HORIZWS; break;
+ case 'H': namedclass = ANYOF_NHORIZWS; break;
case 'N': /* Handle \N{NAME} in class */
{
const char * const backslash_N_beg = RExC_parse - 2;
@@ -17670,10 +17671,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
unicode_range = TRUE; /* \N{} are Unicode */
}
break;
- case 'p':
- case 'P':
- {
- char *e;
+ case 'p':
+ case 'P':
+ {
+ char *e;
if (RExC_pm_flags & PMf_WILDCARD) {
RExC_parse++;
@@ -17684,14 +17685,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
" wildcard subpatterns", (char) value, *(RExC_parse - 1));
}
- /* \p means they want Unicode semantics */
- REQUIRE_UNI_RULES(flagp, 0);
+ /* \p means they want Unicode semantics */
+ REQUIRE_UNI_RULES(flagp, 0);
- if (RExC_parse >= RExC_end)
- vFAIL2("Empty \\%c", (U8)value);
- if (*RExC_parse == '{') {
- const U8 c = (U8)value;
- e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
+ if (RExC_parse >= RExC_end)
+ vFAIL2("Empty \\%c", (U8)value);
+ if (*RExC_parse == '{') {
+ const U8 c = (U8)value;
+ e = (char *) memchr(RExC_parse, '}', RExC_end - RExC_parse);
if (!e) {
RExC_parse++;
vFAIL2("Missing right brace on \\%c{}", c);
@@ -17703,9 +17704,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* any '^', even when not under /x */
while (isSPACE(*RExC_parse)) {
RExC_parse++;
- }
+ }
- if (UCHARAT(RExC_parse) == '^') {
+ if (UCHARAT(RExC_parse) == '^') {
/* toggle. (The rhs xor gets the single bit that
* differs between P and p; the other xor inverts just
@@ -17721,12 +17722,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
if (e == RExC_parse)
vFAIL2("Empty \\%c{}", c);
- n = e - RExC_parse;
- while (isSPACE(*(RExC_parse + n - 1)))
- n--;
+ n = e - RExC_parse;
+ while (isSPACE(*(RExC_parse + n - 1)))
+ n--;
- } /* The \p isn't immediately followed by a '{' */
- else if (! isALPHA(*RExC_parse)) {
+ } /* The \p isn't immediately followed by a '{' */
+ else if (! isALPHA(*RExC_parse)) {
RExC_parse += (UTF)
? UTF8_SAFE_SKIP(RExC_parse, RExC_end)
: 1;
@@ -17735,10 +17736,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
(U8) value);
}
else {
- e = RExC_parse;
- n = 1;
- }
- {
+ e = RExC_parse;
+ n = 1;
+ }
+ {
char* name = RExC_parse;
/* Any message returned about expanding the definition */
@@ -17771,7 +17772,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
mojibake */
RExC_utf8 = TRUE;
}
- /* diag_listed_as: Can't find Unicode property definition "%s" in regex; marked by <-- HERE in m/%s/ */
+ /* diag_listed_as: Can't find Unicode property definition "%s" in regex; marked by <-- HERE in m/%s/ */
vFAIL2utf8f("%" UTF8f, UTF8fARG(SvUTF8(msg),
SvCUR(msg), SvPVX(msg)));
}
@@ -17889,30 +17890,30 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* Invert if asking for the complement */
if (value == 'P') {
- _invlist_union_complement_2nd(properties,
+ _invlist_union_complement_2nd(properties,
prop_definition,
&properties);
}
else {
_invlist_union(properties, prop_definition, &properties);
- }
+ }
}
}
- RExC_parse = e + 1;
+ RExC_parse = e + 1;
namedclass = ANYOF_UNIPROP; /* no official name, but it's
named */
- }
- break;
- case 'n': value = '\n'; break;
- case 'r': value = '\r'; break;
- case 't': value = '\t'; break;
- case 'f': value = '\f'; break;
- case 'b': value = '\b'; break;
- case 'e': value = ESC_NATIVE; break;
- case 'a': value = '\a'; break;
- case 'o':
- RExC_parse--; /* function expects to be pointed at the 'o' */
+ }
+ break;
+ case 'n': value = '\n'; break;
+ case 'r': value = '\r'; break;
+ case 't': value = '\t'; break;
+ case 'f': value = '\f'; break;
+ case 'b': value = '\b'; break;
+ case 'e': value = ESC_NATIVE; break;
+ case 'a': value = '\a'; break;
+ case 'o':
+ RExC_parse--; /* function expects to be pointed at the 'o' */
if (! grok_bslash_o(&RExC_parse,
RExC_end,
&value,
@@ -17932,9 +17933,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
if (value < 256) {
non_portable_endpoint++;
}
- break;
- case 'x':
- RExC_parse--; /* function expects to be pointed at the 'x' */
+ break;
+ case 'x':
+ RExC_parse--; /* function expects to be pointed at the 'x' */
if (! grok_bslash_x(&RExC_parse,
RExC_end,
&value,
@@ -17954,8 +17955,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
if (value < 256) {
non_portable_endpoint++;
}
- break;
- case 'c':
+ break;
+ case 'c':
if (! grok_bslash_c(*RExC_parse, &grok_c_char, &message,
&packed_warn))
{
@@ -17974,16 +17975,16 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
non_portable_endpoint++;
- break;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7':
- {
- /* Take 1-3 octal digits */
- I32 flags = PERL_SCAN_SILENT_ILLDIGIT
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7':
+ {
+ /* Take 1-3 octal digits */
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT
| PERL_SCAN_NOTIFY_ILLDIGIT;
numlen = (strict) ? 4 : 3;
value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
- RExC_parse += numlen;
+ RExC_parse += numlen;
if (numlen != 3) {
if (strict) {
RExC_parse += (UTF)
@@ -18005,11 +18006,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
if (value < 256) {
non_portable_endpoint++;
}
- break;
- }
- default:
- /* Allow \_ to not give an error */
- if (isWORDCHAR(value) && value != '_') {
+ break;
+ }
+ default:
+ /* Allow \_ to not give an error */
+ if (isWORDCHAR(value) && value != '_') {
if (strict) {
vFAIL2("Unrecognized escape \\%c in character class",
(int)value);
@@ -18019,20 +18020,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
"Unrecognized escape \\%c in character class passed through",
(int)value);
}
- }
- break;
- } /* End of switch on char following backslash */
- } /* end of handling backslash escape sequences */
+ }
+ break;
+ } /* End of switch on char following backslash */
+ } /* end of handling backslash escape sequences */
/* Here, we have the current token in 'value' */
- if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
+ if (namedclass > OOB_NAMEDCLASS) { /* this is a named class \blah */
U8 classnum;
- /* a bad range like a-\d, a-[:digit:]. The '-' is taken as a
- * literal, as is the character that began the false range, i.e.
- * the 'a' in the examples */
- if (range) {
+ /* a bad range like a-\d, a-[:digit:]. The '-' is taken as a
+ * literal, as is the character that began the false range, i.e.
+ * the 'a' in the examples */
+ if (range) {
const int w = (RExC_parse >= rangebegin)
? RExC_parse - rangebegin
: 0;
@@ -18050,13 +18051,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
prevvalue);
}
- range = 0; /* this was not a true range */
+ range = 0; /* this was not a true range */
element_count += 2; /* So counts for three values */
- }
+ }
classnum = namedclass_to_classnum(namedclass);
- if (LOC && namedclass < ANYOF_POSIXL_MAX
+ if (LOC && namedclass < ANYOF_POSIXL_MAX
#ifndef HAS_ISASCII
&& classnum != _CC_ASCII
#endif
@@ -18178,8 +18179,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
namedclass % 2 != 0,
posixes_ptr);
}
- }
- } /* end of namedclass \blah */
+ }
+ } /* end of namedclass \blah */
SKIP_BRACKETED_WHITE_SPACE(skip_white, RExC_parse, RExC_end);
@@ -18192,20 +18193,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* the next real character to be processed is the range indicator--the
* minus sign */
- if (range) {
+ if (range) {
#ifdef EBCDIC
/* For unicode ranges, we have to test that the Unicode as opposed
* to the native values are not decreasing. (Above 255, there is
* no difference between native and Unicode) */
- if (unicode_range && prevvalue < 255 && value < 255) {
+ if (unicode_range && prevvalue < 255 && value < 255) {
if (NATIVE_TO_LATIN1(prevvalue) > NATIVE_TO_LATIN1(value)) {
goto backwards_range;
}
}
else
#endif
- if (prevvalue > value) /* b-a */ {
- int w;
+ if (prevvalue > value) /* b-a */ {
+ int w;
#ifdef EBCDIC
backwards_range:
#endif
@@ -18214,9 +18215,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
"Invalid [] range \"%" UTF8f "\"",
UTF8fARG(UTF, w, rangebegin));
NOT_REACHED; /* NOTREACHED */
- }
- }
- else {
+ }
+ }
+ else {
prevvalue = value; /* save the beginning of the potential range */
if (! stop_at_1 /* Can't be a range if parsing just one thing */
&& *RExC_parse == '-')
@@ -18253,8 +18254,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
range = 1; /* yeah, it's a range! */
continue; /* but do it the next time */
}
- }
- }
+ }
+ }
if (namedclass > OOB_NAMEDCLASS) {
continue;
@@ -18264,8 +18265,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* <prevvalue> is the beginning of the range, if any; or <value> if
* not. */
- /* non-Latin1 code point implies unicode semantics. */
- if (value > 255) {
+ /* non-Latin1 code point implies unicode semantics. */
+ if (value > 255) {
if (value > MAX_LEGAL_CP && ( value != UV_MAX
|| prevvalue > MAX_LEGAL_CP))
{
@@ -18281,7 +18282,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
PL_extended_cp_format,
value);
}
- }
+ }
/* Ready to process either the single value, or the completed range.
* For single-valued non-inverted ranges, we consider the possibility
@@ -18518,7 +18519,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
#endif
- range = 0; /* this range (if it was one) is done now */
+ range = 0; /* this range (if it was one) is done now */
} /* End of loop through all the text within the brackets */
if ( posix_warnings && av_tindex_skip_len_mg(posix_warnings) >= 0) {
@@ -18529,12 +18530,12 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* deal with them by building up a substitute parse string, and recursively
* calling reg() on it, instead of proceeding */
if (multi_char_matches) {
- SV * substitute_parse = newSVpvn_flags("?:", 2, SVs_TEMP);
+ SV * substitute_parse = newSVpvn_flags("?:", 2, SVs_TEMP);
I32 cp_count;
- STRLEN len;
- char *save_end = RExC_end;
- char *save_parse = RExC_parse;
- char *save_start = RExC_start;
+ STRLEN len;
+ char *save_end = RExC_end;
+ char *save_parse = RExC_parse;
+ char *save_start = RExC_start;
Size_t constructed_prefix_len = 0; /* This gives the length of the
constructed portion of the
substitute parse. */
@@ -18612,20 +18613,20 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* reported. See the comments at the definition of
* REPORT_LOCATION_ARGS for details */
RExC_copy_start_in_input = (char *) orig_parse;
- RExC_start = RExC_parse = SvPV(substitute_parse, len);
+ RExC_start = RExC_parse = SvPV(substitute_parse, len);
RExC_copy_start_in_constructed = RExC_start + constructed_prefix_len;
- RExC_end = RExC_parse + len;
+ RExC_end = RExC_parse + len;
RExC_in_multi_char_class = 1;
- ret = reg(pRExC_state, 1, &reg_flags, depth+1);
+ ret = reg(pRExC_state, 1, &reg_flags, depth+1);
*flagp |= reg_flags & (HASWIDTH|SIMPLE|POSTPONED|RESTART_PARSE|NEED_UTF8);
/* And restore so can parse the rest of the pattern */
RExC_parse = save_parse;
- RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start;
- RExC_end = save_end;
- RExC_in_multi_char_class = 0;
+ RExC_start = RExC_copy_start_in_constructed = RExC_copy_start_in_input = save_start;
+ RExC_end = save_end;
+ RExC_in_multi_char_class = 0;
SvREFCNT_dec_NN(multi_char_matches);
return ret;
}
@@ -18771,7 +18772,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* Now that we have finished adding all the folds, there is no reason
* to keep the foldable list separate */
_invlist_union(cp_list, cp_foldable_list, &cp_list);
- SvREFCNT_dec_NN(cp_foldable_list);
+ SvREFCNT_dec_NN(cp_foldable_list);
}
/* And combine the result (if any) with any inversion lists from posix
@@ -19007,8 +19008,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
{
_invlist_invert(cp_list);
- /* Clear the invert flag since have just done it here */
- invert = FALSE;
+ /* Clear the invert flag since have just done it here */
+ invert = FALSE;
}
/* All possible optimizations below still have these characteristics.
@@ -19954,15 +19955,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* when the target string is UTF-8 (<upper_latin1_only_utf8_matches>).
* */
if (upper_latin1_only_utf8_matches) {
- if (cp_list) {
- _invlist_union(cp_list,
+ if (cp_list) {
+ _invlist_union(cp_list,
upper_latin1_only_utf8_matches,
&cp_list);
- SvREFCNT_dec_NN(upper_latin1_only_utf8_matches);
- }
- else {
- cp_list = upper_latin1_only_utf8_matches;
- }
+ SvREFCNT_dec_NN(upper_latin1_only_utf8_matches);
+ }
+ else {
+ cp_list = upper_latin1_only_utf8_matches;
+ }
ANYOF_FLAGS(REGNODE_p(ret)) |= ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP;
}
@@ -20017,11 +20018,11 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) {
assert(! (ANYOF_FLAGS(node)
& ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP));
- ARG_SET(node, ANYOF_ONLY_HAS_BITMAP);
+ ARG_SET(node, ANYOF_ONLY_HAS_BITMAP);
}
else {
- AV * const av = newAV();
- SV *rv;
+ AV * const av = newAV();
+ SV *rv;
if (cp_list) {
av_store(av, INVLIST_INDEX, SvREFCNT_inc_NN(cp_list));
@@ -20040,10 +20041,10 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
SvREFCNT_inc_NN(runtime_defns));
}
- rv = newRV_noinc(MUTABLE_SV(av));
- n = add_data(pRExC_state, STR_WITH_LEN("s"));
- RExC_rxi->data->data[n] = (void*)rv;
- ARG_SET(node, n);
+ rv = newRV_noinc(MUTABLE_SV(av));
+ n = add_data(pRExC_state, STR_WITH_LEN("s"));
+ RExC_rxi->data->data[n] = (void*)rv;
+ ARG_SET(node, n);
}
}
@@ -20097,12 +20098,12 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node,
assert(! output_invlist || listsvp);
if (data && data->count) {
- const U32 n = ARG(node);
+ const U32 n = ARG(node);
- if (data->what[n] == 's') {
- SV * const rv = MUTABLE_SV(data->data[n]);
- AV * const av = MUTABLE_AV(SvRV(rv));
- SV **const ary = AvARRAY(av);
+ if (data->what[n] == 's') {
+ SV * const rv = MUTABLE_SV(data->data[n]);
+ AV * const av = MUTABLE_AV(SvRV(rv));
+ SV **const ary = AvARRAY(av);
invlist = ary[INVLIST_INDEX];
@@ -20114,7 +20115,7 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node,
si = ary[DEFERRED_USER_DEFINED_INDEX];
}
- if (doinit && (si || invlist)) {
+ if (doinit && (si || invlist)) {
if (si) {
bool user_defined;
SV * msg = newSVpvs_flags("", SVs_TEMP);
@@ -20156,20 +20157,20 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node,
: INVLIST_INDEX);
si = NULL;
}
- }
- }
+ }
+ }
}
/* If requested, return a printable version of what this ANYOF node matches
* */
if (listsvp) {
- SV* matches_string = NULL;
+ SV* matches_string = NULL;
/* This function can be called at compile-time, before everything gets
* resolved, in which case we return the currently best available
* information, which is the string that will eventually be used to do
* that resolving, 'si' */
- if (si) {
+ if (si) {
/* Here, we only have 'si' (and possibly some passed-in data in
* 'invlist', which is handled below) If the caller only wants
* 'si', use that. */
@@ -20268,7 +20269,7 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node,
SvCUR_set(matches_string, SvCUR(matches_string) - 1);
}
} /* end of has an 'si' */
- }
+ }
/* Add the stuff that's already known */
if (invlist) {
@@ -20291,7 +20292,7 @@ Perl_get_re_gclass_nonbitmap_data(pTHX_ const regexp *prog, const regnode* node,
}
}
- *listsvp = matches_string;
+ *listsvp = matches_string;
}
return invlist;
@@ -20347,21 +20348,21 @@ S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state,
assert( ! UTF || UTF8_IS_INVARIANT(**p) || UTF8_IS_START(**p));
for (;;) {
- if (RExC_end - (*p) >= 3
- && *(*p) == '('
- && *(*p + 1) == '?'
- && *(*p + 2) == '#')
- {
- while (*(*p) != ')') {
- if ((*p) == RExC_end)
- FAIL("Sequence (?#... not terminated");
- (*p)++;
- }
- (*p)++;
- continue;
- }
-
- if (use_xmod) {
+ if (RExC_end - (*p) >= 3
+ && *(*p) == '('
+ && *(*p + 1) == '?'
+ && *(*p + 2) == '#')
+ {
+ while (*(*p) != ')') {
+ if ((*p) == RExC_end)
+ FAIL("Sequence (?#... not terminated");
+ (*p)++;
+ }
+ (*p)++;
+ continue;
+ }
+
+ if (use_xmod) {
const char * save_p = *p;
while ((*p) < RExC_end) {
STRLEN len;
@@ -20378,7 +20379,7 @@ S_skip_to_be_ignored_text(pTHX_ RExC_state_t *pRExC_state,
if (*p != save_p) {
continue;
}
- }
+ }
break;
}
@@ -20432,7 +20433,7 @@ S_change_engine_size(pTHX_ RExC_state_t *pRExC_state, const Ptrdiff_t size)
char,
regexp_internal);
if ( RExC_rxi == NULL )
- FAIL("Regexp out of space");
+ FAIL("Regexp out of space");
RXi_SET(RExC_rx, RExC_rxi);
RExC_emit_start = RExC_rxi->program;
@@ -20473,16 +20474,16 @@ S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_
assert(extra_size >= regarglen[op] || PL_regkind[op] == ANYOF);
if (RExC_offsets) { /* MJD */
- MJD_OFFSET_DEBUG(
+ MJD_OFFSET_DEBUG(
("%s:%d: (op %s) %s %" UVuf " (len %" UVuf ") (max %" UVuf ").\n",
name, __LINE__,
PL_reg_name[op],
(UV)(RExC_emit) > RExC_offsets[0]
- ? "Overwriting end of array!\n" : "OK",
+ ? "Overwriting end of array!\n" : "OK",
(UV)(RExC_emit),
(UV)(RExC_parse - RExC_start),
(UV)RExC_offsets[0]));
- Set_Node_Offset(REGNODE_p(RExC_emit), RExC_parse + (op == END));
+ Set_Node_Offset(REGNODE_p(RExC_emit), RExC_parse + (op == END));
}
#endif
return(ret);
@@ -20627,21 +20628,21 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
RExC_end_op += size;
while (src > REGNODE_p(operand)) {
- StructCopy(--src, --dst, regnode);
+ StructCopy(--src, --dst, regnode);
#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD 20010112 */
- MJD_OFFSET_DEBUG(
+ MJD_OFFSET_DEBUG(
("%s(%d): (op %s) %s copy %" UVuf " -> %" UVuf " (max %" UVuf ").\n",
"reginsert",
- __LINE__,
- PL_reg_name[op],
+ __LINE__,
+ PL_reg_name[op],
(UV)(REGNODE_OFFSET(dst)) > RExC_offsets[0]
- ? "Overwriting end of array!\n" : "OK",
+ ? "Overwriting end of array!\n" : "OK",
(UV)REGNODE_OFFSET(src),
(UV)REGNODE_OFFSET(dst),
(UV)RExC_offsets[0]));
- Set_Node_Offset_To_R(REGNODE_OFFSET(dst), Node_Offset(src));
- Set_Node_Length_To_R(REGNODE_OFFSET(dst), Node_Length(src));
+ Set_Node_Offset_To_R(REGNODE_OFFSET(dst), Node_Offset(src));
+ Set_Node_Length_To_R(REGNODE_OFFSET(dst), Node_Length(src));
}
#endif
}
@@ -20649,18 +20650,18 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, const U8 op,
place = REGNODE_p(operand); /* Op node, where operand used to be. */
#ifdef RE_TRACK_PATTERN_OFFSETS
if (RExC_offsets) { /* MJD */
- MJD_OFFSET_DEBUG(
+ MJD_OFFSET_DEBUG(
("%s(%d): (op %s) %s %" UVuf " <- %" UVuf " (max %" UVuf ").\n",
"reginsert",
- __LINE__,
- PL_reg_name[op],
+ __LINE__,
+ PL_reg_name[op],
(UV)REGNODE_OFFSET(place) > RExC_offsets[0]
? "Overwriting end of array!\n" : "OK",
(UV)REGNODE_OFFSET(place),
(UV)(RExC_parse - RExC_start),
(UV)RExC_offsets[0]));
- Set_Node_Offset(place, RExC_parse);
- Set_Node_Length(place, 1);
+ Set_Node_Offset(place, RExC_parse);
+ Set_Node_Length(place, 1);
}
#endif
src = NEXTOPER(place);
@@ -20696,7 +20697,7 @@ S_regtail(pTHX_ RExC_state_t * pRExC_state,
* */
scan = (regnode_offset) p;
for (;;) {
- regnode * const temp = regnext(REGNODE_p(scan));
+ regnode * const temp = regnext(REGNODE_p(scan));
DEBUG_PARSE_r({
DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
regprop(RExC_rx, RExC_mysv, REGNODE_p(scan), NULL, pRExC_state);
@@ -20773,11 +20774,11 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
regnode * const temp = regnext(REGNODE_p(scan));
#ifdef EXPERIMENTAL_INPLACESCAN
if (PL_regkind[OP(REGNODE_p(scan))] == EXACT) {
- bool unfolded_multi_char; /* Unexamined in this routine */
+ bool unfolded_multi_char; /* Unexamined in this routine */
if (join_exact(pRExC_state, scan, &min,
&unfolded_multi_char, 1, REGNODE_p(val), depth+1))
return TRUE; /* Was return EXACT */
- }
+ }
#endif
if ( exact ) {
if (PL_regkind[OP(REGNODE_p(scan))] == EXACT) {
@@ -20798,23 +20799,23 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
scan,
PL_reg_name[exact]);
});
- if (temp == NULL)
- break;
- scan = REGNODE_OFFSET(temp);
+ if (temp == NULL)
+ break;
+ scan = REGNODE_OFFSET(temp);
}
DEBUG_PARSE_r({
DEBUG_PARSE_MSG("");
regprop(RExC_rx, RExC_mysv, REGNODE_p(val), NULL, pRExC_state);
Perl_re_printf( aTHX_
"~ attach to %s (%" IVdf ") offset to %" IVdf "\n",
- SvPV_nolen_const(RExC_mysv),
- (IV)val,
- (IV)(val - scan)
+ SvPV_nolen_const(RExC_mysv),
+ (IV)val,
+ (IV)(val - scan)
);
});
if (reg_off_by_arg[OP(REGNODE_p(scan))]) {
assert((UV) (val - scan) <= U32_MAX);
- ARG_SET(REGNODE_p(scan), val - scan);
+ ARG_SET(REGNODE_p(scan), val - scan);
}
else {
if (val - scan > U16_MAX) {
@@ -20824,7 +20825,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode_offset p,
NEXT_OFF(REGNODE_p(scan)) = U16_MAX;
return FALSE;
}
- NEXT_OFF(REGNODE_p(scan)) = val - scan;
+ NEXT_OFF(REGNODE_p(scan)) = val - scan;
}
return TRUE; /* Was 'return exact' */
@@ -20903,9 +20904,9 @@ S_regdump_extflags(pTHX_ const char *lead, const U32 flags)
for (bit=0; bit<REG_EXTFLAGS_NAME_SIZE; bit++) {
if (flags & (1<<bit)) {
- if ((1<<bit) & RXf_PMf_CHARSET) { /* Output separately, below */
- continue;
- }
+ if ((1<<bit) & RXf_PMf_CHARSET) { /* Output separately, below */
+ continue;
+ }
if (!set++ && lead)
Perl_re_printf( aTHX_ "%s", lead);
Perl_re_printf( aTHX_ "%s ", PL_reg_extflags_name[bit]);
@@ -20988,10 +20989,10 @@ Perl_regdump(pTHX_ const regexp *r)
if (r->check_substr || r->check_utf8)
Perl_re_printf( aTHX_
- (const char *)
- ( r->check_substr == r->substrs->data[1].substr
- && r->check_utf8 == r->substrs->data[1].utf8_substr
- ? "(checking floating" : "(checking anchored"));
+ (const char *)
+ ( r->check_substr == r->substrs->data[1].substr
+ && r->check_utf8 == r->substrs->data[1].utf8_substr
+ ? "(checking floating" : "(checking anchored"));
if (r->intflags & PREGf_NOSCAN)
Perl_re_printf( aTHX_ " noscan");
if (r->extflags & RXf_CHECK_ALL)
@@ -21112,29 +21113,29 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
k = PL_regkind[OP(o)];
if (k == EXACT) {
- sv_catpvs(sv, " ");
- /* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
- * is a crude hack but it may be the best for now since
- * we have no flag "this EXACTish node was UTF-8"
- * --jhi */
- pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
+ sv_catpvs(sv, " ");
+ /* Using is_utf8_string() (via PERL_PV_UNI_DETECT)
+ * is a crude hack but it may be the best for now since
+ * we have no flag "this EXACTish node was UTF-8"
+ * --jhi */
+ pv_pretty(sv, STRING(o), STR_LEN(o), PL_dump_re_max_len,
PL_colors[0], PL_colors[1],
- PERL_PV_ESCAPE_UNI_DETECT |
- PERL_PV_ESCAPE_NONASCII |
- PERL_PV_PRETTY_ELLIPSES |
- PERL_PV_PRETTY_LTGT |
- PERL_PV_PRETTY_NOCLEAR
- );
+ PERL_PV_ESCAPE_UNI_DETECT |
+ PERL_PV_ESCAPE_NONASCII |
+ PERL_PV_PRETTY_ELLIPSES |
+ PERL_PV_PRETTY_LTGT |
+ PERL_PV_PRETTY_NOCLEAR
+ );
} else if (k == TRIE) {
- /* print the details of the trie in dumpuntil instead, as
- * progi->data isn't available here */
+ /* print the details of the trie in dumpuntil instead, as
+ * progi->data isn't available here */
const char op = OP(o);
const U32 n = ARG(o);
const reg_ac_data * const ac = IS_TRIE_AC(op) ?
(reg_ac_data *)progi->data->data[n] :
NULL;
const reg_trie_data * const trie
- = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
+ = (reg_trie_data*)progi->data->data[!IS_TRIE_AC(op) ? n : ac->trie];
Perl_sv_catpvf(aTHX_ sv, "-%s", PL_reg_name[o->flags]);
DEBUG_TRIE_COMPILE_r({
@@ -21167,8 +21168,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
}
} else if (k == CURLY) {
U32 lo = ARG1(o), hi = ARG2(o);
- if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
- Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
+ if (OP(o) == CURLYM || OP(o) == CURLYN || OP(o) == CURLYX)
+ Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* Parenth number */
Perl_sv_catpvf(aTHX_ sv, "{%u,", (unsigned) lo);
if (hi == REG_INFTY)
sv_catpvs(sv, "INFTY");
@@ -21177,14 +21178,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
sv_catpvs(sv, "}");
}
else if (k == WHILEM && o->flags) /* Ordinal/of */
- Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
+ Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
else if (k == REF || k == OPEN || k == CLOSE
|| k == GROUPP || OP(o)==ACCEPT)
{
AV *name_list= NULL;
U32 parno= OP(o) == ACCEPT ? (U32)ARG2L(o) : ARG(o);
Perl_sv_catpvf(aTHX_ sv, "%" UVuf, (UV)parno); /* Parenth number */
- if ( RXp_PAREN_NAMES(prog) ) {
+ if ( RXp_PAREN_NAMES(prog) ) {
name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
} else if ( pRExC_state ) {
name_list= RExC_paren_name_list;
@@ -21192,8 +21193,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
if (name_list) {
if ( k != REF || (OP(o) < REFN)) {
SV **name= av_fetch(name_list, parno, 0 );
- if (name)
- Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
+ if (name)
+ Perl_sv_catpvf(aTHX_ sv, " '%" SVf "'", SVfARG(*name));
}
else {
SV *sv_dat= MUTABLE_SV(progi->data->data[ parno ]);
@@ -21242,7 +21243,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
}
else if (k == LOGICAL)
/* 2: embedded, otherwise 1 */
- Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
+ Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags);
else if (k == ANYOF || k == ANYOFR) {
U8 flags;
char * bitmap;
@@ -21274,7 +21275,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
arg = ARG(o);
}
- if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) {
+ if (OP(o) == ANYOFL || OP(o) == ANYOFPOSIXL) {
if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
sv_catpvs(sv, "{utf8-locale-reqd}");
}
@@ -21328,7 +21329,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
}
/* Ready to start outputting. First, the initial left bracket */
- Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+ Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
/* ANYOFH by definition doesn't have anything that will fit inside the
* bitmap; ANYOFR may or may not. */
@@ -21433,7 +21434,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
}
/* And finally the matching, closing ']' */
- Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+ Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
if (OP(o) == ANYOFHs) {
Perl_sv_catpvf(aTHX_ sv, " (Leading UTF-8 bytes=%s", _byte_dump_string((U8 *) ((struct regnode_anyofhs *) o)->string, FLAGS(o), 1));
@@ -21464,13 +21465,13 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
else if (k == ANYOFM) {
SV * cp_list = get_ANYOFM_contents(o);
- Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
+ Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
if (OP(o) == NANYOFM) {
_invlist_invert(cp_list);
}
put_charclass_bitmap_innards(sv, NULL, cp_list, NULL, NULL, 0, TRUE);
- Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
+ Perl_sv_catpvf(aTHX_ sv, "%s]", PL_colors[1]);
SvREFCNT_dec(cp_list);
}
@@ -21502,11 +21503,11 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
sv_catpv(sv, bounds[FLAGS(o)]);
}
else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH)) {
- Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
+ Perl_sv_catpvf(aTHX_ sv, "[%d", -(o->flags));
if (o->next_off) {
Perl_sv_catpvf(aTHX_ sv, "..-%d", o->flags - o->next_off);
}
- Perl_sv_catpvf(aTHX_ sv, "]");
+ Perl_sv_catpvf(aTHX_ sv, "]");
}
else if (OP(o) == SBOL)
Perl_sv_catpvf(aTHX_ sv, " /%s/", o->flags ? "\\A" : "^");
@@ -21546,22 +21547,22 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
PERL_UNUSED_CONTEXT;
DEBUG_COMPILE_r(
- {
+ {
if (prog->maxlen > 0) {
const char * const s = SvPV_nolen_const(RX_UTF8(r)
- ? prog->check_utf8 : prog->check_substr);
+ ? prog->check_utf8 : prog->check_substr);
if (!PL_colorset) reginitcolors();
Perl_re_printf( aTHX_
- "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
- PL_colors[4],
- RX_UTF8(r) ? "utf8 " : "",
- PL_colors[5], PL_colors[0],
- s,
- PL_colors[1],
- (strlen(s) > PL_dump_re_max_len ? "..." : ""));
+ "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
+ PL_colors[4],
+ RX_UTF8(r) ? "utf8 " : "",
+ PL_colors[5], PL_colors[0],
+ s,
+ PL_colors[1],
+ (strlen(s) > PL_dump_re_max_len ? "..." : ""));
}
- } );
+ } );
/* use UTF8 check substring if regexp pattern itself is in UTF8 */
return RX_UTF8(r) ? prog->check_utf8 : prog->check_substr;
@@ -21609,7 +21610,7 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
SvREFCNT_dec(r->substrs->data[i].substr);
SvREFCNT_dec(r->substrs->data[i].utf8_substr);
}
- Safefree(r->substrs);
+ Safefree(r->substrs);
}
RX_MATCH_COPY_FREE(rx);
#ifdef PERL_ANY_COW
@@ -21656,7 +21657,7 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
PERL_ARGS_ASSERT_REG_TEMP_COPY;
if (!dsv)
- dsv = (REGEXP*) newSV_type(SVt_REGEXP);
+ dsv = (REGEXP*) newSV_type(SVt_REGEXP);
else {
assert(SvTYPE(dsv) == SVt_REGEXP || (SvTYPE(dsv) == SVt_PVLV));
@@ -21673,22 +21674,22 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
}
SvLEN_set(dsv, 0);
SvCUR_set(dsv, 0);
- SvOK_off((SV *)dsv);
+ SvOK_off((SV *)dsv);
- if (islv) {
- /* For PVLVs, the head (sv_any) points to an XPVLV, while
+ if (islv) {
+ /* For PVLVs, the head (sv_any) points to an XPVLV, while
* the LV's xpvlenu_rx will point to a regexp body, which
* we allocate here */
- REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
- assert(!SvPVX(dsv));
+ REGEXP *temp = (REGEXP *)newSV_type(SVt_REGEXP);
+ assert(!SvPVX(dsv));
((XPV*)SvANY(dsv))->xpv_len_u.xpvlenu_rx = temp->sv_any;
- temp->sv_any = NULL;
- SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
- SvREFCNT_dec_NN(temp);
- /* SvCUR still resides in the xpvlv struct, so the regexp copy-
- ing below will not set it. */
- SvCUR_set(dsv, SvCUR(ssv));
- }
+ temp->sv_any = NULL;
+ SvFLAGS(temp) = (SvFLAGS(temp) & ~SVTYPEMASK) | SVt_NULL;
+ SvREFCNT_dec_NN(temp);
+ /* SvCUR still resides in the xpvlv struct, so the regexp copy-
+ ing below will not set it. */
+ SvCUR_set(dsv, SvCUR(ssv));
+ }
}
/* This ensures that SvTHINKFIRST(sv) is true, and hence that
sv_force_normal(sv) is called. */
@@ -21702,7 +21703,7 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
The string pointer is copied here, being part of the regexp struct.
*/
memcpy(&(drx->xpv_cur), &(srx->xpv_cur),
- sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
+ sizeof(regexp) - STRUCT_OFFSET(regexp, xpv_cur));
if (!islv)
SvLEN_set(dsv, 0);
if (srx->offs) {
@@ -21713,15 +21714,15 @@ Perl_reg_temp_copy(pTHX_ REGEXP *dsv, REGEXP *ssv)
if (srx->substrs) {
int i;
Newx(drx->substrs, 1, struct reg_substr_data);
- StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
+ StructCopy(srx->substrs, drx->substrs, struct reg_substr_data);
for (i = 0; i < 2; i++) {
SvREFCNT_inc_void(drx->substrs->data[i].substr);
SvREFCNT_inc_void(drx->substrs->data[i].utf8_substr);
}
- /* check_substr and check_utf8, if non-NULL, point to either their
- anchored or float namesakes, and don't hold a second reference. */
+ /* check_substr and check_utf8, if non-NULL, point to either their
+ anchored or float namesakes, and don't hold a second reference. */
}
RX_MATCH_COPIED_off(dsv);
#ifdef PERL_ANY_COW
@@ -21763,10 +21764,10 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
}
DEBUG_COMPILE_r({
- if (!PL_colorset)
- reginitcolors();
- {
- SV *dsv= sv_newmortal();
+ if (!PL_colorset)
+ reginitcolors();
+ {
+ SV *dsv= sv_newmortal();
RE_PV_QUOTED_DECL(s, RX_UTF8(rx),
dsv, RX_PRECOMP(rx), RX_PRELEN(rx), PL_dump_re_max_len);
Perl_re_printf( aTHX_ "%sFreeing REx:%s %s\n",
@@ -21782,24 +21783,24 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
S_free_codeblocks(aTHX_ ri->code_blocks);
if (ri->data) {
- int n = ri->data->count;
+ int n = ri->data->count;
- while (--n >= 0) {
+ while (--n >= 0) {
/* If you add a ->what type here, update the comment in regcomp.h */
- switch (ri->data->what[n]) {
- case 'a':
- case 'r':
- case 's':
- case 'S':
- case 'u':
- SvREFCNT_dec(MUTABLE_SV(ri->data->data[n]));
- break;
- case 'f':
- Safefree(ri->data->data[n]);
- break;
- case 'l':
- case 'L':
- break;
+ switch (ri->data->what[n]) {
+ case 'a':
+ case 'r':
+ case 's':
+ case 'S':
+ case 'u':
+ SvREFCNT_dec(MUTABLE_SV(ri->data->data[n]));
+ break;
+ case 'f':
+ Safefree(ri->data->data[n]);
+ break;
+ case 'l':
+ case 'L':
+ break;
case 'T':
{ /* Aho Corasick add-on structure for a trie node.
Used in stclass optimization only */
@@ -21811,7 +21812,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
if ( !refcount ) {
PerlMemShared_free(aho->states);
PerlMemShared_free(aho->fail);
- /* do this last!!!! */
+ /* do this last!!!! */
PerlMemShared_free(ri->data->data[n]);
/* we should only ever get called once, so
* assert as much, and also guard the free
@@ -21826,11 +21827,11 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
}
}
break;
- case 't':
- {
- /* trie structure. */
- U32 refcount;
- reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
+ case 't':
+ {
+ /* trie structure. */
+ U32 refcount;
+ reg_trie_data *trie=(reg_trie_data*)ri->data->data[n];
OP_REFCNT_LOCK;
refcount = --trie->refcount;
OP_REFCNT_UNLOCK;
@@ -21842,19 +21843,19 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
PerlMemShared_free(trie->bitmap);
if (trie->jump)
PerlMemShared_free(trie->jump);
- PerlMemShared_free(trie->wordinfo);
+ PerlMemShared_free(trie->wordinfo);
/* do this last!!!! */
PerlMemShared_free(ri->data->data[n]);
- }
- }
- break;
- default:
- Perl_croak(aTHX_ "panic: regfree data code '%c'",
+ }
+ }
+ break;
+ default:
+ Perl_croak(aTHX_ "panic: regfree data code '%c'",
ri->data->what[n]);
- }
- }
- Safefree(ri->data->what);
- Safefree(ri->data);
+ }
+ }
+ Safefree(ri->data->what);
+ Safefree(ri->data);
}
Safefree(ri);
@@ -21896,15 +21897,15 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
Copy(r->offs, ret->offs, npar, regexp_paren_pair);
if (ret->substrs) {
- /* Do it this way to avoid reading from *r after the StructCopy().
- That way, if any of the sv_dup_inc()s dislodge *r from the L1
- cache, it doesn't matter. */
+ /* Do it this way to avoid reading from *r after the StructCopy().
+ That way, if any of the sv_dup_inc()s dislodge *r from the L1
+ cache, it doesn't matter. */
int i;
- const bool anchored = r->check_substr
- ? r->check_substr == r->substrs->data[0].substr
- : r->check_utf8 == r->substrs->data[0].utf8_substr;
+ const bool anchored = r->check_substr
+ ? r->check_substr == r->substrs->data[0].substr
+ : r->check_utf8 == r->substrs->data[0].utf8_substr;
Newx(ret->substrs, 1, struct reg_substr_data);
- StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
+ StructCopy(r->substrs, ret->substrs, struct reg_substr_data);
for (i = 0; i < 2; i++) {
ret->substrs->data[i].substr =
@@ -21913,29 +21914,29 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
sv_dup_inc(ret->substrs->data[i].utf8_substr, param);
}
- /* check_substr and check_utf8, if non-NULL, point to either their
- anchored or float namesakes, and don't hold a second reference. */
+ /* check_substr and check_utf8, if non-NULL, point to either their
+ anchored or float namesakes, and don't hold a second reference. */
- if (ret->check_substr) {
- if (anchored) {
- assert(r->check_utf8 == r->substrs->data[0].utf8_substr);
+ if (ret->check_substr) {
+ if (anchored) {
+ assert(r->check_utf8 == r->substrs->data[0].utf8_substr);
- ret->check_substr = ret->substrs->data[0].substr;
- ret->check_utf8 = ret->substrs->data[0].utf8_substr;
- } else {
- assert(r->check_substr == r->substrs->data[1].substr);
- assert(r->check_utf8 == r->substrs->data[1].utf8_substr);
+ ret->check_substr = ret->substrs->data[0].substr;
+ ret->check_utf8 = ret->substrs->data[0].utf8_substr;
+ } else {
+ assert(r->check_substr == r->substrs->data[1].substr);
+ assert(r->check_utf8 == r->substrs->data[1].utf8_substr);
- ret->check_substr = ret->substrs->data[1].substr;
- ret->check_utf8 = ret->substrs->data[1].utf8_substr;
- }
- } else if (ret->check_utf8) {
- if (anchored) {
- ret->check_utf8 = ret->substrs->data[0].utf8_substr;
- } else {
- ret->check_utf8 = ret->substrs->data[1].utf8_substr;
- }
- }
+ ret->check_substr = ret->substrs->data[1].substr;
+ ret->check_utf8 = ret->substrs->data[1].utf8_substr;
+ }
+ } else if (ret->check_utf8) {
+ if (anchored) {
+ ret->check_utf8 = ret->substrs->data[0].utf8_substr;
+ } else {
+ ret->check_utf8 = ret->substrs->data[1].utf8_substr;
+ }
+ }
}
RXp_PAREN_NAMES(ret) = hv_dup_inc(RXp_PAREN_NAMES(ret), param);
@@ -21944,12 +21945,12 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
Newx(ret->recurse_locinput, r->nparens + 1, char *);
if (ret->pprivate)
- RXi_SET(ret, CALLREGDUPE_PVT(dstr, param));
+ RXi_SET(ret, CALLREGDUPE_PVT(dstr, param));
if (RX_MATCH_COPIED(dstr))
- ret->subbeg = SAVEPVN(ret->subbeg, ret->sublen);
+ ret->subbeg = SAVEPVN(ret->subbeg, ret->sublen);
else
- ret->subbeg = NULL;
+ ret->subbeg = NULL;
#ifdef PERL_ANY_COW
ret->saved_copy = NULL;
#endif
@@ -21957,9 +21958,9 @@ Perl_re_dup_guts(pTHX_ const REGEXP *sstr, REGEXP *dstr, CLONE_PARAMS *param)
/* Whether mother_re be set or no, we need to copy the string. We
cannot refrain from copying it when the storage points directly to
our mother regexp, because that's
- 1: a buffer in a different thread
- 2: something we no longer hold a reference on
- so we need to copy it locally. */
+ 1: a buffer in a different thread
+ 2: something we no longer hold a reference on
+ so we need to copy it locally. */
RX_WRAPPED(dstr) = SAVEPVN(RX_WRAPPED_const(sstr), SvCUR(sstr)+1);
/* set malloced length to a non-zero value so it will be freed
* (otherwise in combination with SVf_FAKE it looks like an alien
@@ -22002,37 +22003,37 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
if (ri->code_blocks) {
- int n;
- Newx(reti->code_blocks, 1, struct reg_code_blocks);
- Newx(reti->code_blocks->cb, ri->code_blocks->count,
+ int n;
+ Newx(reti->code_blocks, 1, struct reg_code_blocks);
+ Newx(reti->code_blocks->cb, ri->code_blocks->count,
struct reg_code_block);
- Copy(ri->code_blocks->cb, reti->code_blocks->cb,
+ Copy(ri->code_blocks->cb, reti->code_blocks->cb,
ri->code_blocks->count, struct reg_code_block);
- for (n = 0; n < ri->code_blocks->count; n++)
- reti->code_blocks->cb[n].src_regex = (REGEXP*)
- sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param);
+ for (n = 0; n < ri->code_blocks->count; n++)
+ reti->code_blocks->cb[n].src_regex = (REGEXP*)
+ sv_dup_inc((SV*)(ri->code_blocks->cb[n].src_regex), param);
reti->code_blocks->count = ri->code_blocks->count;
reti->code_blocks->refcnt = 1;
}
else
- reti->code_blocks = NULL;
+ reti->code_blocks = NULL;
reti->regstclass = NULL;
if (ri->data) {
- struct reg_data *d;
+ struct reg_data *d;
const int count = ri->data->count;
- int i;
+ int i;
- Newxc(d, sizeof(struct reg_data) + count*sizeof(void *),
- char, struct reg_data);
- Newx(d->what, count, U8);
+ Newxc(d, sizeof(struct reg_data) + count*sizeof(void *),
+ char, struct reg_data);
+ Newx(d->what, count, U8);
- d->count = count;
- for (i = 0; i < count; i++) {
- d->what[i] = ri->data->what[i];
- switch (d->what[i]) {
- /* see also regcomp.h and regfree_internal() */
+ d->count = count;
+ for (i = 0; i < count; i++) {
+ d->what[i] = ri->data->what[i];
+ switch (d->what[i]) {
+ /* see also regcomp.h and regfree_internal() */
case 'a': /* actually an AV, but the dup function is identical.
values seem to be "plain sv's" generally. */
case 'r': /* a compiled regex (but still just another SV) */
@@ -22042,9 +22043,9 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
case 'S': /* actually an SV, but the dup function is identical. */
case 'u': /* actually an HV, but the dup function is identical.
values are "plain sv's" */
- d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
- break;
- case 'f':
+ d->data[i] = sv_dup_inc((const SV *)ri->data->data[i], param);
+ break;
+ case 'f':
/* Synthetic Start Class - "Fake" charclass we generate to optimize
* patterns which could start with several different things. Pre-TRIE
* this was more important than it is now, however this still helps
@@ -22052,40 +22053,40 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
* to [xa]. This is used by Perl_re_intuit_start() and S_find_byclass()
* in regexec.c
*/
- /* This is cheating. */
- Newx(d->data[i], 1, regnode_ssc);
- StructCopy(ri->data->data[i], d->data[i], regnode_ssc);
- reti->regstclass = (regnode*)d->data[i];
- break;
- case 'T':
+ /* This is cheating. */
+ Newx(d->data[i], 1, regnode_ssc);
+ StructCopy(ri->data->data[i], d->data[i], regnode_ssc);
+ reti->regstclass = (regnode*)d->data[i];
+ break;
+ case 'T':
/* AHO-CORASICK fail table */
/* Trie stclasses are readonly and can thus be shared
- * without duplication. We free the stclass in pregfree
- * when the corresponding reg_ac_data struct is freed.
- */
- reti->regstclass= ri->regstclass;
- /* FALLTHROUGH */
- case 't':
+ * without duplication. We free the stclass in pregfree
+ * when the corresponding reg_ac_data struct is freed.
+ */
+ reti->regstclass= ri->regstclass;
+ /* FALLTHROUGH */
+ case 't':
/* TRIE transition table */
- OP_REFCNT_LOCK;
- ((reg_trie_data*)ri->data->data[i])->refcount++;
- OP_REFCNT_UNLOCK;
- /* FALLTHROUGH */
+ OP_REFCNT_LOCK;
+ ((reg_trie_data*)ri->data->data[i])->refcount++;
+ OP_REFCNT_UNLOCK;
+ /* FALLTHROUGH */
case 'l': /* (?{...}) or (??{ ... }) code (cb->block) */
case 'L': /* same when RExC_pm_flags & PMf_HAS_CV and code
is not from another regexp */
- d->data[i] = ri->data->data[i];
- break;
+ d->data[i] = ri->data->data[i];
+ break;
default:
Perl_croak(aTHX_ "panic: re_dup_guts unknown data code '%c'",
ri->data->what[i]);
- }
- }
+ }
+ }
- reti->data = d;
+ reti->data = d;
}
else
- reti->data = NULL;
+ reti->data = NULL;
reti->name_list_idx = ri->name_list_idx;
@@ -22114,16 +22115,16 @@ Perl_regnext(pTHX_ regnode *p)
I32 offset;
if (!p)
- return(NULL);
+ return(NULL);
if (OP(p) > REGNODE_MAX) { /* regnode.type is unsigned */
- Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d",
+ Perl_croak(aTHX_ "Corrupted regexp opcode %d > %d",
(int)OP(p), (int)REGNODE_MAX);
}
offset = (reg_off_by_arg[OP(p)] ? ARG(p) : NEXT_OFF(p));
if (offset == 0)
- return(NULL);
+ return(NULL);
return(p+offset);
}
@@ -22142,7 +22143,7 @@ S_re_croak(pTHX_ bool utf8, const char* pat,...)
PERL_ARGS_ASSERT_RE_CROAK;
if (len > 510)
- len = 510;
+ len = 510;
Copy(pat, buf, len , char);
buf[len] = '\n';
buf[len + 1] = '\0';
@@ -22151,7 +22152,7 @@ S_re_croak(pTHX_ bool utf8, const char* pat,...)
va_end(args);
message = SvPV_const(msv, len);
if (len > 512)
- len = 512;
+ len = 512;
Copy(message, buf, len , char);
/* len-1 to avoid \n */
Perl_croak(aTHX_ "%" UTF8f, UTF8fARG(utf8, len-1, buf));
@@ -22169,8 +22170,8 @@ Perl_save_re_context(pTHX)
/* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
if (PL_curpm) {
- const REGEXP * const rx = PM_GETRE(PL_curpm);
- if (rx)
+ const REGEXP * const rx = PM_GETRE(PL_curpm);
+ if (rx)
nparens = RX_NPARENS(rx);
}
@@ -22209,13 +22210,13 @@ S_put_code_point(pTHX_ SV *sv, UV c)
Perl_sv_catpvf(aTHX_ sv, "\\x{%04" UVXf "}", c);
}
else if (isPRINT(c)) {
- const char string = (char) c;
+ const char string = (char) c;
/* We use {phrase} as metanotation in the class, so also escape literal
* braces */
- if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
- sv_catpvs(sv, "\\");
- sv_catpvn(sv, &string, 1);
+ if (isBACKSLASHED_PUNCT(c) || c == '{' || c == '}')
+ sv_catpvs(sv, "\\");
+ sv_catpvn(sv, &string, 1);
}
else if (isMNEMONIC_CNTRL(c)) {
Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c));
@@ -22782,10 +22783,10 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
|| ( SvCUR(inverted_display) + inverted_bias
< SvCUR(as_is_display) + as_is_bias)))
{
- sv_catsv(sv, inverted_display);
+ sv_catsv(sv, inverted_display);
}
else if (as_is_display) {
- sv_catsv(sv, as_is_display);
+ sv_catsv(sv, as_is_display);
}
SvREFCNT_dec(as_is_display);
@@ -22814,8 +22815,8 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv,
STATIC const regnode *
S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
- const regnode *last, const regnode *plast,
- SV* sv, I32 indent, U32 depth)
+ const regnode *last, const regnode *plast,
+ SV* sv, I32 indent, U32 depth)
{
U8 op = PSEUDO; /* Arbitrary non-END op. */
const regnode *next;
@@ -22836,25 +22837,25 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
while (PL_regkind[op] != END && (!last || node < last)) {
assert(node);
- /* While that wasn't END last time... */
- NODE_ALIGN(node);
- op = OP(node);
- if (op == CLOSE || op == SRCLOSE || op == WHILEM)
- indent--;
- next = regnext((regnode *)node);
-
- /* Where, what. */
- if (OP(node) == OPTIMIZED) {
- if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
- optstart = node;
- else
- goto after_print;
- } else
- CLEAR_OPTSTART;
+ /* While that wasn't END last time... */
+ NODE_ALIGN(node);
+ op = OP(node);
+ if (op == CLOSE || op == SRCLOSE || op == WHILEM)
+ indent--;
+ next = regnext((regnode *)node);
+
+ /* Where, what. */
+ if (OP(node) == OPTIMIZED) {
+ if (!optstart && RE_DEBUG_FLAG(RE_DEBUG_COMPILE_OPTIMISE))
+ optstart = node;
+ else
+ goto after_print;
+ } else
+ CLEAR_OPTSTART;
regprop(r, sv, node, NULL, NULL);
Perl_re_printf( aTHX_ "%4" IVdf ":%*s%s", (IV)(node - start),
- (int)(2*indent + 1), "", SvPVX_const(sv));
+ (int)(2*indent + 1), "", SvPVX_const(sv));
if (OP(node) != OPTIMIZED) {
if (next == NULL) /* Next ptr. */
@@ -22868,39 +22869,39 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
}
after_print:
- if (PL_regkind[(U8)op] == BRANCHJ) {
- assert(next);
- {
+ if (PL_regkind[(U8)op] == BRANCHJ) {
+ assert(next);
+ {
const regnode *nnode = (OP(next) == LONGJMP
? regnext((regnode *)next)
: next);
if (last && nnode > last)
nnode = last;
DUMPUNTIL(NEXTOPER(NEXTOPER(node)), nnode);
- }
- }
- else if (PL_regkind[(U8)op] == BRANCH) {
- assert(next);
- DUMPUNTIL(NEXTOPER(node), next);
- }
- else if ( PL_regkind[(U8)op] == TRIE ) {
- const regnode *this_trie = node;
- const char op = OP(node);
+ }
+ }
+ else if (PL_regkind[(U8)op] == BRANCH) {
+ assert(next);
+ DUMPUNTIL(NEXTOPER(node), next);
+ }
+ else if ( PL_regkind[(U8)op] == TRIE ) {
+ const regnode *this_trie = node;
+ const char op = OP(node);
const U32 n = ARG(node);
- const reg_ac_data * const ac = op>=AHOCORASICK ?
+ const reg_ac_data * const ac = op>=AHOCORASICK ?
(reg_ac_data *)ri->data->data[n] :
NULL;
- const reg_trie_data * const trie =
- (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
+ const reg_trie_data * const trie =
+ (reg_trie_data*)ri->data->data[op<AHOCORASICK ? n : ac->trie];
#ifdef DEBUGGING
- AV *const trie_words
+ AV *const trie_words
= MUTABLE_AV(ri->data->data[n + TRIE_WORDS_OFFSET]);
#endif
- const regnode *nextbranch= NULL;
- I32 word_idx;
+ const regnode *nextbranch= NULL;
+ I32 word_idx;
SvPVCLEAR(sv);
- for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
- SV ** const elem_ptr = av_fetch(trie_words, word_idx, 0);
+ for (word_idx= 0; word_idx < (I32)trie->wordcount; word_idx++) {
+ SV ** const elem_ptr = av_fetch(trie_words, word_idx, 0);
Perl_re_indentf( aTHX_ "%s ",
indent+3,
@@ -22923,41 +22924,41 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
if (dist) {
if (!nextbranch)
nextbranch= this_trie + trie->jump[0];
- DUMPUNTIL(this_trie + dist, nextbranch);
+ DUMPUNTIL(this_trie + dist, nextbranch);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode *)nextbranch);
} else {
Perl_re_printf( aTHX_ "\n");
- }
- }
- if (last && next > last)
- node= last;
- else
- node= next;
- }
- else if ( op == CURLY ) { /* "next" might be very big: optimizer */
- DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS,
+ }
+ }
+ if (last && next > last)
+ node= last;
+ else
+ node= next;
+ }
+ else if ( op == CURLY ) { /* "next" might be very big: optimizer */
+ DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS,
NEXTOPER(node) + EXTRA_STEP_2ARGS + 1);
- }
- else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
- assert(next);
- DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, next);
- }
- else if ( op == PLUS || op == STAR) {
- DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
- }
- else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) {
+ }
+ else if (PL_regkind[(U8)op] == CURLY && op != CURLYX) {
+ assert(next);
+ DUMPUNTIL(NEXTOPER(node) + EXTRA_STEP_2ARGS, next);
+ }
+ else if ( op == PLUS || op == STAR) {
+ DUMPUNTIL(NEXTOPER(node), NEXTOPER(node) + 1);
+ }
+ else if (PL_regkind[(U8)op] == EXACT || op == ANYOFHs) {
/* Literal string, where present. */
- node += NODE_SZ_STR(node) - 1;
- node = NEXTOPER(node);
- }
- else {
- node = NEXTOPER(node);
- node += regarglen[(U8)op];
- }
- if (op == CURLYX || op == OPEN || op == SROPEN)
- indent++;
+ node += NODE_SZ_STR(node) - 1;
+ node = NEXTOPER(node);
+ }
+ else {
+ node = NEXTOPER(node);
+ node += regarglen[(U8)op];
+ }
+ if (op == CURLYX || op == OPEN || op == SROPEN)
+ indent++;
}
CLEAR_OPTSTART;
#ifdef DEBUG_DUMPUNTIL
@@ -23218,7 +23219,7 @@ S_compile_wildcard(pTHX_ const char * subpattern, const STRLEN len,
STATIC I32
S_execute_wildcard(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
- char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
+ char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
{
I32 result;
DECLARE_AND_GET_RE_DEBUG_FLAGS;
@@ -24912,7 +24913,7 @@ S_parse_uniprop_string(pTHX_
COPHH * hinthash = (IN_PERL_COMPILETIME)
? CopHINTHASH_get(&PL_compiling)
: CopHINTHASH_get(PL_curcop);
- SV * pu_overrides = cophh_fetch_pv(hinthash, "private_use", 0, 0);
+ SV * pu_overrides = cophh_fetch_pv(hinthash, "private_use", 0, 0);
if (UNLIKELY(pu_overrides && SvPOK(pu_overrides))) {