diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 1999-11-13 19:43:37 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 1999-11-13 19:43:37 +0000 |
commit | 374d691e102e2975cdf0ae9698b9f6e2ad73d91c (patch) | |
tree | 56c3b8007cc66f1082269085877737267776d457 | |
parent | c54e8273062a87ae6b235cfa92b11d4b2da434ab (diff) | |
parent | 81be85b825fc39c8f8e4c3fb4748df6a7fa34de4 (diff) | |
download | perl-374d691e102e2975cdf0ae9698b9f6e2ad73d91c.tar.gz |
Integrate with Sarathy.
p4raw-id: //depot/cfgperl@4576
-rw-r--r-- | ext/Errno/Errno_pm.PL | 2 | ||||
-rw-r--r-- | ext/IO/lib/IO/Socket.pm | 3 | ||||
-rw-r--r-- | regexec.c | 461 | ||||
-rwxr-xr-x | t/lib/dumper.t | 216 | ||||
-rw-r--r-- | t/op/re_tests | 2 | ||||
-rwxr-xr-x | t/op/regexp.t | 2 | ||||
-rwxr-xr-x | t/pragma/overload.t | 7 |
7 files changed, 402 insertions, 291 deletions
diff --git a/ext/Errno/Errno_pm.PL b/ext/Errno/Errno_pm.PL index 7f884a8eed..18260a9dca 100644 --- a/ext/Errno/Errno_pm.PL +++ b/ext/Errno/Errno_pm.PL @@ -182,7 +182,7 @@ use strict; "\$Config{'archname'}-\$Config{'osvers'}" eq "$Config{'archname'}-$Config{'osvers'}" or - die "Errno architecture ($Config{'archname'}-$Config{'osvers'}) does not match executable architecture (\$Config{'archname'-\$Config{'osvers'})"; + die "Errno architecture ($Config{'archname'}-$Config{'osvers'}) does not match executable architecture (\$Config{'archname'}-\$Config{'osvers'})"; \$VERSION = "$VERSION"; \@ISA = qw(Exporter); diff --git a/ext/IO/lib/IO/Socket.pm b/ext/IO/lib/IO/Socket.pm index 0e115a5633..01cdc40cce 100644 --- a/ext/IO/lib/IO/Socket.pm +++ b/ext/IO/lib/IO/Socket.pm @@ -169,8 +169,7 @@ sub accept { } $peer = accept($new,$sock) || undef; }; - croak "$@" - if ($@ and $fh); + croak "$@" if $@ and $sock; return wantarray ? defined $peer ? ($new, $peer) : () @@ -254,6 +254,9 @@ S_cache_re(pTHX_ regexp *prog) PL_reg_re = prog; } +static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend, + char *startpos, I32 norun); + /* * Need to implement the following flags for reg_anch: * @@ -275,6 +278,13 @@ S_cache_re(pTHX_ regexp *prog) /* XXXX We assume that strpos is strbeg unless sv. */ +/* XXXX Some places assume that there is a fixed substring. + An update may be needed if optimizer marks as "INTUITable" + RExen without fixed substrings. Similarly, it is assumed that + lengths of all the strings are no more than minlen, thus they + cannot come from lookahead. + (Or minlen should take into account lookahead.) */ + /* A failure to find a constant substring means that there is no need to make an expensive call to REx engine, thus we celebrate a failure. Similarly, finding a substring too deep into the string means that less calls to @@ -285,10 +295,14 @@ S_cache_re(pTHX_ regexp *prog) b) Fixed substring; c) Whether we are anchored (beginning-of-line or \G); d) First node (of those at offset 0) which may distingush positions; - We use 'a', 'b', multiline-part of 'c', and try to find a position in the + We use a)b)d) and multiline-part of c), and try to find a position in the string which does not contradict any of them. */ +/* Most of decisions we do here should have been done at compile time. + The nodes of the REx which we used for the search should have been + deleted from the finite automaton. */ + char * Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *strend, U32 flags, re_scream_pos_data *data) @@ -301,7 +315,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, char *t; I32 ml_anch; char *tmp; - register char *other_last = Nullch; + register char *other_last = Nullch; /* other substr checked before this */ + char *check_at; /* check substr found at this pos */ #ifdef DEBUGGING char *i_strpos = strpos; #endif @@ -432,6 +447,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (!s) goto fail_finish; + check_at = s; + /* Finish the diagnostic message */ DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) ); @@ -447,7 +464,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* Take into account the "other" substring. */ /* XXXX May be hopelessly wrong for UTF... */ if (!other_last) - other_last = strpos - 1; + other_last = strpos; if (check == prog->float_substr) { do_other_anchored: { @@ -465,8 +482,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, else t = strpos; t += prog->anchored_offset; - if (t <= other_last) - t = other_last + 1; + if (t < other_last) /* These positions already checked */ + t = other_last; PL_bostr = tmp; last2 = last1 = strend - prog->minlen; if (last < last1) @@ -495,7 +512,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, ", trying floating at offset %ld...\n", (long)(s1 + 1 - i_strpos))); PL_regeol = strend; /* Used in HOP() */ - other_last = last1 + prog->anchored_offset; + other_last = last1 + prog->anchored_offset + 1; s = HOPc(last, 1); goto restart; } @@ -503,7 +520,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", (long)(s - i_strpos))); t = s - prog->anchored_offset; - other_last = s - 1; + other_last = s + 1; s = s1; if (t == strpos) goto try_at_start; @@ -520,8 +537,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (last - t > prog->float_max_offset) last = t + prog->float_max_offset; s = t + prog->float_min_offset; - if (s <= other_last) - s = other_last + 1; + if (s < other_last) + s = other_last; /* XXXX It is not documented what units *_offsets are in. Assume bytes. */ /* fbm_instr() takes into account exact value of end-of-str if the check is SvTAIL(ed). Since false positives are OK, @@ -546,7 +563,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, DEBUG_r(PerlIO_printf(Perl_debug_log, ", trying anchored starting at offset %ld...\n", (long)(s1 + 1 - i_strpos))); - other_last = last; + other_last = last + 1; PL_regeol = strend; /* Used in HOP() */ s = HOPc(t, 1); goto restart; @@ -554,7 +571,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, else { DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n", (long)(s - i_strpos))); - other_last = s - 1; + other_last = s + 1; s = s1; if (t == strpos) goto try_at_start; @@ -652,6 +669,72 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, s = strpos; } + /* Last resort... */ + /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */ + if (prog->regstclass) { + /* minlen == 0 is possible if regstclass is \b or \B, + and the fixed substr is ''$. + Since minlen is already taken into account, s+1 is before strend; + accidentally, minlen >= 1 guaranties no false positives at s + 1 + even for \b or \B. But (minlen? 1 : 0) below assumes that + regstclass does not come from lookahead... */ + /* If regstclass takes bytelength more than 1: If charlength==1, OK. + This leaves EXACTF only, which is dealt with in find_byclass(). */ + char *endpos = (prog->anchored_substr || ml_anch) + ? s + (prog->minlen? 1 : 0) + : (prog->float_substr ? check_at - start_shift + 1 + : strend) ; + char *startpos = sv ? strend - SvCUR(sv) : s; + + t = s; + s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1); + if (!s) { +#ifdef DEBUGGING + char *what; +#endif + if (endpos == strend) { + DEBUG_r( PerlIO_printf(Perl_debug_log, + "Could not match STCLASS...\n") ); + goto fail; + } + /* Contradict one of substrings */ + if (prog->anchored_substr) { + DEBUG_r( PerlIO_printf(Perl_debug_log, + "This position contradicts STCLASS...\n") ); + if (prog->anchored_substr == check) { + DEBUG_r( what = "anchored" ); + hop_and_restart: + PL_regeol = strend; /* Used in HOP() */ + s = HOPc(t, 1); + DEBUG_r( PerlIO_printf(Perl_debug_log, + "trying %s substr starting at offset %ld...\n", + what, (long)(s + start_shift - i_strpos)) ); + goto restart; + } + /* Have both, check is floating */ + if (t + start_shift >= check_at) /* Contradicts floating=check */ + goto retry_floating_check; + /* Recheck anchored substring, but not floating... */ + s = check_at; + DEBUG_r( PerlIO_printf(Perl_debug_log, + "trying anchored substr starting at offset %ld...\n", + (long)(other_last - i_strpos)) ); + goto do_other_anchored; + } + /* Check is floating subtring. */ + retry_floating_check: + t = check_at - start_shift; + DEBUG_r( what = "floating" ); + goto hop_and_restart; + } + DEBUG_r( if (t != s) + PerlIO_printf(Perl_debug_log, + "By STCLASS: moving %ld --> %ld\n", + (long)(t - i_strpos), (long)(s - i_strpos)); + else + PerlIO_printf(Perl_debug_log, + "Does not contradict STCLASS...\n") ); + } DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n", PL_colors[4], PL_colors[5], (long)(s - i_strpos)) ); return s; @@ -983,7 +1066,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * && (ln == 1 || (OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) - && regtry(prog, s) ) + && (norun || regtry(prog, s)) ) goto got_it; s++; } @@ -993,7 +1076,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * && (ln == 1 || (OP(c) == EXACTF ? ibcmp(s, m, ln) : ibcmp_locale(s, m, ln))) - && regtry(prog, s) ) + && (norun || regtry(prog, s)) ) goto got_it; s++; } @@ -1003,32 +1086,24 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUND: - if (minlen) { - dontbother++; - strend -= 1; - } - tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev; + tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) { tmp = !tmp; - if (regtry(prog, s)) + if ((norun || regtry(prog, s))) goto got_it; } s++; } - if ((minlen || tmp) && regtry(prog,s)) + if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) goto got_it; break; case BOUNDLUTF8: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case BOUNDUTF8: - if (minlen) { - dontbother++; - strend = reghop_c(strend, -1); - } - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev; + tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == BOUND ? @@ -1036,60 +1111,54 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * isALNUM_LC_utf8((U8*)s))) { tmp = !tmp; - if (regtry(prog, s)) + if ((norun || regtry(prog, s))) goto got_it; } s += UTF8SKIP(s); } - if ((minlen || tmp) && regtry(prog,s)) + if ((!prog->minlen && tmp) && (norun || regtry(prog, s))) goto got_it; break; case NBOUNDL: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUND: - if (minlen) { - dontbother++; - strend -= 1; - } - tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev; + tmp = (s != startpos) ? UCHARAT(s - 1) : '\n'; tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s))) tmp = !tmp; - else if (regtry(prog, s)) + else if ((norun || regtry(prog, s))) goto got_it; s++; } - if ((minlen || !tmp) && regtry(prog,s)) + if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) goto got_it; break; case NBOUNDLUTF8: PL_reg_flags |= RF_tainted; /* FALL THROUGH */ case NBOUNDUTF8: - if (minlen) { - dontbother++; + if (prog->minlen) strend = reghop_c(strend, -1); - } - tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev; + tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n'; tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0); while (s < strend) { if (tmp == !(OP(c) == NBOUND ? swash_fetch(PL_utf8_alnum, (U8*)s) : isALNUM_LC_utf8((U8*)s))) tmp = !tmp; - else if (regtry(prog, s)) + else if ((norun || regtry(prog, s))) goto got_it; s += UTF8SKIP(s); } - if ((minlen || !tmp) && regtry(prog,s)) + if ((!prog->minlen && !tmp) && (norun || regtry(prog, s))) goto got_it; break; case ALNUM: while (s < strend) { if (isALNUM(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1102,7 +1171,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case ALNUMUTF8: while (s < strend) { if (swash_fetch(PL_utf8_alnum, (U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1116,7 +1185,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isALNUM_LC(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1130,7 +1199,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isALNUM_LC_utf8((U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1143,7 +1212,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NALNUM: while (s < strend) { if (!isALNUM(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1156,7 +1225,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NALNUMUTF8: while (s < strend) { if (!swash_fetch(PL_utf8_alnum, (U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1170,7 +1239,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isALNUM_LC(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1184,7 +1253,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isALNUM_LC_utf8((U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1197,7 +1266,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case SPACE: while (s < strend) { if (isSPACE(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1210,7 +1279,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case SPACEUTF8: while (s < strend) { if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1224,7 +1293,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isSPACE_LC(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1238,7 +1307,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1251,7 +1320,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NSPACE: while (s < strend) { if (!isSPACE(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1264,7 +1333,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NSPACEUTF8: while (s < strend) { if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1278,7 +1347,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isSPACE_LC(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1292,7 +1361,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1305,7 +1374,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case DIGIT: while (s < strend) { if (isDIGIT(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1318,7 +1387,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case DIGITUTF8: while (s < strend) { if (swash_fetch(PL_utf8_digit,(U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1332,7 +1401,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isDIGIT_LC(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1346,7 +1415,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (isDIGIT_LC_utf8((U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1359,7 +1428,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NDIGIT: while (s < strend) { if (!isDIGIT(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1372,7 +1441,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * case NDIGITUTF8: while (s < strend) { if (!swash_fetch(PL_utf8_digit,(U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1386,7 +1455,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isDIGIT_LC(*s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1400,7 +1469,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_flags |= RF_tainted; while (s < strend) { if (!isDIGIT_LC_utf8((U8*)s)) { - if (tmp && regtry(prog, s)) + if (tmp && (norun || regtry(prog, s))) goto got_it; else tmp = doevery; @@ -1414,6 +1483,270 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c)); break; } + return 0; + got_it: + return s; +} + +/* + - regexec_flags - match a regexp against a string + */ +I32 +Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend, + char *strbeg, I32 minend, SV *sv, void *data, U32 flags) +/* strend: pointer to null at end of string */ +/* strbeg: real beginning of string */ +/* minend: end of match must be >=minend after stringarg. */ +/* data: May be used for some additional optimizations. */ +/* nosave: For optimizations. */ +{ + dTHR; + register char *s; + register regnode *c; + register char *startpos = stringarg; + register I32 tmp; + I32 minlen; /* must match at least this many chars */ + I32 dontbother = 0; /* how many characters not to try at end */ + I32 start_shift = 0; /* Offset of the start to find + constant substr. */ /* CC */ + I32 end_shift = 0; /* Same for the end. */ /* CC */ + I32 scream_pos = -1; /* Internal iterator of scream. */ + char *scream_olds; + SV* oreplsv = GvSV(PL_replgv); + + PL_regcc = 0; + + cache_re(prog); +#ifdef DEBUGGING + PL_regnarrate = PL_debug & 512; +#endif + + /* Be paranoid... */ + if (prog == NULL || startpos == NULL) { + Perl_croak(aTHX_ "NULL regexp parameter"); + return 0; + } + + minlen = prog->minlen; + if (strend - startpos < minlen) goto phooey; + + if (startpos == strbeg) /* is ^ valid at stringarg? */ + PL_regprev = '\n'; + else { + PL_regprev = (U32)stringarg[-1]; + if (!PL_multiline && PL_regprev == '\n') + PL_regprev = '\0'; /* force ^ to NOT match */ + } + + /* Check validity of program. */ + if (UCHARAT(prog->program) != REG_MAGIC) { + Perl_croak(aTHX_ "corrupted regexp program"); + } + + PL_reg_flags = 0; + PL_reg_eval_set = 0; + PL_reg_maxiter = 0; + + if (prog->reganch & ROPT_UTF8) + PL_reg_flags |= RF_utf8; + + /* Mark beginning of line for ^ and lookbehind. */ + PL_regbol = startpos; + PL_bostr = strbeg; + PL_reg_sv = sv; + + /* Mark end of line for $ (and such) */ + PL_regeol = strend; + + /* see how far we have to get to not match where we matched before */ + PL_regtill = startpos+minend; + + /* We start without call_cc context. */ + PL_reg_call_cc = 0; + + /* If there is a "must appear" string, look for it. */ + s = startpos; + + if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */ + MAGIC *mg; + + if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ + PL_reg_ganch = startpos; + else if (sv && SvTYPE(sv) >= SVt_PVMG + && SvMAGIC(sv) + && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) { + PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */ + if (prog->reganch & ROPT_ANCH_GPOS) { + if (s > PL_reg_ganch) + goto phooey; + s = PL_reg_ganch; + } + } + else /* pos() not defined */ + PL_reg_ganch = strbeg; + } + + if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) { + re_scream_pos_data d; + + d.scream_olds = &scream_olds; + d.scream_pos = &scream_pos; + s = re_intuit_start(prog, sv, s, strend, flags, &d); + if (!s) + goto phooey; /* not present */ + } + + DEBUG_r( if (!PL_colorset) reginitcolors() ); + DEBUG_r(PerlIO_printf(Perl_debug_log, + "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n", + PL_colors[4],PL_colors[5],PL_colors[0], + prog->precomp, + PL_colors[1], + (strlen(prog->precomp) > 60 ? "..." : ""), + PL_colors[0], + (strend - startpos > 60 ? 60 : strend - startpos), + startpos, PL_colors[1], + (strend - startpos > 60 ? "..." : "")) + ); + + /* Simplest case: anchored match need be tried only once. */ + /* [unless only anchor is BOL and multiline is set] */ + if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { + if (s == startpos && regtry(prog, startpos)) + goto got_it; + else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT) + || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */ + { + char *end; + + if (minlen) + dontbother = minlen - 1; + end = HOPc(strend, -dontbother) - 1; + /* for multiline we only have to try after newlines */ + if (prog->check_substr) { + if (s == startpos) + goto after_try; + while (1) { + if (regtry(prog, s)) + goto got_it; + after_try: + if (s >= end) + goto phooey; + s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); + if (!s) + goto phooey; + } + } else { + if (s > startpos) + s--; + while (s < end) { + if (*s++ == '\n') { /* don't need PL_utf8skip here */ + if (regtry(prog, s)) + goto got_it; + } + } + } + } + goto phooey; + } else if (prog->reganch & ROPT_ANCH_GPOS) { + if (regtry(prog, PL_reg_ganch)) + goto got_it; + goto phooey; + } + + /* Messy cases: unanchored match. */ + if (prog->anchored_substr && prog->reganch & ROPT_SKIP) { + /* we have /x+whatever/ */ + /* it must be a one character string (XXXX Except UTF?) */ + char ch = SvPVX(prog->anchored_substr)[0]; + if (UTF) { + while (s < strend) { + if (*s == ch) { + if (regtry(prog, s)) goto got_it; + s += UTF8SKIP(s); + while (s < strend && *s == ch) + s += UTF8SKIP(s); + } + s += UTF8SKIP(s); + } + } + else { + while (s < strend) { + if (*s == ch) { + if (regtry(prog, s)) goto got_it; + s++; + while (s < strend && *s == ch) + s++; + } + s++; + } + } + } + /*SUPPRESS 560*/ + else if (prog->anchored_substr != Nullsv + || (prog->float_substr != Nullsv + && prog->float_max_offset < strend - s)) { + SV *must = prog->anchored_substr + ? prog->anchored_substr : prog->float_substr; + I32 back_max = + prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset; + I32 back_min = + prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset; + I32 delta = back_max - back_min; + char *last = HOPc(strend, /* Cannot start after this */ + -(I32)(CHR_SVLEN(must) + - (SvTAIL(must) != 0) + back_min)); + char *last1; /* Last position checked before */ + + if (s > PL_bostr) + last1 = HOPc(s, -1); + else + last1 = s - 1; /* bogus */ + + /* XXXX check_substr already used to find `s', can optimize if + check_substr==must. */ + scream_pos = -1; + dontbother = end_shift; + strend = HOPc(strend, -dontbother); + while ( (s <= last) && + ((flags & REXEC_SCREAM) + ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg, + end_shift, &scream_pos, 0)) + : (s = fbm_instr((unsigned char*)HOP(s, back_min), + (unsigned char*)strend, must, + PL_multiline ? FBMrf_MULTILINE : 0))) ) { + if (HOPc(s, -back_max) > last1) { + last1 = HOPc(s, -back_min); + s = HOPc(s, -back_max); + } + else { + char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1; + + last1 = HOPc(s, -back_min); + s = t; + } + if (UTF) { + while (s <= last1) { + if (regtry(prog, s)) + goto got_it; + s += UTF8SKIP(s); + } + } + else { + while (s <= last1) { + if (regtry(prog, s)) + goto got_it; + s++; + } + } + } + goto phooey; + } + else if (c = prog->regstclass) { + if (minlen) /* don't bother with what can't match */ + strend = HOPc(strend, -(minlen - 1)); + if (find_byclass(prog, c, s, strend, startpos, 0)) + goto got_it; } else { dontbother = 0; diff --git a/t/lib/dumper.t b/t/lib/dumper.t index 505051f216..9130d1c690 100755 --- a/t/lib/dumper.t +++ b/t/lib/dumper.t @@ -9,8 +9,6 @@ BEGIN { } use Data::Dumper; -use Config; -my $Is_ebcdic = defined($Config{'ebcdic'}) && $Config{'ebcdic'} eq 'define'; $Data::Dumper::Pad = "#"; my $TMAX; @@ -240,20 +238,11 @@ EOT ############# 43 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #$VAR1 = { # "abc\0'\efg" => "mno\0" #}; EOT -} -else { -$WANT = <<"EOT"; -#\$VAR1 = { -# "\\201\\202\\203\\340\\360'\e\\206\\207" => "\\224\\225\\226\\340\\360" -#}; -EOT -} $foo = { "abc\000\'\efg" => "mno\000" }; { @@ -288,7 +277,6 @@ EOT ############# 49 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #$foo = \*::foo; #*::foo = \5; @@ -313,33 +301,6 @@ if (!$Is_ebcdic) { #@bar = @{*::foo{ARRAY}}; #%baz = %{*::foo{ARRAY}->[2]}; EOT -} -else { - $WANT = <<'EOT'; -#$foo = \*::foo; -#*::foo = \5; -#*::foo = [ -# #0 -# 10, -# #1 -# '', -# #2 -# { -# 'd' => {}, -# 'a' => 1, -# 'b' => '', -# 'c' => [] -# } -# ]; -#*::foo{ARRAY}->[1] = $foo; -#*::foo{ARRAY}->[2]{'d'} = *::foo{ARRAY}->[2]; -#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR}; -#*::foo{ARRAY}->[2]{'c'} = *::foo{ARRAY}; -#*::foo = *::foo{ARRAY}->[2]; -#@bar = @{*::foo{ARRAY}}; -#%baz = %{*::foo{ARRAY}->[2]}; -EOT -} $Data::Dumper::Purity = 1; $Data::Dumper::Indent = 3; @@ -348,7 +309,6 @@ EOT ############# 55 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #$foo = \*::foo; #*::foo = \5; @@ -370,30 +330,6 @@ if (!$Is_ebcdic) { #$bar = *::foo{ARRAY}; #$baz = *::foo{ARRAY}->[2]; EOT -} -else { - $WANT = <<'EOT'; -#$foo = \*::foo; -#*::foo = \5; -#*::foo = [ -# 10, -# '', -# { -# 'd' => {}, -# 'a' => 1, -# 'b' => '', -# 'c' => [] -# } -#]; -#*::foo{ARRAY}->[1] = $foo; -#*::foo{ARRAY}->[2]{'d'} = *::foo{ARRAY}->[2]; -#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR}; -#*::foo{ARRAY}->[2]{'c'} = *::foo{ARRAY}; -#*::foo = *::foo{ARRAY}->[2]; -#$bar = *::foo{ARRAY}; -#$baz = *::foo{ARRAY}->[2]; -EOT -} $Data::Dumper::Indent = 1; TEST q(Data::Dumper->Dump([\\*foo, \\@foo, \\%foo], ['foo', 'bar', 'baz'])); @@ -401,7 +337,6 @@ EOT ############# 61 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #@bar = ( # 10, @@ -423,37 +358,12 @@ if (!$Is_ebcdic) { #%baz = %{*::foo{HASH}}; #$foo = $bar[1]; EOT -} -else { - $WANT = <<'EOT'; -#@bar = ( -# 10, -# \*::foo, -# {} -#); -#*::foo = \5; -#*::foo = \@bar; -#*::foo = { -# 'd' => {}, -# 'a' => 1, -# 'b' => '', -# 'c' => [] -#}; -#*::foo{HASH}->{'d'} = *::foo{HASH}; -#*::foo{HASH}->{'b'} = *::foo{SCALAR}; -#*::foo{HASH}->{'c'} = \@bar; -#$bar[2] = *::foo{HASH}; -#%baz = %{*::foo{HASH}}; -#$foo = $bar[1]; -EOT -} TEST q(Data::Dumper->Dump([\\@foo, \\%foo, \\*foo], ['*bar', '*baz', '*foo'])); TEST q(Data::Dumper->Dumpxs([\\@foo, \\%foo, \\*foo], ['*bar', '*baz', '*foo'])) if $XS; ############# 67 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #$bar = [ # 10, @@ -475,37 +385,12 @@ if (!$Is_ebcdic) { #$baz = *::foo{HASH}; #$foo = $bar->[1]; EOT -} -else { - $WANT = <<'EOT'; -#$bar = [ -# 10, -# \*::foo, -# {} -#]; -#*::foo = \5; -#*::foo = $bar; -#*::foo = { -# 'd' => {}, -# 'a' => 1, -# 'b' => '', -# 'c' => [] -#}; -#*::foo{HASH}->{'d'} = *::foo{HASH}; -#*::foo{HASH}->{'b'} = *::foo{SCALAR}; -#*::foo{HASH}->{'c'} = $bar; -#$bar->[2] = *::foo{HASH}; -#$baz = *::foo{HASH}; -#$foo = $bar->[1]; -EOT -} TEST q(Data::Dumper->Dump([\\@foo, \\%foo, \\*foo], ['bar', 'baz', 'foo'])); TEST q(Data::Dumper->Dumpxs([\\@foo, \\%foo, \\*foo], ['bar', 'baz', 'foo'])) if $XS; ############# 73 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #$foo = \*::foo; #@bar = ( @@ -520,23 +405,6 @@ if (!$Is_ebcdic) { #); #%baz = %{$bar[2]}; EOT -} -else { - $WANT = <<'EOT'; -#$foo = \*::foo; -#@bar = ( -# 10, -# $foo, -# { -# d => $bar[2], -# a => 1, -# b => \5, -# c => \@bar -# } -#); -#%baz = %{$bar[2]}; -EOT -} $Data::Dumper::Purity = 0; $Data::Dumper::Quotekeys = 0; @@ -545,7 +413,6 @@ EOT ############# 79 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #$foo = \*::foo; #$bar = [ @@ -560,23 +427,6 @@ if (!$Is_ebcdic) { #]; #$baz = $bar->[2]; EOT -} -else { - $WANT = <<'EOT'; -#$foo = \*::foo; -#$bar = [ -# 10, -# $foo, -# { -# d => $bar->[2], -# a => 1, -# b => \5, -# c => $bar -# } -#]; -#$baz = $bar->[2]; -EOT -} TEST q(Data::Dumper->Dump([\\*foo, \\@foo, \\%foo], ['foo', 'bar', 'baz'])); TEST q(Data::Dumper->Dumpxs([\\*foo, \\@foo, \\%foo], ['foo', 'bar', 'baz'])) if $XS; @@ -598,7 +448,6 @@ EOT ############# 85 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #%kennels = ( # First => \'Fido', @@ -611,21 +460,6 @@ if (!$Is_ebcdic) { #); #%mutts = %kennels; EOT -} -else { - $WANT = <<'EOT'; -#%kennels = ( -# Second => \'Wags', -# First => \'Fido' -#); -#@dogs = ( -# ${$kennels{First}}, -# ${$kennels{Second}}, -# \%kennels -#); -#%mutts = %kennels; -EOT -} TEST q( $d = Data::Dumper->new([\\%kennel, \\@dogs, $mutts], @@ -653,7 +487,6 @@ EOT ############# 97 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #%kennels = ( # First => \'Fido', @@ -666,21 +499,6 @@ if (!$Is_ebcdic) { #); #%mutts = %kennels; EOT -} -else { - $WANT = <<'EOT'; -#%kennels = ( -# Second => \'Wags', -# First => \'Fido' -#); -#@dogs = ( -# ${$kennels{First}}, -# ${$kennels{Second}}, -# \%kennels -#); -#%mutts = %kennels; -EOT -} TEST q($d->Reset; $d->Dump); @@ -690,7 +508,6 @@ EOT ############# 103 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #@dogs = ( # 'Fido', @@ -703,21 +520,6 @@ if (!$Is_ebcdic) { #%kennels = %{$dogs[2]}; #%mutts = %{$dogs[2]}; EOT -} -else { - $WANT = <<'EOT'; -#@dogs = ( -# 'Fido', -# 'Wags', -# { -# Second => \$dogs[1], -# First => \$dogs[0] -# } -#); -#%kennels = %{$dogs[2]}; -#%mutts = %{$dogs[2]}; -EOT -} TEST q( $d = Data::Dumper->new([\\@dogs, \\%kennel, $mutts], @@ -741,7 +543,6 @@ EOT ############# 115 ## -if (!$Is_ebcdic) { $WANT = <<'EOT'; #@dogs = ( # 'Fido', @@ -756,23 +557,6 @@ if (!$Is_ebcdic) { # Second => \'Wags' #); EOT -} -else { - $WANT = <<'EOT'; -#@dogs = ( -# 'Fido', -# 'Wags', -# { -# Second => \'Wags', -# First => \'Fido' -# } -#); -#%kennels = ( -# Second => \'Wags', -# First => \'Fido' -#); -EOT -} TEST q( $d = Data::Dumper->new( [\@dogs, \%kennel], [qw(*dogs *kennels)] ); diff --git a/t/op/re_tests b/t/op/re_tests index d72a0f73b2..f866385096 100644 --- a/t/op/re_tests +++ b/t/op/re_tests @@ -742,3 +742,5 @@ tt+$ xxxtt y - - ([[:digit:]-z]+) =0-z= y $1 0-z ([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z \GX.*X aaaXbX n - - +(\d+\.\d+) 3.1415926 y $1 3.1415926 +(\ba.{0,10}br) have a web browser y $1 a web br diff --git a/t/op/regexp.t b/t/op/regexp.t index 74ca639a8c..4ffe1362c6 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -71,8 +71,6 @@ while (<TESTS>) { $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//)); # Certain tests don't work with utf8 (the re_test should be in UTF8) $skip = 1 if ($^H &= ~0x00000008) && $pat =~ /\[:\^(alnum|print|word):\]/; - # ebcdic platforms do not do [:ascii:] - $skip = 1 if ("\t" ne "\011") && $pat =~ /\[:\^?ascii:\]/; $result =~ s/B//i unless $skip; for $study ('', 'study \$subject') { $c = $iters; diff --git a/t/pragma/overload.t b/t/pragma/overload.t index f9a9c59c87..f673dce028 100755 --- a/t/pragma/overload.t +++ b/t/pragma/overload.t @@ -759,12 +759,7 @@ else { }, 'deref'; # Hash: my @cont = sort %$deref; - if ("\t" eq "\011") { # ascii - test "@cont", '23 5 fake foo'; # 178 - } - else { # ebcdic alpha-numeric sort order - test "@cont", 'fake foo 23 5'; # 178 - } + test "@cont", '23 5 fake foo'; # 178 my @keys = sort keys %$deref; test "@keys", 'fake foo'; # 179 my @val = sort values %$deref; |