summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>1999-11-13 19:43:37 +0000
committerJarkko Hietaniemi <jhi@iki.fi>1999-11-13 19:43:37 +0000
commit374d691e102e2975cdf0ae9698b9f6e2ad73d91c (patch)
tree56c3b8007cc66f1082269085877737267776d457
parentc54e8273062a87ae6b235cfa92b11d4b2da434ab (diff)
parent81be85b825fc39c8f8e4c3fb4748df6a7fa34de4 (diff)
downloadperl-374d691e102e2975cdf0ae9698b9f6e2ad73d91c.tar.gz
Integrate with Sarathy.
p4raw-id: //depot/cfgperl@4576
-rw-r--r--ext/Errno/Errno_pm.PL2
-rw-r--r--ext/IO/lib/IO/Socket.pm3
-rw-r--r--regexec.c461
-rwxr-xr-xt/lib/dumper.t216
-rw-r--r--t/op/re_tests2
-rwxr-xr-xt/op/regexp.t2
-rwxr-xr-xt/pragma/overload.t7
7 files changed, 402 insertions, 291 deletions
diff --git a/ext/Errno/Errno_pm.PL b/ext/Errno/Errno_pm.PL
index 7f884a8eed..18260a9dca 100644
--- a/ext/Errno/Errno_pm.PL
+++ b/ext/Errno/Errno_pm.PL
@@ -182,7 +182,7 @@ use strict;
"\$Config{'archname'}-\$Config{'osvers'}" eq
"$Config{'archname'}-$Config{'osvers'}" or
- die "Errno architecture ($Config{'archname'}-$Config{'osvers'}) does not match executable architecture (\$Config{'archname'-\$Config{'osvers'})";
+ die "Errno architecture ($Config{'archname'}-$Config{'osvers'}) does not match executable architecture (\$Config{'archname'}-\$Config{'osvers'})";
\$VERSION = "$VERSION";
\@ISA = qw(Exporter);
diff --git a/ext/IO/lib/IO/Socket.pm b/ext/IO/lib/IO/Socket.pm
index 0e115a5633..01cdc40cce 100644
--- a/ext/IO/lib/IO/Socket.pm
+++ b/ext/IO/lib/IO/Socket.pm
@@ -169,8 +169,7 @@ sub accept {
}
$peer = accept($new,$sock) || undef;
};
- croak "$@"
- if ($@ and $fh);
+ croak "$@" if $@ and $sock;
return wantarray ? defined $peer ? ($new, $peer)
: ()
diff --git a/regexec.c b/regexec.c
index fa891c86f8..e3f0cb4273 100644
--- a/regexec.c
+++ b/regexec.c
@@ -254,6 +254,9 @@ S_cache_re(pTHX_ regexp *prog)
PL_reg_re = prog;
}
+static char *find_byclass(regexp * prog, regnode *c, char *s, char *strend,
+ char *startpos, I32 norun);
+
/*
* Need to implement the following flags for reg_anch:
*
@@ -275,6 +278,13 @@ S_cache_re(pTHX_ regexp *prog)
/* XXXX We assume that strpos is strbeg unless sv. */
+/* XXXX Some places assume that there is a fixed substring.
+ An update may be needed if optimizer marks as "INTUITable"
+ RExen without fixed substrings. Similarly, it is assumed that
+ lengths of all the strings are no more than minlen, thus they
+ cannot come from lookahead.
+ (Or minlen should take into account lookahead.) */
+
/* A failure to find a constant substring means that there is no need to make
an expensive call to REx engine, thus we celebrate a failure. Similarly,
finding a substring too deep into the string means that less calls to
@@ -285,10 +295,14 @@ S_cache_re(pTHX_ regexp *prog)
b) Fixed substring;
c) Whether we are anchored (beginning-of-line or \G);
d) First node (of those at offset 0) which may distingush positions;
- We use 'a', 'b', multiline-part of 'c', and try to find a position in the
+ We use a)b)d) and multiline-part of c), and try to find a position in the
string which does not contradict any of them.
*/
+/* Most of decisions we do here should have been done at compile time.
+ The nodes of the REx which we used for the search should have been
+ deleted from the finite automaton. */
+
char *
Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
char *strend, U32 flags, re_scream_pos_data *data)
@@ -301,7 +315,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
char *t;
I32 ml_anch;
char *tmp;
- register char *other_last = Nullch;
+ register char *other_last = Nullch; /* other substr checked before this */
+ char *check_at; /* check substr found at this pos */
#ifdef DEBUGGING
char *i_strpos = strpos;
#endif
@@ -432,6 +447,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
if (!s)
goto fail_finish;
+ check_at = s;
+
/* Finish the diagnostic message */
DEBUG_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
@@ -447,7 +464,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
/* Take into account the "other" substring. */
/* XXXX May be hopelessly wrong for UTF... */
if (!other_last)
- other_last = strpos - 1;
+ other_last = strpos;
if (check == prog->float_substr) {
do_other_anchored:
{
@@ -465,8 +482,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
else
t = strpos;
t += prog->anchored_offset;
- if (t <= other_last)
- t = other_last + 1;
+ if (t < other_last) /* These positions already checked */
+ t = other_last;
PL_bostr = tmp;
last2 = last1 = strend - prog->minlen;
if (last < last1)
@@ -495,7 +512,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
", trying floating at offset %ld...\n",
(long)(s1 + 1 - i_strpos)));
PL_regeol = strend; /* Used in HOP() */
- other_last = last1 + prog->anchored_offset;
+ other_last = last1 + prog->anchored_offset + 1;
s = HOPc(last, 1);
goto restart;
}
@@ -503,7 +520,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
t = s - prog->anchored_offset;
- other_last = s - 1;
+ other_last = s + 1;
s = s1;
if (t == strpos)
goto try_at_start;
@@ -520,8 +537,8 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
if (last - t > prog->float_max_offset)
last = t + prog->float_max_offset;
s = t + prog->float_min_offset;
- if (s <= other_last)
- s = other_last + 1;
+ if (s < other_last)
+ s = other_last;
/* XXXX It is not documented what units *_offsets are in. Assume bytes. */
/* fbm_instr() takes into account exact value of end-of-str
if the check is SvTAIL(ed). Since false positives are OK,
@@ -546,7 +563,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
DEBUG_r(PerlIO_printf(Perl_debug_log,
", trying anchored starting at offset %ld...\n",
(long)(s1 + 1 - i_strpos)));
- other_last = last;
+ other_last = last + 1;
PL_regeol = strend; /* Used in HOP() */
s = HOPc(t, 1);
goto restart;
@@ -554,7 +571,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
else {
DEBUG_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
(long)(s - i_strpos)));
- other_last = s - 1;
+ other_last = s + 1;
s = s1;
if (t == strpos)
goto try_at_start;
@@ -652,6 +669,72 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
s = strpos;
}
+ /* Last resort... */
+ /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
+ if (prog->regstclass) {
+ /* minlen == 0 is possible if regstclass is \b or \B,
+ and the fixed substr is ''$.
+ Since minlen is already taken into account, s+1 is before strend;
+ accidentally, minlen >= 1 guaranties no false positives at s + 1
+ even for \b or \B. But (minlen? 1 : 0) below assumes that
+ regstclass does not come from lookahead... */
+ /* If regstclass takes bytelength more than 1: If charlength==1, OK.
+ This leaves EXACTF only, which is dealt with in find_byclass(). */
+ char *endpos = (prog->anchored_substr || ml_anch)
+ ? s + (prog->minlen? 1 : 0)
+ : (prog->float_substr ? check_at - start_shift + 1
+ : strend) ;
+ char *startpos = sv ? strend - SvCUR(sv) : s;
+
+ t = s;
+ s = find_byclass(prog, prog->regstclass, s, endpos, startpos, 1);
+ if (!s) {
+#ifdef DEBUGGING
+ char *what;
+#endif
+ if (endpos == strend) {
+ DEBUG_r( PerlIO_printf(Perl_debug_log,
+ "Could not match STCLASS...\n") );
+ goto fail;
+ }
+ /* Contradict one of substrings */
+ if (prog->anchored_substr) {
+ DEBUG_r( PerlIO_printf(Perl_debug_log,
+ "This position contradicts STCLASS...\n") );
+ if (prog->anchored_substr == check) {
+ DEBUG_r( what = "anchored" );
+ hop_and_restart:
+ PL_regeol = strend; /* Used in HOP() */
+ s = HOPc(t, 1);
+ DEBUG_r( PerlIO_printf(Perl_debug_log,
+ "trying %s substr starting at offset %ld...\n",
+ what, (long)(s + start_shift - i_strpos)) );
+ goto restart;
+ }
+ /* Have both, check is floating */
+ if (t + start_shift >= check_at) /* Contradicts floating=check */
+ goto retry_floating_check;
+ /* Recheck anchored substring, but not floating... */
+ s = check_at;
+ DEBUG_r( PerlIO_printf(Perl_debug_log,
+ "trying anchored substr starting at offset %ld...\n",
+ (long)(other_last - i_strpos)) );
+ goto do_other_anchored;
+ }
+ /* Check is floating subtring. */
+ retry_floating_check:
+ t = check_at - start_shift;
+ DEBUG_r( what = "floating" );
+ goto hop_and_restart;
+ }
+ DEBUG_r( if (t != s)
+ PerlIO_printf(Perl_debug_log,
+ "By STCLASS: moving %ld --> %ld\n",
+ (long)(t - i_strpos), (long)(s - i_strpos));
+ else
+ PerlIO_printf(Perl_debug_log,
+ "Does not contradict STCLASS...\n") );
+ }
DEBUG_r(PerlIO_printf(Perl_debug_log, "%sGuessed:%s match at offset %ld\n",
PL_colors[4], PL_colors[5], (long)(s - i_strpos)) );
return s;
@@ -983,7 +1066,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
&& (ln == 1 || (OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && regtry(prog, s) )
+ && (norun || regtry(prog, s)) )
goto got_it;
s++;
}
@@ -993,7 +1076,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
&& (ln == 1 || (OP(c) == EXACTF
? ibcmp(s, m, ln)
: ibcmp_locale(s, m, ln)))
- && regtry(prog, s) )
+ && (norun || regtry(prog, s)) )
goto got_it;
s++;
}
@@ -1003,32 +1086,24 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case BOUND:
- if (minlen) {
- dontbother++;
- strend -= 1;
- }
- tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
+ tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
tmp = !tmp;
- if (regtry(prog, s))
+ if ((norun || regtry(prog, s)))
goto got_it;
}
s++;
}
- if ((minlen || tmp) && regtry(prog,s))
+ if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
goto got_it;
break;
case BOUNDLUTF8:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case BOUNDUTF8:
- if (minlen) {
- dontbother++;
- strend = reghop_c(strend, -1);
- }
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
+ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
tmp = ((OP(c) == BOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == BOUND ?
@@ -1036,60 +1111,54 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
isALNUM_LC_utf8((U8*)s)))
{
tmp = !tmp;
- if (regtry(prog, s))
+ if ((norun || regtry(prog, s)))
goto got_it;
}
s += UTF8SKIP(s);
}
- if ((minlen || tmp) && regtry(prog,s))
+ if ((!prog->minlen && tmp) && (norun || regtry(prog, s)))
goto got_it;
break;
case NBOUNDL:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUND:
- if (minlen) {
- dontbother++;
- strend -= 1;
- }
- tmp = (s != startpos) ? UCHARAT(s - 1) : PL_regprev;
+ tmp = (s != startpos) ? UCHARAT(s - 1) : '\n';
tmp = ((OP(c) == NBOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
tmp = !tmp;
- else if (regtry(prog, s))
+ else if ((norun || regtry(prog, s)))
goto got_it;
s++;
}
- if ((minlen || !tmp) && regtry(prog,s))
+ if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
goto got_it;
break;
case NBOUNDLUTF8:
PL_reg_flags |= RF_tainted;
/* FALL THROUGH */
case NBOUNDUTF8:
- if (minlen) {
- dontbother++;
+ if (prog->minlen)
strend = reghop_c(strend, -1);
- }
- tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : PL_regprev;
+ tmp = (I32)(s != startpos) ? utf8_to_uv(reghop((U8*)s, -1), 0) : '\n';
tmp = ((OP(c) == NBOUND ? isALNUM_uni(tmp) : isALNUM_LC_uni(tmp)) != 0);
while (s < strend) {
if (tmp == !(OP(c) == NBOUND ?
swash_fetch(PL_utf8_alnum, (U8*)s) :
isALNUM_LC_utf8((U8*)s)))
tmp = !tmp;
- else if (regtry(prog, s))
+ else if ((norun || regtry(prog, s)))
goto got_it;
s += UTF8SKIP(s);
}
- if ((minlen || !tmp) && regtry(prog,s))
+ if ((!prog->minlen && !tmp) && (norun || regtry(prog, s)))
goto got_it;
break;
case ALNUM:
while (s < strend) {
if (isALNUM(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1102,7 +1171,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case ALNUMUTF8:
while (s < strend) {
if (swash_fetch(PL_utf8_alnum, (U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1116,7 +1185,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isALNUM_LC(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1130,7 +1199,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isALNUM_LC_utf8((U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1143,7 +1212,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NALNUM:
while (s < strend) {
if (!isALNUM(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1156,7 +1225,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NALNUMUTF8:
while (s < strend) {
if (!swash_fetch(PL_utf8_alnum, (U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1170,7 +1239,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isALNUM_LC(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1184,7 +1253,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isALNUM_LC_utf8((U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1197,7 +1266,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case SPACE:
while (s < strend) {
if (isSPACE(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1210,7 +1279,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case SPACEUTF8:
while (s < strend) {
if (*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1224,7 +1293,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isSPACE_LC(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1238,7 +1307,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (*s == ' ' || isSPACE_LC_utf8((U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1251,7 +1320,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NSPACE:
while (s < strend) {
if (!isSPACE(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1264,7 +1333,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NSPACEUTF8:
while (s < strend) {
if (!(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s))) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1278,7 +1347,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isSPACE_LC(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1292,7 +1361,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!(*s == ' ' || isSPACE_LC_utf8((U8*)s))) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1305,7 +1374,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case DIGIT:
while (s < strend) {
if (isDIGIT(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1318,7 +1387,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case DIGITUTF8:
while (s < strend) {
if (swash_fetch(PL_utf8_digit,(U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1332,7 +1401,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isDIGIT_LC(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1346,7 +1415,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1359,7 +1428,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NDIGIT:
while (s < strend) {
if (!isDIGIT(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1372,7 +1441,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
case NDIGITUTF8:
while (s < strend) {
if (!swash_fetch(PL_utf8_digit,(U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1386,7 +1455,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isDIGIT_LC(*s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1400,7 +1469,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
PL_reg_flags |= RF_tainted;
while (s < strend) {
if (!isDIGIT_LC_utf8((U8*)s)) {
- if (tmp && regtry(prog, s))
+ if (tmp && (norun || regtry(prog, s)))
goto got_it;
else
tmp = doevery;
@@ -1414,6 +1483,270 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
break;
}
+ return 0;
+ got_it:
+ return s;
+}
+
+/*
+ - regexec_flags - match a regexp against a string
+ */
+I32
+Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *strend,
+ char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
+/* strend: pointer to null at end of string */
+/* strbeg: real beginning of string */
+/* minend: end of match must be >=minend after stringarg. */
+/* data: May be used for some additional optimizations. */
+/* nosave: For optimizations. */
+{
+ dTHR;
+ register char *s;
+ register regnode *c;
+ register char *startpos = stringarg;
+ register I32 tmp;
+ I32 minlen; /* must match at least this many chars */
+ I32 dontbother = 0; /* how many characters not to try at end */
+ I32 start_shift = 0; /* Offset of the start to find
+ constant substr. */ /* CC */
+ I32 end_shift = 0; /* Same for the end. */ /* CC */
+ I32 scream_pos = -1; /* Internal iterator of scream. */
+ char *scream_olds;
+ SV* oreplsv = GvSV(PL_replgv);
+
+ PL_regcc = 0;
+
+ cache_re(prog);
+#ifdef DEBUGGING
+ PL_regnarrate = PL_debug & 512;
+#endif
+
+ /* Be paranoid... */
+ if (prog == NULL || startpos == NULL) {
+ Perl_croak(aTHX_ "NULL regexp parameter");
+ return 0;
+ }
+
+ minlen = prog->minlen;
+ if (strend - startpos < minlen) goto phooey;
+
+ if (startpos == strbeg) /* is ^ valid at stringarg? */
+ PL_regprev = '\n';
+ else {
+ PL_regprev = (U32)stringarg[-1];
+ if (!PL_multiline && PL_regprev == '\n')
+ PL_regprev = '\0'; /* force ^ to NOT match */
+ }
+
+ /* Check validity of program. */
+ if (UCHARAT(prog->program) != REG_MAGIC) {
+ Perl_croak(aTHX_ "corrupted regexp program");
+ }
+
+ PL_reg_flags = 0;
+ PL_reg_eval_set = 0;
+ PL_reg_maxiter = 0;
+
+ if (prog->reganch & ROPT_UTF8)
+ PL_reg_flags |= RF_utf8;
+
+ /* Mark beginning of line for ^ and lookbehind. */
+ PL_regbol = startpos;
+ PL_bostr = strbeg;
+ PL_reg_sv = sv;
+
+ /* Mark end of line for $ (and such) */
+ PL_regeol = strend;
+
+ /* see how far we have to get to not match where we matched before */
+ PL_regtill = startpos+minend;
+
+ /* We start without call_cc context. */
+ PL_reg_call_cc = 0;
+
+ /* If there is a "must appear" string, look for it. */
+ s = startpos;
+
+ if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to have PL_reg_ganch */
+ MAGIC *mg;
+
+ if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
+ PL_reg_ganch = startpos;
+ else if (sv && SvTYPE(sv) >= SVt_PVMG
+ && SvMAGIC(sv)
+ && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0) {
+ PL_reg_ganch = strbeg + mg->mg_len; /* Defined pos() */
+ if (prog->reganch & ROPT_ANCH_GPOS) {
+ if (s > PL_reg_ganch)
+ goto phooey;
+ s = PL_reg_ganch;
+ }
+ }
+ else /* pos() not defined */
+ PL_reg_ganch = strbeg;
+ }
+
+ if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
+ re_scream_pos_data d;
+
+ d.scream_olds = &scream_olds;
+ d.scream_pos = &scream_pos;
+ s = re_intuit_start(prog, sv, s, strend, flags, &d);
+ if (!s)
+ goto phooey; /* not present */
+ }
+
+ DEBUG_r( if (!PL_colorset) reginitcolors() );
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "%sMatching REx%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
+ PL_colors[4],PL_colors[5],PL_colors[0],
+ prog->precomp,
+ PL_colors[1],
+ (strlen(prog->precomp) > 60 ? "..." : ""),
+ PL_colors[0],
+ (strend - startpos > 60 ? 60 : strend - startpos),
+ startpos, PL_colors[1],
+ (strend - startpos > 60 ? "..." : ""))
+ );
+
+ /* Simplest case: anchored match need be tried only once. */
+ /* [unless only anchor is BOL and multiline is set] */
+ if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
+ if (s == startpos && regtry(prog, startpos))
+ goto got_it;
+ else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
+ || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
+ {
+ char *end;
+
+ if (minlen)
+ dontbother = minlen - 1;
+ end = HOPc(strend, -dontbother) - 1;
+ /* for multiline we only have to try after newlines */
+ if (prog->check_substr) {
+ if (s == startpos)
+ goto after_try;
+ while (1) {
+ if (regtry(prog, s))
+ goto got_it;
+ after_try:
+ if (s >= end)
+ goto phooey;
+ s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
+ if (!s)
+ goto phooey;
+ }
+ } else {
+ if (s > startpos)
+ s--;
+ while (s < end) {
+ if (*s++ == '\n') { /* don't need PL_utf8skip here */
+ if (regtry(prog, s))
+ goto got_it;
+ }
+ }
+ }
+ }
+ goto phooey;
+ } else if (prog->reganch & ROPT_ANCH_GPOS) {
+ if (regtry(prog, PL_reg_ganch))
+ goto got_it;
+ goto phooey;
+ }
+
+ /* Messy cases: unanchored match. */
+ if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
+ /* we have /x+whatever/ */
+ /* it must be a one character string (XXXX Except UTF?) */
+ char ch = SvPVX(prog->anchored_substr)[0];
+ if (UTF) {
+ while (s < strend) {
+ if (*s == ch) {
+ if (regtry(prog, s)) goto got_it;
+ s += UTF8SKIP(s);
+ while (s < strend && *s == ch)
+ s += UTF8SKIP(s);
+ }
+ s += UTF8SKIP(s);
+ }
+ }
+ else {
+ while (s < strend) {
+ if (*s == ch) {
+ if (regtry(prog, s)) goto got_it;
+ s++;
+ while (s < strend && *s == ch)
+ s++;
+ }
+ s++;
+ }
+ }
+ }
+ /*SUPPRESS 560*/
+ else if (prog->anchored_substr != Nullsv
+ || (prog->float_substr != Nullsv
+ && prog->float_max_offset < strend - s)) {
+ SV *must = prog->anchored_substr
+ ? prog->anchored_substr : prog->float_substr;
+ I32 back_max =
+ prog->anchored_substr ? prog->anchored_offset : prog->float_max_offset;
+ I32 back_min =
+ prog->anchored_substr ? prog->anchored_offset : prog->float_min_offset;
+ I32 delta = back_max - back_min;
+ char *last = HOPc(strend, /* Cannot start after this */
+ -(I32)(CHR_SVLEN(must)
+ - (SvTAIL(must) != 0) + back_min));
+ char *last1; /* Last position checked before */
+
+ if (s > PL_bostr)
+ last1 = HOPc(s, -1);
+ else
+ last1 = s - 1; /* bogus */
+
+ /* XXXX check_substr already used to find `s', can optimize if
+ check_substr==must. */
+ scream_pos = -1;
+ dontbother = end_shift;
+ strend = HOPc(strend, -dontbother);
+ while ( (s <= last) &&
+ ((flags & REXEC_SCREAM)
+ ? (s = screaminstr(sv, must, HOPc(s, back_min) - strbeg,
+ end_shift, &scream_pos, 0))
+ : (s = fbm_instr((unsigned char*)HOP(s, back_min),
+ (unsigned char*)strend, must,
+ PL_multiline ? FBMrf_MULTILINE : 0))) ) {
+ if (HOPc(s, -back_max) > last1) {
+ last1 = HOPc(s, -back_min);
+ s = HOPc(s, -back_max);
+ }
+ else {
+ char *t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
+
+ last1 = HOPc(s, -back_min);
+ s = t;
+ }
+ if (UTF) {
+ while (s <= last1) {
+ if (regtry(prog, s))
+ goto got_it;
+ s += UTF8SKIP(s);
+ }
+ }
+ else {
+ while (s <= last1) {
+ if (regtry(prog, s))
+ goto got_it;
+ s++;
+ }
+ }
+ }
+ goto phooey;
+ }
+ else if (c = prog->regstclass) {
+ if (minlen) /* don't bother with what can't match */
+ strend = HOPc(strend, -(minlen - 1));
+ if (find_byclass(prog, c, s, strend, startpos, 0))
+ goto got_it;
}
else {
dontbother = 0;
diff --git a/t/lib/dumper.t b/t/lib/dumper.t
index 505051f216..9130d1c690 100755
--- a/t/lib/dumper.t
+++ b/t/lib/dumper.t
@@ -9,8 +9,6 @@ BEGIN {
}
use Data::Dumper;
-use Config;
-my $Is_ebcdic = defined($Config{'ebcdic'}) && $Config{'ebcdic'} eq 'define';
$Data::Dumper::Pad = "#";
my $TMAX;
@@ -240,20 +238,11 @@ EOT
############# 43
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#$VAR1 = {
# "abc\0'\efg" => "mno\0"
#};
EOT
-}
-else {
-$WANT = <<"EOT";
-#\$VAR1 = {
-# "\\201\\202\\203\\340\\360'\e\\206\\207" => "\\224\\225\\226\\340\\360"
-#};
-EOT
-}
$foo = { "abc\000\'\efg" => "mno\000" };
{
@@ -288,7 +277,6 @@ EOT
############# 49
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#$foo = \*::foo;
#*::foo = \5;
@@ -313,33 +301,6 @@ if (!$Is_ebcdic) {
#@bar = @{*::foo{ARRAY}};
#%baz = %{*::foo{ARRAY}->[2]};
EOT
-}
-else {
- $WANT = <<'EOT';
-#$foo = \*::foo;
-#*::foo = \5;
-#*::foo = [
-# #0
-# 10,
-# #1
-# '',
-# #2
-# {
-# 'd' => {},
-# 'a' => 1,
-# 'b' => '',
-# 'c' => []
-# }
-# ];
-#*::foo{ARRAY}->[1] = $foo;
-#*::foo{ARRAY}->[2]{'d'} = *::foo{ARRAY}->[2];
-#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR};
-#*::foo{ARRAY}->[2]{'c'} = *::foo{ARRAY};
-#*::foo = *::foo{ARRAY}->[2];
-#@bar = @{*::foo{ARRAY}};
-#%baz = %{*::foo{ARRAY}->[2]};
-EOT
-}
$Data::Dumper::Purity = 1;
$Data::Dumper::Indent = 3;
@@ -348,7 +309,6 @@ EOT
############# 55
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#$foo = \*::foo;
#*::foo = \5;
@@ -370,30 +330,6 @@ if (!$Is_ebcdic) {
#$bar = *::foo{ARRAY};
#$baz = *::foo{ARRAY}->[2];
EOT
-}
-else {
- $WANT = <<'EOT';
-#$foo = \*::foo;
-#*::foo = \5;
-#*::foo = [
-# 10,
-# '',
-# {
-# 'd' => {},
-# 'a' => 1,
-# 'b' => '',
-# 'c' => []
-# }
-#];
-#*::foo{ARRAY}->[1] = $foo;
-#*::foo{ARRAY}->[2]{'d'} = *::foo{ARRAY}->[2];
-#*::foo{ARRAY}->[2]{'b'} = *::foo{SCALAR};
-#*::foo{ARRAY}->[2]{'c'} = *::foo{ARRAY};
-#*::foo = *::foo{ARRAY}->[2];
-#$bar = *::foo{ARRAY};
-#$baz = *::foo{ARRAY}->[2];
-EOT
-}
$Data::Dumper::Indent = 1;
TEST q(Data::Dumper->Dump([\\*foo, \\@foo, \\%foo], ['foo', 'bar', 'baz']));
@@ -401,7 +337,6 @@ EOT
############# 61
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#@bar = (
# 10,
@@ -423,37 +358,12 @@ if (!$Is_ebcdic) {
#%baz = %{*::foo{HASH}};
#$foo = $bar[1];
EOT
-}
-else {
- $WANT = <<'EOT';
-#@bar = (
-# 10,
-# \*::foo,
-# {}
-#);
-#*::foo = \5;
-#*::foo = \@bar;
-#*::foo = {
-# 'd' => {},
-# 'a' => 1,
-# 'b' => '',
-# 'c' => []
-#};
-#*::foo{HASH}->{'d'} = *::foo{HASH};
-#*::foo{HASH}->{'b'} = *::foo{SCALAR};
-#*::foo{HASH}->{'c'} = \@bar;
-#$bar[2] = *::foo{HASH};
-#%baz = %{*::foo{HASH}};
-#$foo = $bar[1];
-EOT
-}
TEST q(Data::Dumper->Dump([\\@foo, \\%foo, \\*foo], ['*bar', '*baz', '*foo']));
TEST q(Data::Dumper->Dumpxs([\\@foo, \\%foo, \\*foo], ['*bar', '*baz', '*foo'])) if $XS;
############# 67
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#$bar = [
# 10,
@@ -475,37 +385,12 @@ if (!$Is_ebcdic) {
#$baz = *::foo{HASH};
#$foo = $bar->[1];
EOT
-}
-else {
- $WANT = <<'EOT';
-#$bar = [
-# 10,
-# \*::foo,
-# {}
-#];
-#*::foo = \5;
-#*::foo = $bar;
-#*::foo = {
-# 'd' => {},
-# 'a' => 1,
-# 'b' => '',
-# 'c' => []
-#};
-#*::foo{HASH}->{'d'} = *::foo{HASH};
-#*::foo{HASH}->{'b'} = *::foo{SCALAR};
-#*::foo{HASH}->{'c'} = $bar;
-#$bar->[2] = *::foo{HASH};
-#$baz = *::foo{HASH};
-#$foo = $bar->[1];
-EOT
-}
TEST q(Data::Dumper->Dump([\\@foo, \\%foo, \\*foo], ['bar', 'baz', 'foo']));
TEST q(Data::Dumper->Dumpxs([\\@foo, \\%foo, \\*foo], ['bar', 'baz', 'foo'])) if $XS;
############# 73
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#$foo = \*::foo;
#@bar = (
@@ -520,23 +405,6 @@ if (!$Is_ebcdic) {
#);
#%baz = %{$bar[2]};
EOT
-}
-else {
- $WANT = <<'EOT';
-#$foo = \*::foo;
-#@bar = (
-# 10,
-# $foo,
-# {
-# d => $bar[2],
-# a => 1,
-# b => \5,
-# c => \@bar
-# }
-#);
-#%baz = %{$bar[2]};
-EOT
-}
$Data::Dumper::Purity = 0;
$Data::Dumper::Quotekeys = 0;
@@ -545,7 +413,6 @@ EOT
############# 79
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#$foo = \*::foo;
#$bar = [
@@ -560,23 +427,6 @@ if (!$Is_ebcdic) {
#];
#$baz = $bar->[2];
EOT
-}
-else {
- $WANT = <<'EOT';
-#$foo = \*::foo;
-#$bar = [
-# 10,
-# $foo,
-# {
-# d => $bar->[2],
-# a => 1,
-# b => \5,
-# c => $bar
-# }
-#];
-#$baz = $bar->[2];
-EOT
-}
TEST q(Data::Dumper->Dump([\\*foo, \\@foo, \\%foo], ['foo', 'bar', 'baz']));
TEST q(Data::Dumper->Dumpxs([\\*foo, \\@foo, \\%foo], ['foo', 'bar', 'baz'])) if $XS;
@@ -598,7 +448,6 @@ EOT
############# 85
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#%kennels = (
# First => \'Fido',
@@ -611,21 +460,6 @@ if (!$Is_ebcdic) {
#);
#%mutts = %kennels;
EOT
-}
-else {
- $WANT = <<'EOT';
-#%kennels = (
-# Second => \'Wags',
-# First => \'Fido'
-#);
-#@dogs = (
-# ${$kennels{First}},
-# ${$kennels{Second}},
-# \%kennels
-#);
-#%mutts = %kennels;
-EOT
-}
TEST q(
$d = Data::Dumper->new([\\%kennel, \\@dogs, $mutts],
@@ -653,7 +487,6 @@ EOT
############# 97
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#%kennels = (
# First => \'Fido',
@@ -666,21 +499,6 @@ if (!$Is_ebcdic) {
#);
#%mutts = %kennels;
EOT
-}
-else {
- $WANT = <<'EOT';
-#%kennels = (
-# Second => \'Wags',
-# First => \'Fido'
-#);
-#@dogs = (
-# ${$kennels{First}},
-# ${$kennels{Second}},
-# \%kennels
-#);
-#%mutts = %kennels;
-EOT
-}
TEST q($d->Reset; $d->Dump);
@@ -690,7 +508,6 @@ EOT
############# 103
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#@dogs = (
# 'Fido',
@@ -703,21 +520,6 @@ if (!$Is_ebcdic) {
#%kennels = %{$dogs[2]};
#%mutts = %{$dogs[2]};
EOT
-}
-else {
- $WANT = <<'EOT';
-#@dogs = (
-# 'Fido',
-# 'Wags',
-# {
-# Second => \$dogs[1],
-# First => \$dogs[0]
-# }
-#);
-#%kennels = %{$dogs[2]};
-#%mutts = %{$dogs[2]};
-EOT
-}
TEST q(
$d = Data::Dumper->new([\\@dogs, \\%kennel, $mutts],
@@ -741,7 +543,6 @@ EOT
############# 115
##
-if (!$Is_ebcdic) {
$WANT = <<'EOT';
#@dogs = (
# 'Fido',
@@ -756,23 +557,6 @@ if (!$Is_ebcdic) {
# Second => \'Wags'
#);
EOT
-}
-else {
- $WANT = <<'EOT';
-#@dogs = (
-# 'Fido',
-# 'Wags',
-# {
-# Second => \'Wags',
-# First => \'Fido'
-# }
-#);
-#%kennels = (
-# Second => \'Wags',
-# First => \'Fido'
-#);
-EOT
-}
TEST q(
$d = Data::Dumper->new( [\@dogs, \%kennel], [qw(*dogs *kennels)] );
diff --git a/t/op/re_tests b/t/op/re_tests
index d72a0f73b2..f866385096 100644
--- a/t/op/re_tests
+++ b/t/op/re_tests
@@ -742,3 +742,5 @@ tt+$ xxxtt y - -
([[:digit:]-z]+) =0-z= y $1 0-z
([[:digit:]-[:alpha:]]+) =0-z= y $1 0-z
\GX.*X aaaXbX n - -
+(\d+\.\d+) 3.1415926 y $1 3.1415926
+(\ba.{0,10}br) have a web browser y $1 a web br
diff --git a/t/op/regexp.t b/t/op/regexp.t
index 74ca639a8c..4ffe1362c6 100755
--- a/t/op/regexp.t
+++ b/t/op/regexp.t
@@ -71,8 +71,6 @@ while (<TESTS>) {
$skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
# Certain tests don't work with utf8 (the re_test should be in UTF8)
$skip = 1 if ($^H &= ~0x00000008) && $pat =~ /\[:\^(alnum|print|word):\]/;
- # ebcdic platforms do not do [:ascii:]
- $skip = 1 if ("\t" ne "\011") && $pat =~ /\[:\^?ascii:\]/;
$result =~ s/B//i unless $skip;
for $study ('', 'study \$subject') {
$c = $iters;
diff --git a/t/pragma/overload.t b/t/pragma/overload.t
index f9a9c59c87..f673dce028 100755
--- a/t/pragma/overload.t
+++ b/t/pragma/overload.t
@@ -759,12 +759,7 @@ else {
}, 'deref';
# Hash:
my @cont = sort %$deref;
- if ("\t" eq "\011") { # ascii
- test "@cont", '23 5 fake foo'; # 178
- }
- else { # ebcdic alpha-numeric sort order
- test "@cont", 'fake foo 23 5'; # 178
- }
+ test "@cont", '23 5 fake foo'; # 178
my @keys = sort keys %$deref;
test "@keys", 'fake foo'; # 179
my @val = sort values %$deref;