summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>1999-06-09 14:14:27 -0400
committerGurusamy Sarathy <gsar@cpan.org>1999-07-06 09:05:02 +0000
commitf722798beaa437498e1f879b01996be44f2fdc1a (patch)
tree89f77d510121aa9a3b67c8d66e7f9c37783b143e
parent572bbb43c768e4239ddeb3245621277ab450fe70 (diff)
downloadperl-f722798beaa437498e1f879b01996be44f2fdc1a.tar.gz
applied slightly tweaked version of suggested patch for
improved RE API Message-Id: <199906092214.SAA14126@monk.mps.ohio-state.edu> Subject: [PATCH 5.005_57] REx engine rehash p4raw-id: //depot/perl@3606
-rw-r--r--Changes93
-rw-r--r--dump.c1
-rw-r--r--embed.h8
-rwxr-xr-xembed.pl4
-rw-r--r--embedvar.h12
-rw-r--r--ext/re/Makefile.PL2
-rw-r--r--ext/re/re.xs16
-rw-r--r--global.sym2
-rw-r--r--objXSUB.h14
-rw-r--r--perl.c3
-rw-r--r--perl.h9
-rw-r--r--pp.c26
-rw-r--r--pp_hot.c173
-rw-r--r--proto.h2
-rw-r--r--regcomp.c60
-rw-r--r--regcomp.h31
-rw-r--r--regexec.c310
-rw-r--r--regexp.h93
-rw-r--r--thrdvar.h11
-rw-r--r--util.c3
20 files changed, 590 insertions, 283 deletions
diff --git a/Changes b/Changes
index c1b80ca2ea..87d97f40cd 100644
--- a/Changes
+++ b/Changes
@@ -79,6 +79,99 @@ Version 5.005_58 Development release working toward 5.006
----------------
____________________________________________________________________________
+[ 3604] By: gsar on 1999/07/06 07:08:30
+ Log: From: paul.marquess@bt.com
+ Date: Tue, 8 Jun 1999 22:37:58 +0100
+ Message-ID: <5104D4DBC598D211B5FE0000F8FE7EB29C6C3C@mbtlipnt02.btlabs.bt.co.uk>
+ Subject: [PATCH 5.005_57] DB_File 1.67
+ Branch: perl
+ ! ext/DB_File/Changes ext/DB_File/DB_File.pm
+ ! ext/DB_File/DB_File.xs ext/DB_File/typemap
+____________________________________________________________________________
+[ 3603] By: gsar on 1999/07/06 07:04:50
+ Log: From: paul.marquess@bt.com
+ Date: Tue, 8 Jun 1999 22:34:01 +0100
+ Message-ID: <5104D4DBC598D211B5FE0000F8FE7EB29C6C3B@mbtlipnt02.btlabs.bt.co.uk>
+ Subject: [PATCH 5.005_57] DBM Filters
+ Branch: perl
+ ! ext/GDBM_File/GDBM_File.pm ext/GDBM_File/GDBM_File.xs
+ ! ext/NDBM_File/NDBM_File.pm ext/NDBM_File/NDBM_File.xs
+ ! ext/ODBM_File/ODBM_File.pm ext/ODBM_File/ODBM_File.xs
+ ! ext/SDBM_File/SDBM_File.pm ext/SDBM_File/SDBM_File.xs
+____________________________________________________________________________
+[ 3602] By: gsar on 1999/07/06 07:00:01
+ Log: slightly tweaked version of suggested patch
+ From: Dan Sugalski <sugalskd@ous.edu>
+ Date: Tue, 08 Jun 1999 14:09:38 -0700
+ Message-Id: <3.0.6.32.19990608140938.030f12e0@ous.edu>
+ Subject: [PATCH 5.005_57]Use NV instead of double in the core
+ Branch: perl
+ ! av.h bytecode.pl cv.h doio.c dump.c embed.pl
+ ! ext/ByteLoader/bytecode.h ext/ByteLoader/byterun.c hv.h
+ ! intrpvar.h mg.c op.c perl.h pp.c pp.h pp_ctl.c pp_sys.c
+ ! proto.h sv.c sv.h toke.c universal.c util.c
+____________________________________________________________________________
+[ 3601] By: gsar on 1999/07/06 06:52:57
+ Log: integrate cfgperl contents into mainline
+ Branch: perl
+ +> README.epoc epoc/config.h epoc/epoc.c epoc/epocish.h
+ +> epoc/perl.mmp epoc/perl.pkg
+ !> (integrate 30 files)
+____________________________________________________________________________
+[ 3598] By: jhi on 1999/07/05 20:02:55
+ Log: Integrate with mainperl.
+ Branch: cfgperl
+ +> lib/CGI/Pretty.pm
+ !> Changes ext/B/B/Bblock.pm ext/B/B/C.pm ext/B/B/CC.pm
+ !> ext/B/B/Stackobj.pm ext/GDBM_File/GDBM_File.xs mg.c op.c
+ !> opcode.h opcode.pl pp_sys.c t/lib/io_udp.t thread.h toke.c
+ !> vms/descrip_mms.template vms/subconfigure.com vms/vms.c
+ !> vms/vmsish.h
+____________________________________________________________________________
+[ 3597] By: jhi on 1999/07/05 19:59:48
+ Log: Hack SOCKS support some more plus a patch from Andy Dougherty
+ that addresses the notorious "Additional libraries" question.
+ Branch: cfgperl
+ ! Configure Porting/Glossary Porting/config.sh Porting/config_H
+ ! config_h.SH doio.c ext/Socket/Socket.xs hints/aix.sh perl.c
+ ! pp_sys.c
+____________________________________________________________________________
+[ 3596] By: gsar on 1999/07/05 18:30:51
+ Log: From: Ilya Zakharevich <ilya@math.ohio-state.edu>
+ Date: Tue, 8 Jun 1999 04:47:58 -0400 (EDT)
+ Message-Id: <199906080847.EAA03810@monk.mps.ohio-state.edu>
+ Subject: [PATCH 5.00557] Long-standing UDP sockets bug on OS/2
+ Branch: perl
+ ! pp_sys.c t/lib/io_udp.t
+____________________________________________________________________________
+[ 3595] By: gsar on 1999/07/05 18:29:08
+ Log: From: Ilya Zakharevich <ilya@math.ohio-state.edu>
+ Date: Tue, 8 Jun 1999 04:44:58 -0400 (EDT)
+ Message-Id: <199906080844.EAA03784@monk.mps.ohio-state.edu>
+ Subject: [PATCH 5.00557] Setting $^E wipes out $!
+ Branch: perl
+ ! mg.c
+____________________________________________________________________________
+[ 3594] By: gsar on 1999/07/05 18:24:53
+ Log: hand-apply whitespace mutiliated patch
+ From: Dan Sugalski <sugalskd@osshe.edu>
+ Date: Mon, 07 Jun 1999 14:46:42 -0700
+ Message-Id: <3.0.6.32.19990607144642.03079100@ous.edu>
+ Subject: [PATCH 5.005_57]Updated VMS patch
+ Branch: perl
+ ! thread.h vms/descrip_mms.template vms/subconfigure.com
+ ! vms/vms.c vms/vmsish.h
+____________________________________________________________________________
+[ 3593] By: gsar on 1999/07/05 17:53:04
+ Log: applied parts not duplicated by previous patches
+ From: "Vishal Bhatia" <vishalb@my-deja.com>
+ Date: Sat, 05 Jun 1999 08:42:17 -0700
+ Message-ID: <JAMCAJKJEJDPAAAA@my-deja.com>
+ Subject: Fwd: [PATCH 5.005_57] consolidated compiler changes
+ Branch: perl
+ ! Changes ext/B/B/Bblock.pm ext/B/B/C.pm ext/B/B/CC.pm
+ ! ext/B/B/Stackobj.pm
+____________________________________________________________________________
[ 3592] By: jhi on 1999/07/05 17:17:22
Log: AIX threaded build, plus few more on the side.
Branch: cfgperl
diff --git a/dump.c b/dump.c
index 9c7d3a9764..12d318d5f1 100644
--- a/dump.c
+++ b/dump.c
@@ -15,6 +15,7 @@
#include "EXTERN.h"
#define PERL_IN_DUMP_C
#include "perl.h"
+#include "regcomp.h"
#ifndef DBL_DIG
#define DBL_DIG 15 /* A guess that works lots of places */
diff --git a/embed.h b/embed.h
index d0ede0bcb3..0871c6f34a 100644
--- a/embed.h
+++ b/embed.h
@@ -448,6 +448,8 @@
#define pregexec Perl_pregexec
#define pregfree Perl_pregfree
#define pregcomp Perl_pregcomp
+#define re_intuit_start Perl_re_intuit_start
+#define re_intuit_string Perl_re_intuit_string
#define regexec_flags Perl_regexec_flags
#define regnext Perl_regnext
#define regprop Perl_regprop
@@ -1762,6 +1764,8 @@
#define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g)
#define pregfree(a) Perl_pregfree(aTHX_ a)
#define pregcomp(a,b,c) Perl_pregcomp(aTHX_ a,b,c)
+#define re_intuit_start(a,b,c,d,e,f) Perl_re_intuit_start(aTHX_ a,b,c,d,e,f)
+#define re_intuit_string(a) Perl_re_intuit_string(aTHX_ a)
#define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h)
#define regnext(a) Perl_regnext(aTHX_ a)
#define regprop(a,b) Perl_regprop(aTHX_ a,b)
@@ -3486,6 +3490,10 @@
#define pregfree Perl_pregfree
#define Perl_pregcomp CPerlObj::Perl_pregcomp
#define pregcomp Perl_pregcomp
+#define Perl_re_intuit_start CPerlObj::Perl_re_intuit_start
+#define re_intuit_start Perl_re_intuit_start
+#define Perl_re_intuit_string CPerlObj::Perl_re_intuit_string
+#define re_intuit_string Perl_re_intuit_string
#define Perl_regexec_flags CPerlObj::Perl_regexec_flags
#define regexec_flags Perl_regexec_flags
#define Perl_regnext CPerlObj::Perl_regnext
diff --git a/embed.pl b/embed.pl
index ad91f80962..ed7f3e45b5 100755
--- a/embed.pl
+++ b/embed.pl
@@ -1210,6 +1210,10 @@ p |I32 |pregexec |regexp* prog|char* stringarg \
|SV* screamer|U32 nosave
p |void |pregfree |struct regexp* r
p |regexp*|pregcomp |char* exp|char* xend|PMOP* pm
+p |char* |re_intuit_start|regexp* prog|SV* sv|char* strpos \
+ |char* strend|U32 flags \
+ |struct re_scream_pos_data_s *data
+p |SV* |re_intuit_string|regexp* prog
p |I32 |regexec_flags |regexp* prog|char* stringarg \
|char* strend|char* strbeg|I32 minend \
|SV* screamer|void* data|U32 flags
diff --git a/embedvar.h b/embedvar.h
index dbd94e9c51..f759b632ae 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -85,8 +85,11 @@
#define PL_regeol (my_perl->Tregeol)
#define PL_regexecp (my_perl->Tregexecp)
#define PL_regflags (my_perl->Tregflags)
+#define PL_regfree (my_perl->Tregfree)
#define PL_regindent (my_perl->Tregindent)
#define PL_reginput (my_perl->Treginput)
+#define PL_regint_start (my_perl->Tregint_start)
+#define PL_regint_string (my_perl->Tregint_string)
#define PL_reginterp_cnt (my_perl->Treginterp_cnt)
#define PL_reglastparen (my_perl->Treglastparen)
#define PL_regnarrate (my_perl->Tregnarrate)
@@ -212,8 +215,11 @@
#define PL_regeol (PL_curinterp->Tregeol)
#define PL_regexecp (PL_curinterp->Tregexecp)
#define PL_regflags (PL_curinterp->Tregflags)
+#define PL_regfree (PL_curinterp->Tregfree)
#define PL_regindent (PL_curinterp->Tregindent)
#define PL_reginput (PL_curinterp->Treginput)
+#define PL_regint_start (PL_curinterp->Tregint_start)
+#define PL_regint_string (PL_curinterp->Tregint_string)
#define PL_reginterp_cnt (PL_curinterp->Treginterp_cnt)
#define PL_reglastparen (PL_curinterp->Treglastparen)
#define PL_regnarrate (PL_curinterp->Tregnarrate)
@@ -854,8 +860,11 @@
#define PL_Tregeol PL_regeol
#define PL_Tregexecp PL_regexecp
#define PL_Tregflags PL_regflags
+#define PL_Tregfree PL_regfree
#define PL_Tregindent PL_regindent
#define PL_Treginput PL_reginput
+#define PL_Tregint_start PL_regint_start
+#define PL_Tregint_string PL_regint_string
#define PL_Treginterp_cnt PL_reginterp_cnt
#define PL_Treglastparen PL_reglastparen
#define PL_Tregnarrate PL_regnarrate
@@ -992,8 +1001,11 @@
#define PL_regeol (thr->Tregeol)
#define PL_regexecp (thr->Tregexecp)
#define PL_regflags (thr->Tregflags)
+#define PL_regfree (thr->Tregfree)
#define PL_regindent (thr->Tregindent)
#define PL_reginput (thr->Treginput)
+#define PL_regint_start (thr->Tregint_start)
+#define PL_regint_string (thr->Tregint_string)
#define PL_reginterp_cnt (thr->Treginterp_cnt)
#define PL_reglastparen (thr->Treglastparen)
#define PL_regnarrate (thr->Tregnarrate)
diff --git a/ext/re/Makefile.PL b/ext/re/Makefile.PL
index 040b085f4f..bd0f1f741c 100644
--- a/ext/re/Makefile.PL
+++ b/ext/re/Makefile.PL
@@ -5,7 +5,7 @@ WriteMakefile(
MAN3PODS => {}, # Pods will be built by installman.
XSPROTOARG => '-noprototypes',
OBJECT => 're_exec$(OBJ_EXT) re_comp$(OBJ_EXT) re$(OBJ_EXT)',
- DEFINE => '-DPERL_EXT_RE_BUILD',
+ DEFINE => '-DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG',
clean => { FILES => '*$(OBJ_EXT) *.c ../../lib/re.pm' },
);
diff --git a/ext/re/re.xs b/ext/re/re.xs
index b49a110377..10e44f76de 100644
--- a/ext/re/re.xs
+++ b/ext/re/re.xs
@@ -11,6 +11,11 @@ extern regexp* my_regcomp (pTHX_ char* exp, char* xend, PMOP* pm);
extern I32 my_regexec (pTHX_ regexp* prog, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
+extern void my_regfree (pTHX_ struct regexp* r);
+extern char* my_re_intuit_start (pTHX_ regexp *prog, SV *sv, char *strpos,
+ char *strend, U32 flags,
+ struct re_scream_pos_data_s *data);
+extern SV* my_re_intuit_string (pTHX_ regexp *prog);
static int oldfl;
@@ -20,8 +25,12 @@ static void
deinstall(pTHX)
{
dTHR;
- PL_regexecp = &Perl_regexec_flags;
- PL_regcompp = &Perl_pregcomp;
+ PL_regexecp = FUNC_NAME_TO_PTR(Perl_regexec_flags);
+ PL_regcompp = FUNC_NAME_TO_PTR(Perl_pregcomp);
+ PL_regint_start = FUNC_NAME_TO_PTR(Perl_re_intuit_start);
+ PL_regint_string = FUNC_NAME_TO_PTR(Perl_re_intuit_string);
+ PL_regfree = FUNC_NAME_TO_PTR(Perl_pregfree);
+
if (!oldfl)
PL_debug &= ~R_DB;
}
@@ -33,6 +42,9 @@ install(pTHX)
PL_colorset = 0; /* Allow reinspection of ENV. */
PL_regexecp = &my_regexec;
PL_regcompp = &my_regcomp;
+ PL_regint_start = &my_re_intuit_start;
+ PL_regint_string = &my_re_intuit_string;
+ PL_regfree = &my_regfree;
oldfl = PL_debug & R_DB;
PL_debug |= R_DB;
}
diff --git a/global.sym b/global.sym
index efbca1d6ae..87ece3c083 100644
--- a/global.sym
+++ b/global.sym
@@ -408,6 +408,8 @@ Perl_regdump
Perl_pregexec
Perl_pregfree
Perl_pregcomp
+Perl_re_intuit_start
+Perl_re_intuit_string
Perl_regexec_flags
Perl_regnext
Perl_regprop
diff --git a/objXSUB.h b/objXSUB.h
index d14de86892..d91f84d0ee 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -546,10 +546,16 @@
#define PL_regexecp pPerl->PL_regexecp
#undef PL_regflags
#define PL_regflags pPerl->PL_regflags
+#undef PL_regfree
+#define PL_regfree pPerl->PL_regfree
#undef PL_regindent
#define PL_regindent pPerl->PL_regindent
#undef PL_reginput
#define PL_reginput pPerl->PL_reginput
+#undef PL_regint_start
+#define PL_regint_start pPerl->PL_regint_start
+#undef PL_regint_string
+#define PL_regint_string pPerl->PL_regint_string
#undef PL_reginterp_cnt
#define PL_reginterp_cnt pPerl->PL_reginterp_cnt
#undef PL_reglastparen
@@ -2426,6 +2432,14 @@
#define Perl_pregcomp pPerl->Perl_pregcomp
#undef pregcomp
#define pregcomp Perl_pregcomp
+#undef Perl_re_intuit_start
+#define Perl_re_intuit_start pPerl->Perl_re_intuit_start
+#undef re_intuit_start
+#define re_intuit_start Perl_re_intuit_start
+#undef Perl_re_intuit_string
+#define Perl_re_intuit_string pPerl->Perl_re_intuit_string
+#undef re_intuit_string
+#define re_intuit_string Perl_re_intuit_string
#undef Perl_regexec_flags
#define Perl_regexec_flags pPerl->Perl_regexec_flags
#undef regexec_flags
diff --git a/perl.c b/perl.c
index 39eaf300ee..062b33457c 100644
--- a/perl.c
+++ b/perl.c
@@ -2947,6 +2947,9 @@ S_init_main_thread(pTHX)
PL_maxscream = -1;
PL_regcompp = FUNC_NAME_TO_PTR(Perl_pregcomp);
PL_regexecp = FUNC_NAME_TO_PTR(Perl_regexec_flags);
+ PL_regint_start = FUNC_NAME_TO_PTR(Perl_re_intuit_start);
+ PL_regint_string = FUNC_NAME_TO_PTR(Perl_re_intuit_string);
+ PL_regfree = FUNC_NAME_TO_PTR(Perl_pregfree);
PL_regindent = 0;
PL_reginterp_cnt = 0;
diff --git a/perl.h b/perl.h
index 5eb7b1dbe1..b09660a959 100644
--- a/perl.h
+++ b/perl.h
@@ -145,6 +145,9 @@ class CPerlObj;
#define CALLRUNOPS CALL_FPTR(PL_runops)
#define CALLREGCOMP CALL_FPTR(PL_regcompp)
#define CALLREGEXEC CALL_FPTR(PL_regexecp)
+#define CALLREG_INTUIT_START CALL_FPTR(PL_regint_start)
+#define CALLREG_INTUIT_STRING CALL_FPTR(PL_regint_string)
+#define CALLREGFREE CALL_FPTR(PL_regfree)
#define CALLPROTECT CALL_FPTR(PL_protect)
#define NOOP (void)0
@@ -2385,6 +2388,12 @@ typedef regexp*(CPERLscope(*regcomp_t)) (pTHX_ char* exp, char* xend, PMOP* pm);
typedef I32 (CPERLscope(*regexec_t)) (pTHX_ regexp* prog, char* stringarg,
char* strend, char* strbeg, I32 minend,
SV* screamer, void* data, U32 flags);
+typedef char* (CPERLscope(*re_intuit_start_t)) (pTHX_ regexp *prog, SV *sv,
+ char *strpos, char *strend,
+ U32 flags,
+ struct re_scream_pos_data_s *d);
+typedef SV* (CPERLscope(*re_intuit_string_t)) (pTHX_ regexp *prog);
+typedef void (CPERLscope(*regfree_t)) (pTHX_ struct regexp* r);
/* Set up PERLVAR macros for populating structs */
diff --git a/pp.c b/pp.c
index e688848d9c..d28a8c21d6 100644
--- a/pp.c
+++ b/pp.c
@@ -4998,17 +4998,19 @@ PP(pp_split)
s = m;
}
}
- else if (rx->check_substr && !rx->nparens
+ else if ((rx->reganch & RE_USE_INTUIT) && !rx->nparens
&& (rx->reganch & ROPT_CHECK_ALL)
&& !(rx->reganch & ROPT_ANCH)) {
- int tail = SvTAIL(rx->check_substr) != 0;
+ int tail = (rx->reganch & RE_INTUIT_TAIL);
+ SV *csv = CALLREG_INTUIT_STRING(aTHX_ rx);
+ char c;
- i = SvCUR(rx->check_substr);
+ i = rx->minlen;
if (i == 1 && !tail) {
- i = *SvPVX(rx->check_substr);
+ c = *SvPV(csv,i);
while (--limit) {
/*SUPPRESS 530*/
- for (m = s; m < strend && *m != i; m++) ;
+ for (m = s; m < strend && *m != c; m++) ;
if (m >= strend)
break;
dstr = NEWSV(30, m-s);
@@ -5022,8 +5024,8 @@ PP(pp_split)
else {
#ifndef lint
while (s < strend && --limit &&
- (m=fbm_instr((unsigned char*)s, (unsigned char*)strend,
- rx->check_substr, PL_multiline ? FBMrf_MULTILINE : 0)) )
+ (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
+ csv, PL_multiline ? FBMrf_MULTILINE : 0)) )
#endif
{
dstr = NEWSV(31, m-s);
@@ -5031,14 +5033,18 @@ PP(pp_split)
if (make_mortal)
sv_2mortal(dstr);
XPUSHs(dstr);
- s = m + i - tail; /* Fake \n at the end */
+ s = m + i; /* Fake \n at the end */
}
}
}
else {
maxiters += (strend - s) * rx->nparens;
- while (s < strend && --limit &&
- CALLREGEXEC(aTHX_ rx, s, strend, orig, 1, sv, NULL, 0))
+ while (s < strend && --limit
+/* && (!rx->check_substr
+ || ((s = CALLREG_INTUIT_START(aTHX_ rx, sv, s, strend,
+ 0, NULL))))
+*/ && CALLREGEXEC(aTHX_ rx, s, strend, orig,
+ 1 /* minend */, sv, NULL, 0))
{
TAINT_IF(RX_MATCH_TAINTED(rx));
if (RX_MATCH_COPIED(rx) && rx->subbeg != orig) {
diff --git a/pp_hot.c b/pp_hot.c
index d3a1f5c7da..697c30697a 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -846,10 +846,8 @@ PP(pp_match)
register char *s;
char *strend;
I32 global;
- I32 r_flags = 0;
- char *truebase; /* Start of string, may be
- relocated if REx engine
- copies the string. */
+ I32 r_flags = REXEC_CHECKED;
+ char *truebase; /* Start of string */
register REGEXP *rx = pm->op_pmregexp;
bool rxtainted;
I32 gimme = GIMME;
@@ -909,9 +907,7 @@ PP(pp_match)
if ((gimme != G_ARRAY && !global && rx->nparens)
|| SvTEMP(TARG) || PL_sawampersand)
r_flags |= REXEC_COPY_STR;
- if (SvSCREAM(TARG) && rx->check_substr
- && SvTYPE(rx->check_substr) == SVt_PVBM
- && SvVALID(rx->check_substr))
+ if (SvSCREAM(TARG))
r_flags |= REXEC_SCREAM;
if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) {
@@ -927,76 +923,17 @@ play_it_again:
if (update_minmatch++)
minmatch = had_zerolen;
}
- if (rx->check_substr) {
- if (!(rx->reganch & ROPT_NOSCAN)) { /* Floating checkstring. */
- SV *c = rx->check_substr;
+ if (rx->reganch & RE_USE_INTUIT) {
+ s = CALLREG_INTUIT_START(aTHX_ rx, TARG, s, strend, r_flags, NULL);
- if (r_flags & REXEC_SCREAM) {
- I32 p = -1;
- char *b;
-
- if (PL_screamfirst[BmRARE(c)] < 0
- && !( BmRARE(c) == '\n' && (BmPREVIOUS(c) == SvCUR(c) - 1)
- && SvTAIL(c) ))
- goto nope;
-
- b = (char*)HOP((U8*)s, rx->check_offset_min);
- if (!(s = screaminstr(TARG, c, b - s, 0, &p, 0)))
- goto nope;
-
- if ((rx->reganch & ROPT_CHECK_ALL)
- && !PL_sawampersand && !SvTAIL(c))
- goto yup;
- }
- else if (!(s = fbm_instr((unsigned char*)HOP((U8*)s, rx->check_offset_min),
- (unsigned char*)strend, c,
- PL_multiline ? FBMrf_MULTILINE : 0)))
- goto nope;
- else if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand)
- goto yup;
- if (s && rx->check_offset_max < s - t) {
- ++BmUSEFUL(c);
- s = (char*)HOP((U8*)s, -rx->check_offset_max);
- }
- else
- s = t;
- }
- /* Now checkstring is fixed, i.e. at fixed offset from the
- beginning of match, and the match is anchored at s. */
- else if (!PL_multiline) { /* Anchored near beginning of string. */
- I32 slen;
- char *b = (char*)HOP((U8*)s, rx->check_offset_min);
-
- if (SvTAIL(rx->check_substr)) {
- slen = SvCUR(rx->check_substr); /* >= 1 */
-
- if ( strend - b > slen || strend - b < slen - 1 )
- goto nope;
- if ( strend - b == slen && strend[-1] != '\n')
- goto nope;
- /* Now should match b[0..slen-2] */
- slen--;
- if (slen && (*SvPVX(rx->check_substr) != *b
- || (slen > 1
- && memNE(SvPVX(rx->check_substr), b, slen))))
- goto nope;
- if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand)
- goto yup;
- } else { /* Assume len > 0 */
- if (*SvPVX(rx->check_substr) != *b
- || ((slen = SvCUR(rx->check_substr)) > 1
- && memNE(SvPVX(rx->check_substr), b, slen)))
- goto nope;
- if ((rx->reganch & ROPT_CHECK_ALL) && !PL_sawampersand)
- goto yup;
- }
- }
- if (!(rx->reganch & ROPT_NAUGHTY) && --BmUSEFUL(rx->check_substr) < 0
- && rx->check_substr == rx->float_substr) {
- SvREFCNT_dec(rx->check_substr);
- rx->check_substr = Nullsv; /* opt is being useless */
- rx->float_substr = Nullsv;
- }
+ if (!s)
+ goto nope;
+ if ( (rx->reganch & ROPT_CHECK_ALL)
+ && !PL_sawampersand
+ && ((rx->reganch & ROPT_NOSCAN)
+ || !((rx->reganch & RE_INTUIT_TAIL)
+ && (r_flags & REXEC_SCREAM))))
+ goto yup;
}
if (CALLREGEXEC(aTHX_ rx, s, strend, truebase, minmatch, TARG, NULL, r_flags))
{
@@ -1066,11 +1003,10 @@ play_it_again:
RETPUSHYES;
}
-yup: /* Confirmed by check_substr */
+yup: /* Confirmed by INTUIT */
if (rxtainted)
RX_MATCH_TAINTED_on(rx);
TAINT_IF(RX_MATCH_TAINTED(rx));
- ++BmUSEFUL(rx->check_substr);
PL_curpm = pm;
if (pm->op_pmflags & PMf_ONCE)
pm->op_pmdynflags |= PMdf_USED;
@@ -1081,7 +1017,7 @@ yup: /* Confirmed by check_substr */
if (global) {
rx->subbeg = truebase;
rx->startp[0] = s - truebase;
- rx->endp[0] = s - truebase + SvCUR(rx->check_substr);
+ rx->endp[0] = s - truebase + rx->minlen;
rx->sublen = strend - truebase;
goto gotcha;
}
@@ -1092,19 +1028,16 @@ yup: /* Confirmed by check_substr */
rx->sublen = strend - t;
RX_MATCH_COPIED_on(rx);
off = rx->startp[0] = s - t;
- rx->endp[0] = off + SvCUR(rx->check_substr);
+ rx->endp[0] = off + rx->minlen;
}
else { /* startp/endp are used by @- @+. */
rx->startp[0] = s - truebase;
- rx->endp[0] = s - truebase + SvCUR(rx->check_substr);
+ rx->endp[0] = s - truebase + rx->minlen;
}
LEAVE_SCOPE(oldsave);
RETPUSHYES;
nope:
- if (rx->check_substr)
- ++BmUSEFUL(rx->check_substr);
-
ret_no:
if (global && !(pm->op_pmflags & PMf_CONTINUE)) {
if (SvTYPE(TARG) >= SVt_PVMG && SvMAGIC(TARG)) {
@@ -1717,56 +1650,26 @@ PP(pp_subst)
}
r_flags = (rx->nparens || SvTEMP(TARG) || PL_sawampersand)
? REXEC_COPY_STR : 0;
- if (SvSCREAM(TARG) && rx->check_substr
- && SvTYPE(rx->check_substr) == SVt_PVBM
- && SvVALID(rx->check_substr))
+ if (SvSCREAM(TARG))
r_flags |= REXEC_SCREAM;
if (pm->op_pmflags & (PMf_MULTILINE|PMf_SINGLELINE)) {
SAVEINT(PL_multiline);
PL_multiline = pm->op_pmflags & PMf_MULTILINE;
}
orig = m = s;
- if (rx->check_substr) {
- if (!(rx->reganch & ROPT_NOSCAN)) { /* It floats. */
- if (r_flags & REXEC_SCREAM) {
- I32 p = -1;
- char *b;
-
- if (PL_screamfirst[BmRARE(rx->check_substr)] < 0)
- goto nope;
-
- b = (char*)HOP((U8*)s, rx->check_offset_min);
- if (!(s = screaminstr(TARG, rx->check_substr, b - s, 0, &p, 0)))
- goto nope;
- }
- else if (!(s = fbm_instr((unsigned char*)HOP((U8*)s, rx->check_offset_min),
- (unsigned char*)strend,
- rx->check_substr,
- PL_multiline ? FBMrf_MULTILINE : 0)))
- goto nope;
- if (s && rx->check_offset_max < s - m) {
- ++BmUSEFUL(rx->check_substr);
- s = (char*)HOP((U8*)s, -rx->check_offset_max);
- }
- else
- s = m;
- }
- /* Now checkstring is fixed, i.e. at fixed offset from the
- beginning of match, and the match is anchored at s. */
- else if (!PL_multiline) { /* Anchored at beginning of string. */
- I32 slen;
- char *b = (char*)HOP((U8*)s, rx->check_offset_min);
- if (*SvPVX(rx->check_substr) != *b
- || ((slen = SvCUR(rx->check_substr)) > 1
- && memNE(SvPVX(rx->check_substr), b, slen)))
- goto nope;
- }
- if (!(rx->reganch & ROPT_NAUGHTY) && --BmUSEFUL(rx->check_substr) < 0
- && rx->check_substr == rx->float_substr) {
- SvREFCNT_dec(rx->check_substr);
- rx->check_substr = Nullsv; /* opt is being useless */
- rx->float_substr = Nullsv;
- }
+ if (rx->reganch & RE_USE_INTUIT) {
+ s = CALLREG_INTUIT_START(aTHX_ rx, TARG, s, strend, r_flags, NULL);
+
+ if (!s)
+ goto nope;
+ /* How to do it in subst? */
+/* if ( (rx->reganch & ROPT_CHECK_ALL)
+ && !PL_sawampersand
+ && ((rx->reganch & ROPT_NOSCAN)
+ || !((rx->reganch & RE_INTUIT_TAIL)
+ && (r_flags & REXEC_SCREAM))))
+ goto yup;
+*/
}
/* only replace once? */
@@ -1778,7 +1681,9 @@ PP(pp_subst)
/* can do inplace substitution? */
if (c && clen <= rx->minlen && (once || !(r_flags & REXEC_COPY_STR))
&& !(rx->reganch & ROPT_LOOKBEHIND_SEEN)) {
- if (!CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL, r_flags)) {
+ if (!CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL,
+ r_flags | REXEC_CHECKED))
+ {
SPAGAIN;
PUSHs(&PL_sv_no);
LEAVE_SCOPE(oldsave);
@@ -1851,7 +1756,9 @@ PP(pp_subst)
}
s = rx->endp[0] + orig;
} while (CALLREGEXEC(aTHX_ rx, s, strend, orig, s == m,
- Nullsv, NULL, REXEC_NOT_FIRST)); /* don't match same null twice */
+ TARG, NULL,
+ /* don't match same null twice */
+ REXEC_NOT_FIRST|REXEC_IGNOREPOS));
if (s != d) {
i = strend - s;
SvCUR_set(TARG, d - SvPVX(TARG) + i);
@@ -1873,7 +1780,9 @@ PP(pp_subst)
RETURN;
}
- if (CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL, r_flags)) {
+ if (CALLREGEXEC(aTHX_ rx, s, strend, orig, 0, TARG, NULL,
+ r_flags | REXEC_CHECKED))
+ {
if (force_on_match) {
force_on_match = 0;
s = SvPV_force(TARG, len);
@@ -1933,8 +1842,6 @@ PP(pp_subst)
goto ret_no;
nope:
- ++BmUSEFUL(rx->check_substr);
-
ret_no:
SPAGAIN;
PUSHs(&PL_sv_no);
diff --git a/proto.h b/proto.h
index eae128ac0e..7fa642405b 100644
--- a/proto.h
+++ b/proto.h
@@ -452,6 +452,8 @@ VIRTUAL void Perl_regdump(pTHX_ regexp* r);
VIRTUAL I32 Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave);
VIRTUAL void Perl_pregfree(pTHX_ struct regexp* r);
VIRTUAL regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, PMOP* pm);
+VIRTUAL char* Perl_re_intuit_start(pTHX_ regexp* prog, SV* sv, char* strpos, char* strend, U32 flags, struct re_scream_pos_data_s *data);
+VIRTUAL SV* Perl_re_intuit_string(pTHX_ regexp* prog);
VIRTUAL I32 Perl_regexec_flags(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, void* data, U32 flags);
VIRTUAL regnode* Perl_regnext(pTHX_ regnode* p);
VIRTUAL void Perl_regprop(pTHX_ SV* sv, regnode* o);
diff --git a/regcomp.c b/regcomp.c
index 76ae52376e..59fe5a7d9f 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -25,7 +25,7 @@
# define PERL_IN_XSUB_RE
# endif
/* need access to debugger hooks */
-# ifndef DEBUGGING
+# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
# define DEBUGGING
# endif
#endif
@@ -35,8 +35,9 @@
# define Perl_pregcomp my_regcomp
# define Perl_regdump my_regdump
# define Perl_regprop my_regprop
-/* *These* symbols are masked to allow static link. */
# define Perl_pregfree my_regfree
+# define Perl_re_intuit_string my_re_intuit_string
+/* *These* symbols are masked to allow static link. */
# define Perl_regnext my_regnext
# define Perl_save_re_context my_save_re_context
# define Perl_reginitcolors my_reginitcolors
@@ -898,7 +899,11 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
PL_regkind[(U8)OP(first)] == NBOUND)
r->regstclass = first;
else if (PL_regkind[(U8)OP(first)] == BOL) {
- r->reganch |= (OP(first) == MBOL ? ROPT_ANCH_MBOL: ROPT_ANCH_BOL);
+ r->reganch |= (OP(first) == MBOL
+ ? ROPT_ANCH_MBOL
+ : (OP(first) == SBOL
+ ? ROPT_ANCH_SBOL
+ : ROPT_ANCH_BOL));
first = NEXTOPER(first);
goto again;
}
@@ -912,12 +917,21 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
!(r->reganch & ROPT_ANCH) )
{
/* turn .* into ^.* with an implied $*=1 */
- r->reganch |= ROPT_ANCH_BOL | ROPT_IMPLICIT;
+ int type = OP(NEXTOPER(first));
+
+ if (type == REG_ANY || type == ANYUTF8)
+ type = ROPT_ANCH_MBOL;
+ else
+ type = ROPT_ANCH_SBOL;
+
+ r->reganch |= type | ROPT_IMPLICIT;
first = NEXTOPER(first);
goto again;
}
- if (sawplus && (!sawopen || !PL_regsawback))
- r->reganch |= ROPT_SKIP; /* x+ must match 1st of run */
+ if (sawplus && (!sawopen || !PL_regsawback)
+ && !(PL_regseen & REG_SEEN_EVAL)) /* May examine pos and $& */
+ /* x+ must match at the 1st pos of run of x's */
+ r->reganch |= ROPT_SKIP;
/* Scan is after the zeroth branch, first is atomic matcher. */
DEBUG_r(PerlIO_printf(Perl_debug_log, "first at %d\n",
@@ -1010,6 +1024,11 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm)
r->check_offset_min = data.offset_float_min;
r->check_offset_max = data.offset_float_max;
}
+ if (r->check_substr) {
+ r->reganch |= RE_USE_INTUIT;
+ if (SvTAIL(r->check_substr))
+ r->reganch |= RE_INTUIT_TAIL;
+ }
}
else {
/* Several toplevels. Best we can is to set minlen. */
@@ -2846,6 +2865,8 @@ Perl_regdump(pTHX_ regexp *r)
PerlIO_printf(Perl_debug_log, "(BOL)");
if (r->reganch & ROPT_ANCH_MBOL)
PerlIO_printf(Perl_debug_log, "(MBOL)");
+ if (r->reganch & ROPT_ANCH_SBOL)
+ PerlIO_printf(Perl_debug_log, "(SBOL)");
if (r->reganch & ROPT_ANCH_GPOS)
PerlIO_printf(Perl_debug_log, "(GPOS)");
PerlIO_putc(Perl_debug_log, ' ');
@@ -2896,10 +2917,37 @@ Perl_regprop(pTHX_ SV *sv, regnode *o)
#endif /* DEBUGGING */
}
+SV *
+Perl_re_intuit_string(pTHX_ regexp *prog)
+{ /* Assume that RE_INTUIT is set */
+ DEBUG_r(
+ { STRLEN n_a;
+ char *s = SvPV(prog->check_substr,n_a);
+
+ if (!PL_colorset) reginitcolors();
+ PerlIO_printf(Perl_debug_log,
+ "%sUsing REx substr:%s `%s%.60s%s%s'\n",
+ PL_colors[4],PL_colors[5],PL_colors[0],
+ s,
+ PL_colors[1],
+ (strlen(s) > 60 ? "..." : ""));
+ } );
+
+ return prog->check_substr;
+}
+
void
Perl_pregfree(pTHX_ struct regexp *r)
{
dTHR;
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "%sFreeing REx:%s `%s%.60s%s%s'\n",
+ PL_colors[4],PL_colors[5],PL_colors[0],
+ r->precomp,
+ PL_colors[1],
+ (strlen(r->precomp) > 60 ? "..." : "")));
+
+
if (!r || (--r->refcnt > 0))
return;
if (r->precomp)
diff --git a/regcomp.h b/regcomp.h
index 7c5c13a2e6..518add0309 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -237,3 +237,34 @@ EXTCONST char PL_simple[] = {
#endif
END_EXTERN_C
+
+typedef struct re_scream_pos_data_s
+{
+ char **scream_olds; /* match pos */
+ I32 *scream_pos; /* Internal iterator of scream. */
+} re_scream_pos_data;
+
+struct reg_data {
+ U32 count;
+ U8 *what;
+ void* data[1];
+};
+
+struct reg_substr_datum {
+ I32 min_offset;
+ I32 max_offset;
+ SV *substr;
+};
+
+struct reg_substr_data {
+ struct reg_substr_datum data[3]; /* Actual array */
+};
+
+#define anchored_substr substrs->data[0].substr
+#define anchored_offset substrs->data[0].min_offset
+#define float_substr substrs->data[1].substr
+#define float_min_offset substrs->data[1].min_offset
+#define float_max_offset substrs->data[1].max_offset
+#define check_substr substrs->data[2].substr
+#define check_offset_min substrs->data[2].min_offset
+#define check_offset_max substrs->data[2].max_offset
diff --git a/regexec.c b/regexec.c
index 7dbf6dc8e4..c97f89efa7 100644
--- a/regexec.c
+++ b/regexec.c
@@ -25,7 +25,7 @@
# define PERL_IN_XSUB_RE
# endif
/* need access to debugger hooks */
-# ifndef DEBUGGING
+# if defined(PERL_EXT_RE_DEBUG) && !defined(DEBUGGING)
# define DEBUGGING
# endif
#endif
@@ -35,6 +35,7 @@
# define Perl_regexec_flags my_regexec
# define Perl_regdump my_regdump
# define Perl_regprop my_regprop
+# define Perl_re_intuit_start my_re_intuit_start
/* *These* symbols are masked to allow static link. */
# define Perl_pregexec my_pregexec
# define Perl_reginitcolors my_reginitcolors
@@ -258,6 +259,192 @@ S_restore_pos(pTHX_ void *arg)
}
}
+/*
+ * Need to implement the following flags for reg_anch:
+ *
+ * USE_INTUIT_NOML - Useful to call re_intuit_start() first
+ * USE_INTUIT_ML
+ * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
+ * INTUIT_AUTORITATIVE_ML
+ * INTUIT_ONCE_NOML - Intuit can match in one location only.
+ * INTUIT_ONCE_ML
+ *
+ * Another flag for this function: SECOND_TIME (so that float substrs
+ * with giant delta may be not rechecked).
+ */
+
+/* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
+
+/* If SCREAM, then sv should be compatible with strpos and strend.
+ Otherwise, only SvCUR(sv) is used to get strbeg. */
+
+/* XXXX We assume that strpos is strbeg unless sv. */
+
+char *
+Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos,
+ char *strend, U32 flags, re_scream_pos_data *data)
+{
+ I32 start_shift;
+ /* Should be nonnegative! */
+ I32 end_shift;
+ char *s;
+ char *t;
+ I32 ml_anch;
+
+ DEBUG_r( if (!PL_colorset) reginitcolors() );
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "%sGuessing start of match:%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
+ PL_colors[4],PL_colors[5],PL_colors[0],
+ prog->precomp,
+ PL_colors[1],
+ (strlen(prog->precomp) > 60 ? "..." : ""),
+ PL_colors[0],
+ (strend - strpos > 60 ? 60 : strend - strpos),
+ strpos, PL_colors[1],
+ (strend - strpos > 60 ? "..." : ""))
+ );
+
+ if (prog->minlen > strend - strpos)
+ goto fail;
+
+ /* XXXX Move further down? */
+ start_shift = prog->check_offset_min; /* okay to underestimate on CC */
+ /* Should be nonnegative! */
+ end_shift = prog->minlen - start_shift -
+ CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
+
+ if (prog->reganch & ROPT_ANCH) {
+ ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE)
+ || ( (prog->reganch & ROPT_ANCH_BOL)
+ && !PL_multiline ) );
+
+ if ((prog->check_offset_min == prog->check_offset_max) && !ml_anch) {
+ /* Anchored... */
+ I32 slen;
+
+ if ( !(prog->reganch & ROPT_ANCH_GPOS) /* Checked by the caller */
+ && (sv && (strpos + SvCUR(sv) != strend)) )
+ goto fail;
+
+ s = (char*)HOP((U8*)strpos, prog->check_offset_min);
+ if (SvTAIL(prog->check_substr)) {
+ slen = SvCUR(prog->check_substr); /* >= 1 */
+
+ if ( strend - s > slen || strend - s < slen - 1 ) {
+ s = Nullch;
+ goto finish;
+ }
+ if ( strend - s == slen && strend[-1] != '\n') {
+ s = Nullch;
+ goto finish;
+ }
+ /* Now should match s[0..slen-2] */
+ slen--;
+ if (slen && (*SvPVX(prog->check_substr) != *s
+ || (slen > 1
+ && memNE(SvPVX(prog->check_substr), s, slen))))
+ s = Nullch;
+ }
+ else if (*SvPVX(prog->check_substr) != *s
+ || ((slen = SvCUR(prog->check_substr)) > 1
+ && memNE(SvPVX(prog->check_substr), s, slen)))
+ s = Nullch;
+ else
+ s = strpos;
+ goto finish;
+ }
+ s = strpos;
+ if (!ml_anch && (s + prog->check_offset_max < strend - prog->minlen))
+ end_shift += strend - s - prog->minlen - prog->check_offset_max;
+ }
+ else {
+ ml_anch = 0;
+ s = strpos;
+ }
+
+ restart:
+ if (flags & REXEC_SCREAM) {
+ SV *c = prog->check_substr;
+ char *strbeg = SvPVX(sv); /* XXXX Assume PV_force() on SCREAM! */
+ I32 p = -1; /* Internal iterator of scream. */
+ I32 *pp = data ? data->scream_pos : &p;
+
+ if (PL_screamfirst[BmRARE(c)] >= 0
+ || ( BmRARE(c) == '\n'
+ && (BmPREVIOUS(c) == SvCUR(c) - 1)
+ && SvTAIL(c) ))
+ s = screaminstr(sv, prog->check_substr,
+ start_shift + (strpos - strbeg), end_shift, pp, 0);
+ else
+ s = Nullch;
+ if (data)
+ *data->scream_olds = s;
+ }
+ else
+ s = fbm_instr((unsigned char*)s + start_shift,
+ (unsigned char*)strend - end_shift,
+ prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
+
+ /* Update the count-of-usability, remove useless subpatterns,
+ unshift s. */
+ finish:
+ if (!s) {
+ ++BmUSEFUL(prog->check_substr); /* hooray */
+ goto fail; /* not present */
+ }
+ else if (s - strpos > prog->check_offset_max &&
+ ((prog->reganch & ROPT_UTF8)
+ ? ((t = reghopmaybe_c(s, -(prog->check_offset_max)))
+ && t >= strpos)
+ : (t = s - prog->check_offset_max) != 0) ) {
+ if (ml_anch && t[-1] != '\n') {
+ find_anchor:
+ while (t < strend - end_shift - prog->minlen) {
+ if (*t == '\n') {
+ if (t < s - prog->check_offset_min) {
+ s = t + 1;
+ goto set_useful;
+ }
+ s = t + 1;
+ goto restart;
+ }
+ t++;
+ }
+ s = Nullch;
+ goto finish;
+ }
+ s = t;
+ set_useful:
+ ++BmUSEFUL(prog->check_substr); /* hooray/2 */
+ }
+ else {
+ if (ml_anch && sv
+ && (strpos + SvCUR(sv) != strend) && strpos[-1] != '\n') {
+ t = strpos;
+ goto find_anchor;
+ }
+ if (!(prog->reganch & ROPT_NAUGHTY)
+ && --BmUSEFUL(prog->check_substr) < 0
+ && prog->check_substr == prog->float_substr) { /* boo */
+ /* If flags & SOMETHING - do not do it many times on the same match */
+ SvREFCNT_dec(prog->check_substr);
+ prog->check_substr = Nullsv; /* disable */
+ prog->float_substr = Nullsv; /* clear */
+ s = strpos;
+ prog->reganch &= ~RE_USE_INTUIT;
+ }
+ else
+ s = strpos;
+ }
+
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sFound%s at offset %ld\n",
+ PL_colors[4],PL_colors[5], (long)(s - strpos)) );
+ return s;
+ fail:
+ DEBUG_r(PerlIO_printf(Perl_debug_log, "%sNot found...%s\n",
+ PL_colors[4],PL_colors[5]));
+ return Nullch;
+}
/*
- regexec_flags - match a regexp against a string
@@ -339,103 +526,78 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
/* If there is a "must appear" string, look for it. */
s = startpos;
- if (!(flags & REXEC_CHECKED)
- && prog->check_substr != Nullsv &&
- !(prog->reganch & ROPT_ANCH_GPOS) &&
- (!(prog->reganch & (ROPT_ANCH_BOL | ROPT_ANCH_MBOL))
- || (PL_multiline && prog->check_substr == prog->anchored_substr)) )
- {
- char *t;
- start_shift = prog->check_offset_min; /* okay to underestimate on CC */
- /* Should be nonnegative! */
- end_shift = minlen - start_shift -
- CHR_SVLEN(prog->check_substr) + (SvTAIL(prog->check_substr) != 0);
- if (flags & REXEC_SCREAM) {
- SV *c = prog->check_substr;
-
- if (PL_screamfirst[BmRARE(c)] >= 0
- || ( BmRARE(c) == '\n'
- && (BmPREVIOUS(c) == SvCUR(c) - 1)
- && SvTAIL(c) ))
- s = screaminstr(sv, prog->check_substr,
- start_shift + (stringarg - strbeg),
- end_shift, &scream_pos, 0);
- else
- s = Nullch;
- scream_olds = s;
- }
+
+ if (prog->reganch & ROPT_GPOS_SEEN) {
+ MAGIC *mg;
+
+ if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
+ && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
+ PL_reg_ganch = strbeg + mg->mg_len;
else
- s = fbm_instr((unsigned char*)s + start_shift,
- (unsigned char*)strend - end_shift,
- prog->check_substr, PL_multiline ? FBMrf_MULTILINE : 0);
- if (!s) {
- ++BmUSEFUL(prog->check_substr); /* hooray */
- goto phooey; /* not present */
- }
- else if (s - stringarg > prog->check_offset_max &&
- (UTF
- ? ((t = reghopmaybe_c(s, -(prog->check_offset_max))) && t >= stringarg)
- : (t = s - prog->check_offset_max) != 0
- )
- )
- {
- ++BmUSEFUL(prog->check_substr); /* hooray/2 */
- s = t;
- }
- else if (!(prog->reganch & ROPT_NAUGHTY)
- && --BmUSEFUL(prog->check_substr) < 0
- && prog->check_substr == prog->float_substr) { /* boo */
- SvREFCNT_dec(prog->check_substr);
- prog->check_substr = Nullsv; /* disable */
- prog->float_substr = Nullsv; /* clear */
- s = startpos;
+ PL_reg_ganch = startpos;
+ if (prog->reganch & ROPT_ANCH_GPOS) {
+ if (s > PL_reg_ganch)
+ goto phooey;
+ s = PL_reg_ganch;
}
- else
- s = startpos;
}
- DEBUG_r(if (!PL_colorset) reginitcolors());
- DEBUG_r(PerlIO_printf(Perl_debug_log,
+ if (!(flags & REXEC_CHECKED) && prog->check_substr != Nullsv) {
+ re_scream_pos_data d;
+
+ d.scream_olds = &scream_olds;
+ d.scream_pos = &scream_pos;
+ s = re_intuit_start(prog, sv, s, strend, flags, &d);
+ if (!s)
+ goto phooey; /* not present */
+ }
+
+ DEBUG_r( if (!PL_colorset) reginitcolors() );
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
"%sMatching%s `%s%.60s%s%s' against `%s%.*s%s%s'\n",
PL_colors[4],PL_colors[5],PL_colors[0],
prog->precomp,
PL_colors[1],
(strlen(prog->precomp) > 60 ? "..." : ""),
- PL_colors[0],
+ PL_colors[0],
(strend - startpos > 60 ? 60 : strend - startpos),
startpos, PL_colors[1],
(strend - startpos > 60 ? "..." : ""))
);
- if (prog->reganch & ROPT_GPOS_SEEN) {
- MAGIC *mg;
-
- if (!(flags & REXEC_IGNOREPOS) && sv && SvTYPE(sv) >= SVt_PVMG
- && SvMAGIC(sv) && (mg = mg_find(sv, 'g')) && mg->mg_len >= 0)
- PL_reg_ganch = strbeg + mg->mg_len;
- else
- PL_reg_ganch = startpos;
- }
-
/* Simplest case: anchored match need be tried only once. */
/* [unless only anchor is BOL and multiline is set] */
if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) {
- if (regtry(prog, startpos))
+ if (s == startpos && regtry(prog, startpos))
goto got_it;
else if (PL_multiline || (prog->reganch & ROPT_IMPLICIT)
|| (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? */
{
+ char *end;
+
if (minlen)
dontbother = minlen - 1;
- strend = HOPc(strend, -dontbother);
+ end = HOPc(strend, -dontbother) - 1;
/* for multiline we only have to try after newlines */
- if (s > startpos)
- s--;
- while (s < strend) {
- if (*s++ == '\n') { /* don't need PL_utf8skip here */
- if (s < strend && regtry(prog, s))
+ if (prog->check_substr) {
+ while (1) {
+ if (regtry(prog, s))
goto got_it;
- }
+ if (s >= end)
+ goto phooey;
+ s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
+ if (!s)
+ goto phooey;
+ }
+ } else {
+ if (s > startpos)
+ s--;
+ while (s < end) {
+ if (*s++ == '\n') { /* don't need PL_utf8skip here */
+ if (regtry(prog, s))
+ goto got_it;
+ }
+ }
}
}
goto phooey;
@@ -448,7 +610,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char *
/* Messy cases: unanchored match. */
if (prog->anchored_substr && prog->reganch & ROPT_SKIP) {
/* we have /x+whatever/ */
- /* it must be a one character string */
+ /* it must be a one character string (XXXX Except UTF?) */
char ch = SvPVX(prog->anchored_substr)[0];
if (UTF) {
while (s < strend) {
diff --git a/regexp.h b/regexp.h
index 9da5bd47e0..5d787e018a 100644
--- a/regexp.h
+++ b/regexp.h
@@ -17,38 +17,13 @@ struct regnode {
typedef struct regnode regnode;
-struct reg_data {
- U32 count;
- U8 *what;
- void* data[1];
-};
-
-struct reg_substr_datum {
- I32 min_offset;
- I32 max_offset;
- SV *substr;
-};
-
-struct reg_substr_data {
- struct reg_substr_datum data[3]; /* Actual array */
-};
+struct reg_substr_data;
typedef struct regexp {
I32 *startp;
I32 *endp;
regnode *regstclass;
-#if 0
- SV *anchored_substr; /* Substring at fixed position wrt start. */
- I32 anchored_offset; /* Position of it. */
- SV *float_substr; /* Substring at variable position wrt start. */
- I32 float_min_offset; /* Minimal position of it. */
- I32 float_max_offset; /* Maximal position of it. */
- SV *check_substr; /* Substring to check before matching. */
- I32 check_offset_min; /* Offset of the above. */
- I32 check_offset_max; /* Offset of the above. */
-#else
struct reg_substr_data *substrs;
-#endif
char *precomp; /* pre-compilation regular expression */
struct reg_data *data; /* Additional data. */
char *subbeg; /* saved or original string
@@ -64,29 +39,20 @@ typedef struct regexp {
regnode program[1]; /* Unwarranted chumminess with compiler. */
} regexp;
-#define anchored_substr substrs->data[0].substr
-#define anchored_offset substrs->data[0].min_offset
-#define float_substr substrs->data[1].substr
-#define float_min_offset substrs->data[1].min_offset
-#define float_max_offset substrs->data[1].max_offset
-#define check_substr substrs->data[2].substr
-#define check_offset_min substrs->data[2].min_offset
-#define check_offset_max substrs->data[2].max_offset
-
-#define ROPT_ANCH (ROPT_ANCH_BOL|ROPT_ANCH_MBOL|ROPT_ANCH_GPOS)
-#define ROPT_ANCH_SINGLE (ROPT_ANCH_BOL|ROPT_ANCH_GPOS)
+#define ROPT_ANCH (ROPT_ANCH_BOL|ROPT_ANCH_MBOL|ROPT_ANCH_GPOS|ROPT_ANCH_SBOL)
+#define ROPT_ANCH_SINGLE (ROPT_ANCH_SBOL|ROPT_ANCH_GPOS)
#define ROPT_ANCH_BOL 0x00001
#define ROPT_ANCH_MBOL 0x00002
-#define ROPT_ANCH_GPOS 0x00004
-#define ROPT_SKIP 0x00008
-#define ROPT_IMPLICIT 0x00010 /* Converted .* to ^.* */
-#define ROPT_NOSCAN 0x00020 /* Check-string always at start. */
-#define ROPT_GPOS_SEEN 0x00040
-#define ROPT_CHECK_ALL 0x00080
-#define ROPT_LOOKBEHIND_SEEN 0x00100
-#define ROPT_EVAL_SEEN 0x00200
-#define ROPT_TAINTED_SEEN 0x00400
-#define ROPT_ANCH_SBOL 0x00800
+#define ROPT_ANCH_SBOL 0x00004
+#define ROPT_ANCH_GPOS 0x00008
+#define ROPT_SKIP 0x00010
+#define ROPT_IMPLICIT 0x00020 /* Converted .* to ^.* */
+#define ROPT_NOSCAN 0x00040 /* Check-string always at start. */
+#define ROPT_GPOS_SEEN 0x00080
+#define ROPT_CHECK_ALL 0x00100
+#define ROPT_LOOKBEHIND_SEEN 0x00200
+#define ROPT_EVAL_SEEN 0x00400
+#define ROPT_TAINTED_SEEN 0x00800
/* 0xf800 of reganch is used by PMf_COMPILETIME */
@@ -94,6 +60,19 @@ typedef struct regexp {
#define ROPT_NAUGHTY 0x20000 /* how exponential is this pattern? */
#define ROPT_COPY_DONE 0x40000 /* subbeg is a copy of the string */
+#define RE_USE_INTUIT_NOML 0x0100000 /* Best to intuit before matching */
+#define RE_USE_INTUIT_ML 0x0200000
+#define REINT_AUTORITATIVE_NOML 0x0400000 /* Can trust a positive answer */
+#define REINT_AUTORITATIVE_ML 0x0800000
+#define REINT_ONCE_NOML 0x1000000 /* Intuit can succed once only. */
+#define REINT_ONCE_ML 0x2000000
+#define RE_INTUIT_ONECHAR 0x4000000
+#define RE_INTUIT_TAIL 0x8000000
+
+#define RE_USE_INTUIT (RE_USE_INTUIT_NOML|RE_USE_INTUIT_ML)
+#define REINT_AUTORITATIVE (REINT_AUTORITATIVE_NOML|REINT_AUTORITATIVE_ML)
+#define REINT_ONCE (REINT_ONCE_NOML|REINT_ONCE_ML)
+
#define RX_MATCH_TAINTED(prog) ((prog)->reganch & ROPT_TAINTED_SEEN)
#define RX_MATCH_TAINTED_on(prog) ((prog)->reganch |= ROPT_TAINTED_SEEN)
#define RX_MATCH_TAINTED_off(prog) ((prog)->reganch &= ~ROPT_TAINTED_SEEN)
@@ -108,18 +87,22 @@ typedef struct regexp {
? RX_MATCH_COPIED_on(prog) \
: RX_MATCH_COPIED_off(prog))
-#define REXEC_COPY_STR 1 /* Need to copy the string. */
-#define REXEC_CHECKED 2 /* check_substr already checked. */
-#define REXEC_SCREAM 4 /* use scream table. */
-#define REXEC_IGNOREPOS 8 /* \G matches at start. */
+#define REXEC_COPY_STR 0x01 /* Need to copy the string. */
+#define REXEC_CHECKED 0x02 /* check_substr already checked. */
+#define REXEC_SCREAM 0x04 /* use scream table. */
+#define REXEC_IGNOREPOS 0x08 /* \G matches at start. */
#define REXEC_NOT_FIRST 0x10 /* This is another iteration of //g. */
+#define REXEC_ML 0x20 /* $* was set. */
#define ReREFCNT_inc(re) ((re && re->refcnt++), re)
-#define ReREFCNT_dec(re) pregfree(re)
+#define ReREFCNT_dec(re) CALLREGFREE(aTHX_ re)
#define FBMcf_TAIL_DOLLAR 1
-#define FBMcf_TAIL_Z 2
-#define FBMcf_TAIL_z 4
-#define FBMcf_TAIL (FBMcf_TAIL_DOLLAR|FBMcf_TAIL_Z|FBMcf_TAIL_z)
+#define FBMcf_TAIL_DOLLARM 2
+#define FBMcf_TAIL_Z 4
+#define FBMcf_TAIL_z 8
+#define FBMcf_TAIL (FBMcf_TAIL_DOLLAR|FBMcf_TAIL_DOLLARM|FBMcf_TAIL_Z|FBMcf_TAIL_z)
#define FBMrf_MULTILINE 1
+
+struct re_scream_pos_data_s;
diff --git a/thrdvar.h b/thrdvar.h
index a442367f6b..c8233934ab 100644
--- a/thrdvar.h
+++ b/thrdvar.h
@@ -170,9 +170,16 @@ PERLVAR(Treg_oldsaved, char*) /* old saved substr during match */
PERLVAR(Treg_oldsavedlen, STRLEN) /* old length of saved substr during match */
PERLVARI(Tregcompp, regcomp_t, FUNC_NAME_TO_PTR(Perl_pregcomp))
- /* Pointer to RE compiler */
+ /* Pointer to REx compiler */
PERLVARI(Tregexecp, regexec_t, FUNC_NAME_TO_PTR(Perl_regexec_flags))
- /* Pointer to RE executer */
+ /* Pointer to REx executer */
+PERLVARI(Tregint_start, re_intuit_start_t, FUNC_NAME_TO_PTR(Perl_re_intuit_start))
+ /* Pointer to optimized REx executer */
+PERLVARI(Tregint_string,re_intuit_string_t, FUNC_NAME_TO_PTR(Perl_re_intuit_string))
+ /* Pointer to optimized REx string */
+PERLVARI(Tregfree, regfree_t, FUNC_NAME_TO_PTR(Perl_pregfree))
+ /* Pointer to REx free()er */
+
PERLVARI(Treginterp_cnt,int, 0) /* Whether `Regexp'
was interpolated. */
PERLVARI(Treg_starttry, char *, 0) /* -Dr: where regtry was called. */
diff --git a/util.c b/util.c
index 99415f0918..242a30889d 100644
--- a/util.c
+++ b/util.c
@@ -3235,6 +3235,9 @@ Perl_new_struct_thread(pTHX_ struct perl_thread *t)
PL_maxscream = -1;
PL_regcompp = FUNC_NAME_TO_PTR(Perl_pregcomp);
PL_regexecp = FUNC_NAME_TO_PTR(Perl_regexec_flags);
+ PL_regint_start = FUNC_NAME_TO_PTR(Perl_re_intuit_start);
+ PL_regint_string = FUNC_NAME_TO_PTR(Perl_re_intuit_string);
+ PL_regfree = FUNC_NAME_TO_PTR(Perl_pregfree);
PL_regindent = 0;
PL_reginterp_cnt = 0;
PL_lastscream = Nullsv;