summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorÆvar Arnfjörð Bjarmason <avar@cpan.org>2007-04-21 21:30:47 +0000
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2007-04-23 09:39:38 +0000
commit3ab4a224eb8d34c041977288575d251ee18f009f (patch)
tree3f95471c1ad1e1dc9e59e85e81615c9a477fe8db
parente1d1eefb8c88e0dcaf2bb9e6c04d7f6192be966f (diff)
downloadperl-3ab4a224eb8d34c041977288575d251ee18f009f.tar.gz
Re: [PATCH (incomplete)] Make regcomp use SV* sv, instead of char* exp, char* xend
Message-ID: <51dd1af80704211430m6ad1b4afy49b069faa61e33a9@mail.gmail.com> p4raw-id: //depot/perl@31027
-rw-r--r--embed.fnc4
-rw-r--r--embed.h4
-rw-r--r--op.c3
-rw-r--r--perl.h6
-rw-r--r--pod/perlreapi.pod31
-rw-r--r--pp_ctl.c10
-rw-r--r--proto.h10
-rw-r--r--regcomp.c26
-rw-r--r--regexec.c4
-rw-r--r--regexp.h2
-rw-r--r--uupacktool.pl4
11 files changed, 58 insertions, 46 deletions
diff --git a/embed.fnc b/embed.fnc
index 688aae22c9..3e86722a89 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -693,8 +693,8 @@ Ap |char * |reg_stringify |NN MAGIC *mg|NULLOK STRLEN *lp|NULLOK U32 *flags|NUL
#if defined(USE_ITHREADS)
Ap |void* |regdupe_internal|NN const regexp* r|NN CLONE_PARAMS* param
#endif
-Ap |regexp*|pregcomp |NN char* exp|NN char* xend|U32 pm_flags
-Ap |regexp*|re_compile |NN char* exp|NN char* xend|U32 pm_flags
+Ap |REGEXP*|pregcomp |NN const SV * const pattern|const U32 flags
+Ap |REGEXP*|re_compile |NN const SV * const pattern|const U32 flags
Ap |char* |re_intuit_start|NN regexp* prog|NULLOK SV* sv|NN char* strpos \
|NN char* strend|U32 flags \
|NULLOK struct re_scream_pos_data_s *data
diff --git a/embed.h b/embed.h
index 97a250049e..5d83dd2800 100644
--- a/embed.h
+++ b/embed.h
@@ -2967,8 +2967,8 @@
#if defined(USE_ITHREADS)
#define regdupe_internal(a,b) Perl_regdupe_internal(aTHX_ a,b)
#endif
-#define pregcomp(a,b,c) Perl_pregcomp(aTHX_ a,b,c)
-#define re_compile(a,b,c) Perl_re_compile(aTHX_ a,b,c)
+#define pregcomp(a,b) Perl_pregcomp(aTHX_ a,b)
+#define re_compile(a,b) Perl_re_compile(aTHX_ a,b)
#define re_intuit_start(a,b,c,d,e,f) Perl_re_intuit_start(aTHX_ a,b,c,d,e,f)
#define re_intuit_string(a) Perl_re_intuit_string(aTHX_ a)
#define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h)
diff --git a/op.c b/op.c
index 9e16fc83fc..befacc38c0 100644
--- a/op.c
+++ b/op.c
@@ -3436,8 +3436,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
}
if (DO_UTF8(pat))
pm_flags |= RXf_UTF8;
- /* FIXME - can we make this function take const char * args? */
- PM_SETRE(pm, CALLREGCOMP((char*)p, (char*)p + plen, pm_flags));
+ PM_SETRE(pm, CALLREGCOMP(pat, pm_flags));
#ifdef PERL_MAD
op_getmad(expr,(OP*)pm,'e');
diff --git a/perl.h b/perl.h
index 6d9488bb64..b22a3f7944 100644
--- a/perl.h
+++ b/perl.h
@@ -196,10 +196,10 @@
#define CALLRUNOPS CALL_FPTR(PL_runops)
-#define CALLREGCOMP(exp, xend, pm) Perl_pregcomp(aTHX_ (exp),(xend),(pm))
+#define CALLREGCOMP(sv, flags) Perl_pregcomp(aTHX_ (sv),(flags))
-#define CALLREGCOMP_ENG(prog, exp, xend, pm) \
- CALL_FPTR(((prog)->comp))(aTHX_ exp, xend, pm)
+#define CALLREGCOMP_ENG(prog, sv, flags) \
+ CALL_FPTR(((prog)->comp))(aTHX_ sv, flags)
#define CALLREGEXEC(prog,stringarg,strend,strbeg,minend,screamer,data,flags) \
CALL_FPTR((prog)->engine->exec)(aTHX_ (prog),(stringarg),(strend), \
(strbeg),(minend),(screamer),(data),(flags))
diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod
index 02e1ccb265..ff69bb75ca 100644
--- a/pod/perlreapi.pod
+++ b/pod/perlreapi.pod
@@ -9,7 +9,7 @@ the default one. Each engine is supposed to provide access to a constant
structure of the following format:
typedef struct regexp_engine {
- regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags);
+ REGEXP* (*comp) (pTHX_ const SV * const pattern, const U32 flags);
I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
@@ -45,12 +45,28 @@ The routines are as follows:
=head2 comp
- regexp* comp(char *exp, char *xend, U32 flags);
+ REGEXP* comp(pTHX_ const SV * const pattern, const U32 flags);
-Compile the pattern between exp and xend using the given flags and return a
-pointer to a prepared regexp structure that can perform the match. See L</The
-REGEXP structure> below for an explanation of the individual fields in the
-REGEXP struct.
+Compile the pattern stored in C<pattern> using the given C<flags> and
+return a pointer to a prepared C<REGEXP> structure that can perform
+the match. See L</The REGEXP structure> below for an explanation of
+the individual fields in the REGEXP struct.
+
+The C<pattern> parameter is the scalar that was used as the
+pattern. previous versions of perl would pass two C<char*> indicating
+the start and end of the stringifed pattern, the following snippet can
+be used to get the old parameters:
+
+ STRLEN plen;
+ char* exp = SvPV(pattern, plen);
+ char* xend = exp + plen;
+
+Since any scalar can be passed as a pattern it's possible to implement
+an engine that does something with an array (C<< "ook" =~ [ qw/ eek
+hlagh / ] >>) or with the non-stringified form of a compiled regular
+expression (C<< "ook" =~ qr/eek/ >>). perl's own engine will always
+stringify everything using the snippet above but that doesn't mean
+other engines have to.
The C<flags> paramater is a bitfield which indicates which of the
C<msixk> flags the regex was compiled with. In addition it contains
@@ -63,7 +79,8 @@ in F<pp.c> to find out whether your engine should be setting these.
The C<eogc> flags are stripped out before being passed to the comp
routine. The regex engine does not need to know whether any of these
-are set.
+are set as those flags should only affect what perl does with the
+pattern and its match variables, not how it gets compiled & executed.
=over 4
diff --git a/pp_ctl.c b/pp_ctl.c
index 0538d6f7cd..310ca86a67 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -125,7 +125,7 @@ PP(pp_regcomp)
}
else {
STRLEN len;
- const char *t = SvPV_const(tmpstr, len);
+ const char *t = SvOK(tmpstr) ? SvPV_const(tmpstr, len) : "";
re = PM_GETRE(pm);
/* Check against the last compiled regexp. */
@@ -150,10 +150,10 @@ PP(pp_regcomp)
if (DO_UTF8(tmpstr))
pm_flags |= RXf_UTF8;
- if (eng)
- PM_SETRE(pm, CALLREGCOMP_ENG(eng,(char *)t, (char *)t + len, pm_flags));
- else
- PM_SETRE(pm, CALLREGCOMP((char *)t, (char *)t + len, pm_flags));
+ if (eng)
+ PM_SETRE(pm, CALLREGCOMP_ENG(eng, tmpstr, pm_flags));
+ else
+ PM_SETRE(pm, CALLREGCOMP(tmpstr, pm_flags));
PL_reginterp_cnt = 0; /* XXXX Be extra paranoid - needed
inside tie/overload accessors. */
diff --git a/proto.h b/proto.h
index a582063c37..f8aa9a206b 100644
--- a/proto.h
+++ b/proto.h
@@ -1892,13 +1892,11 @@ PERL_CALLCONV void* Perl_regdupe_internal(pTHX_ const regexp* r, CLONE_PARAMS* p
__attribute__nonnull__(pTHX_2);
#endif
-PERL_CALLCONV regexp* Perl_pregcomp(pTHX_ char* exp, char* xend, U32 pm_flags)
- __attribute__nonnull__(pTHX_1)
- __attribute__nonnull__(pTHX_2);
+PERL_CALLCONV REGEXP* Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
+ __attribute__nonnull__(pTHX_1);
-PERL_CALLCONV regexp* Perl_re_compile(pTHX_ char* exp, char* xend, U32 pm_flags)
- __attribute__nonnull__(pTHX_1)
- __attribute__nonnull__(pTHX_2);
+PERL_CALLCONV REGEXP* Perl_re_compile(pTHX_ const SV * const pattern, const U32 flags)
+ __attribute__nonnull__(pTHX_1);
PERL_CALLCONV char* Perl_re_intuit_start(pTHX_ regexp* prog, SV* sv, char* strpos, char* strend, U32 flags, struct re_scream_pos_data_s *data)
__attribute__nonnull__(pTHX_1)
diff --git a/regcomp.c b/regcomp.c
index 48a8a307e0..c181777cea 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4076,8 +4076,8 @@ extern const struct regexp_engine my_reg_engine;
#endif
#ifndef PERL_IN_XSUB_RE
-regexp *
-Perl_pregcomp(pTHX_ char *exp, char *xend, U32 pm_flags)
+REGEXP *
+Perl_pregcomp(pTHX_ const SV * const pattern, const U32 flags)
{
dVAR;
HV * const table = GvHV(PL_hintgv);
@@ -4092,19 +4092,22 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, U32 pm_flags)
PerlIO_printf(Perl_debug_log, "Using engine %"UVxf"\n",
SvIV(*ptr));
});
- return CALLREGCOMP_ENG(eng, exp, xend, pm_flags);
+ return CALLREGCOMP_ENG(eng, pattern, flags);
}
}
- return Perl_re_compile(aTHX_ exp, xend, pm_flags);
+ return Perl_re_compile(aTHX_ pattern, flags);
}
#endif
-regexp *
-Perl_re_compile(pTHX_ char *exp, char *xend, U32 pm_flags)
+REGEXP *
+Perl_re_compile(pTHX_ const SV * const pattern, const U32 pm_flags)
{
dVAR;
- register regexp *r;
+ register REGEXP *r;
register regexp_internal *ri;
+ STRLEN plen;
+ char* exp = SvPV((SV*)pattern, plen);
+ char* xend = exp + plen;
regnode *scan;
regnode *first;
I32 flags;
@@ -4120,16 +4123,13 @@ Perl_re_compile(pTHX_ char *exp, char *xend, U32 pm_flags)
#endif
GET_RE_DEBUG_FLAGS_DECL;
DEBUG_r(if (!PL_colorset) reginitcolors());
-
- if (exp == NULL)
- FAIL("NULL regexp argument");
RExC_utf8 = RExC_orig_utf8 = pm_flags & RXf_UTF8;
DEBUG_COMPILE_r({
SV *dsv= sv_newmortal();
RE_PV_QUOTED_DECL(s, RExC_utf8,
- dsv, exp, (xend - exp), 60);
+ dsv, exp, plen, 60);
PerlIO_printf(Perl_debug_log, "%sCompiling REx%s %s\n",
PL_colors[4],PL_colors[5],s);
});
@@ -4184,7 +4184,7 @@ redo_first_pass:
thing.
XXX: somehow figure out how to make this less expensive...
-- dmq */
- STRLEN len = xend-exp;
+ STRLEN len = plen;
DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
"UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)exp, &len);
@@ -4230,7 +4230,7 @@ redo_first_pass:
RXi_SET( r, ri );
r->engine= RE_ENGINE_PTR;
r->refcnt = 1;
- r->prelen = xend - exp;
+ r->prelen = plen;
r->extflags = pm_flags;
{
bool has_k = ((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
diff --git a/regexec.c b/regexec.c
index fa853a475f..d84190b0d6 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3698,13 +3698,11 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
}
else {
- STRLEN len;
- const char * const t = SvPV_const(ret, len);
U32 pm_flags = 0;
const I32 osize = PL_regsize;
if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
- re = CALLREGCOMP((char*)t, (char*)t + len, pm_flags);
+ re = CALLREGCOMP(ret, pm_flags);
if (!(SvFLAGS(ret)
& (SVs_TEMP | SVs_PADTMP | SVf_READONLY
| SVs_GMG)))
diff --git a/regexp.h b/regexp.h
index a833c6b2b1..33e7c2091e 100644
--- a/regexp.h
+++ b/regexp.h
@@ -112,7 +112,7 @@ typedef struct re_scream_pos_data_s
* Any regex engine implementation must be able to build one of these.
*/
typedef struct regexp_engine {
- regexp* (*comp) (pTHX_ char* exp, char* xend, U32 pm_flags);
+ REGEXP* (*comp) (pTHX_ const SV * const pattern, const U32 flags);
I32 (*exec) (pTHX_ regexp* prog, char* stringarg, char* strend,
char* strbeg, I32 minend, SV* screamer,
void* data, U32 flags);
diff --git a/uupacktool.pl b/uupacktool.pl
index 9872a9e94e..20554d721d 100644
--- a/uupacktool.pl
+++ b/uupacktool.pl
@@ -16,7 +16,7 @@ sub handle_file {
my $mode = (stat($file))[2] & 07777;
open my $fh, "<", $file
- or die "Could not open input file $file: $!";
+ or do { warn "Could not open input file $file: $!"; exit 0 };
binmode $fh;
my $str = do { local $/; <$fh> };
@@ -62,7 +62,7 @@ EOFBLURB
} else {
print "Writing $file into $outfile\n" if $opts->{'v'};
open my $outfh, ">", $outfile
- or die "Could not open $outfile for writing: $!";
+ or do { warn "Could not open $outfile for writing: $!"; exit 0 };
binmode $outfh;
### $outstr might be empty, if the file was empty
print $outfh $outstr if $outstr;