summary refs log tree commit diff
diff options
context:
space:
mode:
author Nicholas Clark <nick@ccl4.org> 2008-01-05 13:54:38 +0000
committer Nicholas Clark <nick@ccl4.org> 2008-01-05 13:54:38 +0000
commit efd26800e76b6f876fd6abe3a3f7e3e4128150a9 (patch)
tree 3cee09b657aa4fbb21fef6d06429fb5c40ada62c
parent 5a51db0500cb91f11a554807ca60350bc43f0e5b (diff)
download perl-efd26800e76b6f876fd6abe3a3f7e3e4128150a9.tar.gz
Add RX_UTF8(), which is effectively SvUTF8() but for regexps.
Remove RXp_PRECOMP() and RXp_WRAPPED(). Change the parameter of S_debug_start_match() from regexp to REGEXP, and update its callers to match [the only part wrong for 5.10.x].

p4raw-id: //depot/perl@32840
-rw-r--r-- embed.fnc 4
-rw-r--r-- proto.h 2
-rw-r--r-- regcomp.c 16
-rw-r--r-- regexec.c 12
-rw-r--r-- regexp.h 9
5 files changed, 23 insertions, 20 deletions
diff --git a/embed.fnc b/embed.fnc
index b11cd6efff..85fca2acd3 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1454,7 +1454,9 @@ ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex|NN const regnode *p
# ifdef DEBUGGING
Es |void |dump_exec_pos |NN const char *locinput|NN const regnode *scan|NN const char *loc_regeol\
|NN const char *loc_bostr|NN const char *loc_reg_starttry|const bool do_utf8
-Es |void |debug_start_match|NN const regexp *prog|const bool do_utf8|NN const char *start|NN const char *end|NN const char *blurb
+Es |void |debug_start_match|NN const REGEXP *prog|const bool do_utf8\
+ |NN const char *start|NN const char *end\
+ |NN const char *blurb
# endif
#endif
diff --git a/proto.h b/proto.h
index 5bbb5935a3..d24ba1e133 100644
--- a/proto.h
+++ b/proto.h
@@ -3886,7 +3886,7 @@ STATIC void S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, con
__attribute__nonnull__(pTHX_4)
__attribute__nonnull__(pTHX_5);
-STATIC void S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8, const char *start, const char *end, const char *blurb)
+STATIC void S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8, const char *start, const char *end, const char *blurb)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_3)
__attribute__nonnull__(pTHX_4)
diff --git a/regcomp.c b/regcomp.c
index b7fd317248..33ed6fc9d0 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4294,8 +4294,8 @@ redo_first_pass:
+ (sizeof(STD_PAT_MODS) - 1)
+ (sizeof("(?:)") - 1);
- Newx(RXp_WRAPPED(r), RXp_WRAPLEN(r) + 1, char );
- p = RXp_WRAPPED(r);
+ Newx(RX_WRAPPED(rx), RXp_WRAPLEN(r) + 1, char );
+ p = RX_WRAPPED(rx);
*p++='('; *p++='?';
if (has_p)
*p++ = KEEPCOPY_PAT_MOD; /*'p'*/
@@ -4319,8 +4319,8 @@ redo_first_pass:
*p++ = ':';
Copy(RExC_precomp, p, plen, char);
- assert ((RXp_WRAPPED(r) - p) < 16);
- r->pre_prefix = p - RXp_WRAPPED(r);
+ assert ((RX_WRAPPED(rx) - p) < 16);
+ r->pre_prefix = p - RX_WRAPPED(rx);
p += plen;
if (has_runon)
*p++ = '\n';
@@ -4798,7 +4798,7 @@ reStudy:
#ifdef STUPID_PATTERN_CHECKS
if (RX_PRELEN(r) == 0)
r->extflags |= RXf_NULL;
- if (r->extflags & RXf_SPLIT && RX_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == ' ')
+ if (r->extflags & RXf_SPLIT && RX_PRELEN(r) == 1 && RX_PRECOMP(rx)[0] == ' ')
/* XXX: this should happen BEFORE we compile */
r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
else if (RX_PRELEN(r) == 3 && memEQ("\\s+", RXp_PRECOMP(r), 3))
@@ -4806,7 +4806,7 @@ reStudy:
else if (RX_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == '^')
r->extflags |= RXf_START_ONLY;
#else
- if (r->extflags & RXf_SPLIT && RXp_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == ' ')
+ if (r->extflags & RXf_SPLIT && RXp_PRELEN(r) == 1 && RX_PRECOMP(rx)[0] == ' ')
/* XXX: this should happen BEFORE we compile */
r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
else {
@@ -9159,7 +9159,7 @@ Perl_pregfree2(pTHX_ REGEXP *rx)
CALLREGFREE_PVT(rx); /* free the private data */
if (r->paren_names)
SvREFCNT_dec(r->paren_names);
- Safefree(RXp_WRAPPED(r));
+ Safefree(RX_WRAPPED(rx));
}
if (r->substrs) {
if (r->anchored_substr)
@@ -9258,7 +9258,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
{
SV *dsv= sv_newmortal();
RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8),
- dsv, RXp_PRECOMP(r), RXp_PRELEN(r), 60);
+ dsv, RX_PRECOMP(rx), RXp_PRELEN(r), 60);
PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n",
PL_colors[4],PL_colors[5],s);
}
diff --git a/regexec.c b/regexec.c
index 2b7ae4a445..2fb1fbe594 100644
--- a/regexec.c
+++ b/regexec.c
@@ -401,7 +401,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
PL_reg_flags |= RF_utf8;
}
DEBUG_EXECUTE_r(
- debug_start_match(prog, do_utf8, strpos, strend,
+ debug_start_match(rx, do_utf8, strpos, strend,
sv ? "Guessing start of match in sv for"
: "Guessing start of match in string for");
);
@@ -1784,7 +1784,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre
RX_MATCH_UTF8_set(rx, do_utf8);
DEBUG_EXECUTE_r(
- debug_start_match(prog, do_utf8, startpos, strend,
+ debug_start_match(rx, do_utf8, startpos, strend,
"Matching");
);
@@ -2553,15 +2553,15 @@ regmatch(), slabs allocated since entry are freed.
#ifdef DEBUGGING
STATIC void
-S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8,
+S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
const char *start, const char *end, const char *blurb)
{
- const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0;
+ const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
if (!PL_colorset)
reginitcolors();
{
RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
- RXp_PRECOMP(prog), RXp_PRELEN(prog), 60);
+ RX_PRECOMP(prog), RX_PRELEN(prog), 60);
RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
start, end - start, 60);
@@ -3775,7 +3775,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
re->sublen = rex->sublen;
rei = RXi_GET(re);
DEBUG_EXECUTE_r(
- debug_start_match(re, do_utf8, locinput, PL_regeol,
+ debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
"Matching embedded");
);
startpoint = rei->program + 1;
diff --git a/regexp.h b/regexp.h
index 1a2b17c1a3..f76a8eafff 100644
--- a/regexp.h
+++ b/regexp.h
@@ -358,19 +358,17 @@ and check for NULL.
? RX_MATCH_COPIED_on(prog) \
: RX_MATCH_COPIED_off(prog))
-#define RXp_PRECOMP(rx) ((rx)->wrapped + (rx)->pre_prefix)
/* FIXME? Are we hardcoding too much here and constraining plugin extension
writers? Specifically, the value 1 assumes that the wrapped version always
has exactly one character at the end, a ')'. Will that always be true? */
#define RXp_PRELEN(rx) ((rx)->wraplen - (rx)->pre_prefix - 1)
-#define RXp_WRAPPED(rx) ((rx)->wrapped)
#define RXp_WRAPLEN(rx) ((rx)->wraplen)
#define RXp_EXTFLAGS(rx) ((rx)->extflags)
/* For source compatibility. We used to store these explicitly. */
-#define RX_PRECOMP(prog) RXp_PRECOMP((struct regexp *)SvANY(prog))
+#define RX_PRECOMP(prog) (((struct regexp *)SvANY(prog))->wrapped + ((struct regexp *)SvANY(prog))->pre_prefix)
#define RX_PRELEN(prog) RXp_PRELEN((struct regexp *)SvANY(prog))
-#define RX_WRAPPED(prog) RXp_WRAPPED((struct regexp *)SvANY(prog))
+#define RX_WRAPPED(prog) (((struct regexp *)SvANY(prog))->wrapped)
#define RX_WRAPLEN(prog) RXp_WRAPLEN((struct regexp *)SvANY(prog))
#define RX_CHECK_SUBSTR(prog) (((struct regexp *)SvANY(prog))->check_substr)
#define RX_EXTFLAGS(prog) RXp_EXTFLAGS((struct regexp *)SvANY(prog))
@@ -417,6 +415,9 @@ and check for NULL.
#define RX_MATCH_UTF8_set(prog, t) ((t) \
? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
: (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))
+
+/* Whether the pattern stored at RX_WRAPPED is in UTF-8 */
+#define RX_UTF8(prog) (RX_EXTFLAGS(prog) & RXf_UTF8)
#define REXEC_COPY_STR 0x01 /* Need to copy the string. */
#define REXEC_CHECKED 0x02 /* check_substr already checked. */