diff options
author | Nicholas Clark <nick@ccl4.org> | 2008-01-05 13:54:38 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2008-01-05 13:54:38 +0000 |
commit | efd26800e76b6f876fd6abe3a3f7e3e4128150a9 (patch) | |
tree | 3cee09b657aa4fbb21fef6d06429fb5c40ada62c | |
parent | 5a51db0500cb91f11a554807ca60350bc43f0e5b (diff) | |
download | perl-efd26800e76b6f876fd6abe3a3f7e3e4128150a9.tar.gz |
Add RX_UTF8(), which is effectively SvUTF8() but for regexps.
Remove RXp_PRECOMP() and RXp_WRAPPED().
Change the parameter of S_debug_start_match() from regexp to REGEXP.
Change its callers [the only part wrong for 5.10.x]
p4raw-id: //depot/perl@32840
-rw-r--r-- | embed.fnc | 4 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | regcomp.c | 16 | ||||
-rw-r--r-- | regexec.c | 12 | ||||
-rw-r--r-- | regexp.h | 9 |
5 files changed, 23 insertions, 20 deletions
@@ -1454,7 +1454,9 @@ ERs |I32 |reg_check_named_buff_matched |NN const regexp *rex|NN const regnode *p # ifdef DEBUGGING Es |void |dump_exec_pos |NN const char *locinput|NN const regnode *scan|NN const char *loc_regeol\ |NN const char *loc_bostr|NN const char *loc_reg_starttry|const bool do_utf8 -Es |void |debug_start_match|NN const regexp *prog|const bool do_utf8|NN const char *start|NN const char *end|NN const char *blurb +Es |void |debug_start_match|NN const REGEXP *prog|const bool do_utf8\ + |NN const char *start|NN const char *end\ + |NN const char *blurb # endif #endif @@ -3886,7 +3886,7 @@ STATIC void S_dump_exec_pos(pTHX_ const char *locinput, const regnode *scan, con __attribute__nonnull__(pTHX_4) __attribute__nonnull__(pTHX_5); -STATIC void S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8, const char *start, const char *end, const char *blurb) +STATIC void S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8, const char *start, const char *end, const char *blurb) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_3) __attribute__nonnull__(pTHX_4) @@ -4294,8 +4294,8 @@ redo_first_pass: + (sizeof(STD_PAT_MODS) - 1) + (sizeof("(?:)") - 1); - Newx(RXp_WRAPPED(r), RXp_WRAPLEN(r) + 1, char ); - p = RXp_WRAPPED(r); + Newx(RX_WRAPPED(rx), RXp_WRAPLEN(r) + 1, char ); + p = RX_WRAPPED(rx); *p++='('; *p++='?'; if (has_p) *p++ = KEEPCOPY_PAT_MOD; /*'p'*/ @@ -4319,8 +4319,8 @@ redo_first_pass: *p++ = ':'; Copy(RExC_precomp, p, plen, char); - assert ((RXp_WRAPPED(r) - p) < 16); - r->pre_prefix = p - RXp_WRAPPED(r); + assert ((RX_WRAPPED(rx) - p) < 16); + r->pre_prefix = p - RX_WRAPPED(rx); p += plen; if (has_runon) *p++ = '\n'; @@ -4798,7 +4798,7 @@ reStudy: #ifdef STUPID_PATTERN_CHECKS if (RX_PRELEN(r) == 0) r->extflags |= RXf_NULL; - if (r->extflags & RXf_SPLIT && RX_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == ' ') + if (r->extflags & RXf_SPLIT && RX_PRELEN(r) == 1 && RX_PRECOMP(rx)[0] == ' ') /* XXX: this should happen BEFORE we compile */ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); else if (RX_PRELEN(r) == 3 && memEQ("\\s+", RXp_PRECOMP(r), 3)) @@ -4806,7 +4806,7 @@ reStudy: else if (RX_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == '^') r->extflags |= RXf_START_ONLY; #else - if (r->extflags & RXf_SPLIT && RXp_PRELEN(r) == 1 && RXp_PRECOMP(r)[0] == ' ') + if (r->extflags & RXf_SPLIT && RXp_PRELEN(r) == 1 && RX_PRECOMP(rx)[0] == ' ') /* XXX: this should happen BEFORE we compile */ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); else { @@ -9159,7 +9159,7 @@ Perl_pregfree2(pTHX_ REGEXP *rx) CALLREGFREE_PVT(rx); /* free the private data */ if (r->paren_names) SvREFCNT_dec(r->paren_names); - Safefree(RXp_WRAPPED(r)); + Safefree(RX_WRAPPED(rx)); } if (r->substrs) { if (r->anchored_substr) @@ -9258,7 +9258,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) { SV *dsv= sv_newmortal(); RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8), - dsv, RXp_PRECOMP(r), RXp_PRELEN(r), 60); + dsv, RX_PRECOMP(rx), RXp_PRELEN(r), 60); PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n", PL_colors[4],PL_colors[5],s); } @@ -401,7 +401,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos, PL_reg_flags |= RF_utf8; } DEBUG_EXECUTE_r( - debug_start_match(prog, do_utf8, strpos, strend, + debug_start_match(rx, do_utf8, strpos, strend, sv ? "Guessing start of match in sv for" : "Guessing start of match in string for"); ); @@ -1784,7 +1784,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *stre RX_MATCH_UTF8_set(rx, do_utf8); DEBUG_EXECUTE_r( - debug_start_match(prog, do_utf8, startpos, strend, + debug_start_match(rx, do_utf8, startpos, strend, "Matching"); ); @@ -2553,15 +2553,15 @@ regmatch(), slabs allocated since entry are freed. #ifdef DEBUGGING STATIC void -S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8, +S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8, const char *start, const char *end, const char *blurb) { - const bool utf8_pat= prog->extflags & RXf_UTF8 ? 1 : 0; + const bool utf8_pat = RX_UTF8(prog) ? 1 : 0; if (!PL_colorset) reginitcolors(); { RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0), - RXp_PRECOMP(prog), RXp_PRELEN(prog), 60); + RX_PRECOMP(prog), RX_PRELEN(prog), 60); RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1), start, end - start, 60); @@ -3775,7 +3775,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) re->sublen = rex->sublen; rei = RXi_GET(re); DEBUG_EXECUTE_r( - debug_start_match(re, do_utf8, locinput, PL_regeol, + debug_start_match(re_sv, do_utf8, locinput, PL_regeol, "Matching embedded"); ); startpoint = rei->program + 1; @@ -358,19 +358,17 @@ and check for NULL. ? RX_MATCH_COPIED_on(prog) \ : RX_MATCH_COPIED_off(prog)) -#define RXp_PRECOMP(rx) ((rx)->wrapped + (rx)->pre_prefix) /* FIXME? Are we hardcoding too much here and constraining plugin extension writers? Specifically, the value 1 assumes that the wrapped version always has exactly one character at the end, a ')'. Will that always be true? */ #define RXp_PRELEN(rx) ((rx)->wraplen - (rx)->pre_prefix - 1) -#define RXp_WRAPPED(rx) ((rx)->wrapped) #define RXp_WRAPLEN(rx) ((rx)->wraplen) #define RXp_EXTFLAGS(rx) ((rx)->extflags) /* For source compatibility. We used to store these explicitly. */ -#define RX_PRECOMP(prog) RXp_PRECOMP((struct regexp *)SvANY(prog)) +#define RX_PRECOMP(prog) (((struct regexp *)SvANY(prog))->wrapped + ((struct regexp *)SvANY(prog))->pre_prefix) #define RX_PRELEN(prog) RXp_PRELEN((struct regexp *)SvANY(prog)) -#define RX_WRAPPED(prog) RXp_WRAPPED((struct regexp *)SvANY(prog)) +#define RX_WRAPPED(prog) (((struct regexp *)SvANY(prog))->wrapped) #define RX_WRAPLEN(prog) RXp_WRAPLEN((struct regexp *)SvANY(prog)) #define RX_CHECK_SUBSTR(prog) (((struct regexp *)SvANY(prog))->check_substr) #define RX_EXTFLAGS(prog) RXp_EXTFLAGS((struct regexp *)SvANY(prog)) @@ -417,6 +415,9 @@ and check for NULL. #define RX_MATCH_UTF8_set(prog, t) ((t) \ ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \ : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0))) + +/* Whether the pattern stored at RX_WRAPPED is in UTF-8 */ +#define RX_UTF8(prog) (RX_EXTFLAGS(prog) & RXf_UTF8) #define REXEC_COPY_STR 0x01 /* Need to copy the string. */ #define REXEC_CHECKED 0x02 /* check_substr already checked. */ |