eliminate RF_utf8 flag from PL_reg_flags

This global flag indicates whether the currently executing regex is utf8. Replace it with a boolean var local to the matching function, and pass it around via function args, or as a member of the regmatch_info struct. This is a first step to eliminating PL_reg_flags.
author: David Mitchell <davem@iabyn.com> 2012-12-25 18:09:32 +0000
committer: David Mitchell <davem@iabyn.com> 2012-12-25 18:09:32 +0000
commit: 984e6dd18b2c93d4a6ae228bb9d2d98aadf7ca6e (patch)
tree: 869b7de1f2b2401a7eda4134742ca35370778d07
parent: 4fab19ce1519686c7468c2e8b79b34d7d9b7f5bc (diff)
download: perl-984e6dd18b2c93d4a6ae228bb9d2d98aadf7ca6e.tar.gz
5 files changed, 68 insertions, 66 deletions
diff --git a/embed.fnc b/embed.fnc
index 75688fb7c2..97e16bada6 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2032,7 +2032,9 @@ Es	|U8	|regtail_study	|NN struct RExC_state_t *pRExC_state \
 ERs	|bool	|isFOO_lc	|const U8 classnum|const U8 character
 ERs	|bool	|isFOO_utf8_lc	|const U8 classnum|NN const U8* character
 ERs	|I32	|regmatch	|NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
-ERs	|I32	|regrepeat	|NN const regexp *prog|NN char **startposp|NN const regnode *p|I32 max|int depth
+ERs	|I32	|regrepeat	|NN const regexp *prog|NN char **startposp \
+				|NN const regnode *p|I32 max|int depth \
+				|bool is_utf8_pat
 ERs	|I32	|regtry		|NN regmatch_info *reginfo|NN char **startposp
 ERs	|bool	|reginclass	|NULLOK const regexp * const prog|NN const regnode * const n|NN const U8 * const p\
 				|bool const utf8_target
@@ -2049,7 +2051,10 @@ ERsn	|U8*	|reghop4	|NN U8 *s|I32 off|NN const U8 *llim \
 				|NN const U8 *rlim
 #endif
 ERsn	|U8*	|reghopmaybe3	|NN U8 *s|I32 off|NN const U8 *lim
-ERs	|char*	|find_byclass	|NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|NULLOK regmatch_info *reginfo
+ERs	|char*	|find_byclass	|NN regexp * prog|NN const regnode *c \
+				|NN char *s|NN const char *strend \
+				|NULLOK regmatch_info *reginfo \
+				|bool is_utf_pat
 Es	|void	|to_utf8_substr	|NN regexp * prog
 Es	|bool	|to_byte_substr	|NN regexp * prog
 ERs	|I32	|reg_check_named_buff_matched	|NN const regexp *rex \
diff --git a/embed.h b/embed.h
index 1136f60466..9afd44278f 100644
--- a/embed.h
+++ b/embed.h
@@ -973,7 +973,7 @@
 #  endif
 #  if defined(PERL_IN_REGEXEC_C)
 #define core_regclass_swash(a,b,c,d)	S_core_regclass_swash(aTHX_ a,b,c,d)
-#define find_byclass(a,b,c,d,e)	S_find_byclass(aTHX_ a,b,c,d,e)
+#define find_byclass(a,b,c,d,e,f)	S_find_byclass(aTHX_ a,b,c,d,e,f)
 #define isFOO_lc(a,b)		S_isFOO_lc(aTHX_ a,b)
 #define isFOO_utf8_lc(a,b)	S_isFOO_utf8_lc(aTHX_ a,b)
 #define reg_check_named_buff_matched(a,b)	S_reg_check_named_buff_matched(aTHX_ a,b)
@@ -983,7 +983,7 @@
 #define reghopmaybe3		S_reghopmaybe3
 #define reginclass(a,b,c,d)	S_reginclass(aTHX_ a,b,c,d)
 #define regmatch(a,b,c)		S_regmatch(aTHX_ a,b,c)
-#define regrepeat(a,b,c,d,e)	S_regrepeat(aTHX_ a,b,c,d,e)
+#define regrepeat(a,b,c,d,e,f)	S_regrepeat(aTHX_ a,b,c,d,e,f)
 #define regtry(a,b)		S_regtry(aTHX_ a,b)
 #define to_byte_substr(a)	S_to_byte_substr(aTHX_ a)
 #define to_utf8_substr(a)	S_to_utf8_substr(aTHX_ a)
diff --git a/proto.h b/proto.h
index ee2b14ef7e..a962d32a72 100644
--- a/proto.h
+++ b/proto.h
@@ -6834,7 +6834,7 @@ STATIC SV*	S_core_regclass_swash(pTHX_ const regexp *prog, const struct regnode
 #define PERL_ARGS_ASSERT_CORE_REGCLASS_SWASH	\
 	assert(node)
 
-STATIC char*	S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo)
+STATIC char*	S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo, bool is_utf_pat)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1)
 			__attribute__nonnull__(pTHX_2)
@@ -6899,7 +6899,7 @@ STATIC I32	S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *pro
 #define PERL_ARGS_ASSERT_REGMATCH	\
 	assert(reginfo); assert(startpos); assert(prog)
 
-STATIC I32	S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth)
+STATIC I32	S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth, bool is_utf8_pat)
 			__attribute__warn_unused_result__
 			__attribute__nonnull__(pTHX_1)
 			__attribute__nonnull__(pTHX_2)
diff --git a/regexec.c b/regexec.c
index 03204cdc6d..4ca48210e9 100644
--- a/regexec.c
+++ b/regexec.c
@@ -96,10 +96,6 @@ static const char* const non_utf8_target_but_utf8_required
 #define RF_tainted	1	/* tainted information used? e.g. locale */
 #define RF_warned	2		/* warned about big count? */
 
-#define RF_utf8		8		/* Pattern contains multibyte chars? */
-
-#define UTF_PATTERN ((PL_reg_flags & RF_utf8) != 0)
-
 #define HAS_NONLATIN1_FOLD_CLOSURE(i) _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(i)
 
 #ifndef STATIC
@@ -611,6 +607,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
     char *checked_upto = NULL;          /* how far into the string we have already checked using find_byclass*/
     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
     RXi_GET_DECL(prog,progi);
+    bool is_utf8_pat;
 #ifdef DEBUGGING
     const char * const i_strpos = strpos;
 #endif
@@ -622,10 +619,7 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 
     RX_MATCH_UTF8_set(rx,utf8_target);
 
-    if (RX_UTF8(rx))
-	PL_reg_flags |= RF_utf8;
-    else
-	PL_reg_flags &= ~RF_utf8;
+    is_utf8_pat = cBOOL(RX_UTF8(rx));
 
     DEBUG_EXECUTE_r( 
         debug_start_match(rx, utf8_target, strpos, strend,
@@ -1129,7 +1123,8 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
 
 	t = s;
-        s = find_byclass(prog, progi->regstclass, checked_upto, endpos, NULL);
+        s = find_byclass(prog, progi->regstclass, checked_upto, endpos,
+                            NULL, is_utf8_pat);
 	if (s) {
 	    checked_upto = s;
 	} else {
@@ -1437,7 +1432,7 @@ if ((!reginfo || regtry(reginfo, &s))) \
 
 STATIC char *
 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, 
-    const char *strend, regmatch_info *reginfo)
+    const char *strend, regmatch_info *reginfo, bool is_utf8_pat)
 {
     dVAR;
     const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
@@ -1483,7 +1478,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         break;
 
     case EXACTFA:
-        if (UTF_PATTERN || utf8_target) {
+        if (is_utf8_pat || utf8_target) {
             utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
             goto do_exactf_utf8;
         }
@@ -1503,7 +1498,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         goto do_exactf_non_utf8;
 
     case EXACTFL:
-        if (UTF_PATTERN || utf8_target) {
+        if (is_utf8_pat || utf8_target) {
             utf8_fold_flags = FOLDEQ_UTF8_LOCALE;
             goto do_exactf_utf8;
         }
@@ -1512,15 +1507,15 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         goto do_exactf_non_utf8;
 
     case EXACTFU_SS:
-        if (UTF_PATTERN) {
+        if (is_utf8_pat) {
             utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
         }
         goto do_exactf_utf8;
 
     case EXACTFU_TRICKYFOLD:
     case EXACTFU:
-        if (UTF_PATTERN || utf8_target) {
-            utf8_fold_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+        if (is_utf8_pat || utf8_target) {
+            utf8_fold_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
             goto do_exactf_utf8;
         }
 
@@ -1576,7 +1571,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         pat_string = STRING(c);
         ln  = STR_LEN(c);	/* length to match in octets/bytes */
         pat_end = pat_string + ln;
-        lnc = (UTF_PATTERN)     /* length to match in characters */
+        lnc = is_utf8_pat       /* length to match in characters */
                 ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
                 : ln;
 
@@ -1612,7 +1607,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
         while (s <= e) {
             char *my_strend= (char *)strend;
             if (foldEQ_utf8_flags(s, &my_strend, 0,  utf8_target,
-                  pat_string, NULL, ln, cBOOL(UTF_PATTERN), utf8_fold_flags)
+                  pat_string, NULL, ln, is_utf8_pat, utf8_fold_flags)
                 && (!reginfo || regtry(reginfo, &s)) )
             {
                 goto got_it;
@@ -2114,9 +2109,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
     PL_reg_state.re_state_eval_setup_done = FALSE;
     PL_reg_maxiter = 0;
 
-    if (RX_UTF8(rx))
-	PL_reg_flags |= RF_utf8;
-
+    reginfo.is_utf8_pat = cBOOL(RX_UTF8(rx));
     /* Mark beginning of line for ^ and lookbehind. */
     reginfo.bol = startpos; /* XXX not used ??? */
     PL_bostr  = strbeg;
@@ -2291,7 +2284,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
     /* Messy cases:  unanchored match. */
     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
 	/* we have /x+whatever/ */
-	/* it must be a one character string (XXXX Except UTF_PATTERN?) */
+	/* it must be a one character string (XXXX Except is_utf8_pat?) */
 	char ch;
 #ifdef DEBUGGING
 	int did_match = 0;
@@ -2461,7 +2454,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
 		     quoted, (int)(strend - s));
 	    }
 	});
-        if (find_byclass(prog, c, s, strend, &reginfo))
+        if (find_byclass(prog, c, s, strend, &reginfo, reginfo.is_utf8_pat))
 	    goto got_it;
 	DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
     }
@@ -3238,7 +3231,8 @@ S_clear_backtrack_stack(pTHX_ void *p)
     }
 }
 static bool
-S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c1_utf8, int *c2p, U8* c2_utf8)
+S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
+        U8* c1_utf8, int *c2p, U8* c2_utf8, bool is_utf8_pat)
 {
     /* This function determines if there are one or two characters that match
      * the first character of the passed-in EXACTish node <text_node>, and if
@@ -3310,7 +3304,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c
          * character.  If both the pat and the target are UTF-8, we can just
          * copy the input to the output, avoiding finding the code point of
          * that character */
-        if (! UTF_PATTERN) {
+        if (!is_utf8_pat) {
             c2 = c1 = *pat;
         }
         else if (utf8_target) {
@@ -3323,10 +3317,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c
         }
     }
     else /* an EXACTFish node */
-         if ((UTF_PATTERN
+         if ((is_utf8_pat
                     && is_MULTI_CHAR_FOLD_utf8_safe(pat,
                                                     pat + STR_LEN(text_node)))
-             || (! UTF_PATTERN
+             || (!is_utf8_pat
                     && is_MULTI_CHAR_FOLD_latin1_safe(pat,
                                                     pat + STR_LEN(text_node))))
     {
@@ -3336,7 +3330,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, U8* c
         use_chrtest_void = TRUE;
     }
     else { /* an EXACTFish node which doesn't begin with a multi-char fold */
-        c1 = (UTF_PATTERN) ? valid_utf8_to_uvchr(pat, NULL) : *pat;
+        c1 = is_utf8_pat ? valid_utf8_to_uvchr(pat, NULL) : *pat;
         if (c1 > 256) {
             /* Load the folds hash, if not already done */
             SV** listp;
@@ -3564,6 +3558,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
     U32 maxopenparen = 0;       /* max '(' index seen so far */
     int to_complement;  /* Invert the result? */
     _char_class_number classnum;
+    bool is_utf8_pat = reginfo->is_utf8_pat;
 
 #ifdef DEBUGGING
     GET_RE_DEBUG_FLAGS_DECL;
@@ -4052,7 +4047,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	case EXACT: {            /*  /abc/        */
 	    char *s = STRING(scan);
 	    ln = STR_LEN(scan);
-	    if (utf8_target != UTF_PATTERN) {
+	    if (utf8_target != is_utf8_pat) {
 		/* The target and the pattern have differing utf8ness. */
 		char *l = locinput;
 		const char * const e = s + ln;
@@ -4141,7 +4136,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	case EXACTFU:            /*  /abc/iu      */
 	    folder = foldEQ_latin1;
 	    fold_array = PL_fold_latin1;
-	    fold_utf8_flags = (UTF_PATTERN) ? FOLDEQ_S1_ALREADY_FOLDED : 0;
+	    fold_utf8_flags = is_utf8_pat ? FOLDEQ_S1_ALREADY_FOLDED : 0;
 	    goto do_exactf;
 
 	case EXACTFA:            /*  /abc/iaa     */
@@ -4159,13 +4154,13 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 	    s = STRING(scan);
 	    ln = STR_LEN(scan);
 
-	    if (utf8_target || UTF_PATTERN || state_num == EXACTFU_SS) {
+	    if (utf8_target || is_utf8_pat || state_num == EXACTFU_SS) {
 	      /* Either target or the pattern are utf8, or has the issue where
 	       * the fold lengths may differ. */
 		const char * const l = locinput;
 		char *e = PL_regeol;
 
-		if (! foldEQ_utf8_flags(s, 0,  ln, cBOOL(UTF_PATTERN),
+		if (! foldEQ_utf8_flags(s, 0,  ln, is_utf8_pat,
 			                l, &e, 0,  utf8_target, fold_utf8_flags))
 		{
 		    sayNO;
@@ -5117,12 +5112,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 		/* XXXX This is too dramatic a measure... */
 		PL_reg_maxiter = 0;
 
-		ST.toggle_reg_flags = PL_reg_flags;
-		if (RX_UTF8(re_sv))
-		    PL_reg_flags |= RF_utf8;
-		else
-		    PL_reg_flags &= ~RF_utf8;
-		ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
+		ST.saved_utf8_pat = is_utf8_pat;
+		is_utf8_pat = cBOOL(RX_UTF8(re_sv));
 
 		ST.prev_rex = rex_sv;
 		ST.prev_curlyx = cur_curlyx;
@@ -5141,7 +5132,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
 	case EVAL_AB: /* cleanup after a successful (??{A})B */
 	    /* note: this is called twice; first after popping B, then A */
-	    PL_reg_flags ^= ST.toggle_reg_flags; 
+            is_utf8_pat = ST.saved_utf8_pat;
 	    rex_sv = ST.prev_rex;
 	    SET_reg_curpm(rex_sv);
 	    rex = ReANY(rex_sv);
@@ -5159,7 +5150,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
 
 	case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
 	    /* note: this is called twice; first after popping B, then A */
-	    PL_reg_flags ^= ST.toggle_reg_flags; 
+            is_utf8_pat = ST.saved_utf8_pat;
 	    rex_sv = ST.prev_rex;
 	    SET_reg_curpm(rex_sv);
 	    rex = ReANY(rex_sv);
@@ -5783,7 +5774,8 @@ NULL
 	             */
 		    if (PL_regkind[OP(text_node)] == EXACT) {
                         if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
-                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
+                           is_utf8_pat))
                         {
                             sayNO;
                         }
@@ -5959,7 +5951,8 @@ NULL
                         if this changes back then the macro for IS_TEXT and 
                         friends need to change. */
                         if (! S_setup_EXACTISH_ST_c1_c2(aTHX_
-                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8))
+                           text_node, &ST.c1, ST.c1_utf8, &ST.c2, ST.c2_utf8,
+                           is_utf8_pat))
                         {
                             sayNO;
                         }
@@ -5972,7 +5965,9 @@ NULL
 	    if (minmod) {
                 char *li = locinput;
 		minmod = 0;
-		if (ST.min && regrepeat(rex, &li, ST.A, ST.min, depth) < ST.min)
+		if (ST.min &&
+                        regrepeat(rex, &li, ST.A, ST.min, depth, is_utf8_pat)
+                            < ST.min)
 		    sayNO;
                 SET_locinput(li);
 		ST.count = ST.min;
@@ -6008,7 +6003,8 @@ NULL
                 /* avoid taking address of locinput, so it can remain
                  * a register var */
                 char *li = locinput;
-		ST.count = regrepeat(rex, &li, ST.A, ST.max, depth);
+		ST.count = regrepeat(rex, &li, ST.A, ST.max, depth,
+                                        is_utf8_pat);
 		if (ST.count < ST.min)
 		    sayNO;
                 SET_locinput(li);
@@ -6092,7 +6088,7 @@ NULL
                      * locinput matches */
                     char *li = ST.oldloc;
 		    ST.count += n;
-		    if (regrepeat(rex, &li, ST.A, n, depth) < n)
+		    if (regrepeat(rex, &li, ST.A, n, depth, is_utf8_pat) < n)
 			sayNO;
                     assert(n == REG_INFTY || locinput == li);
 		}
@@ -6116,7 +6112,7 @@ NULL
 	    /* failed -- move forward one */
             {
                 char *li = locinput;
-                if (!regrepeat(rex, &li, ST.A, 1, depth)) {
+                if (!regrepeat(rex, &li, ST.A, 1, depth, is_utf8_pat)) {
                     sayNO;
                 }
                 locinput = li;
@@ -6191,9 +6187,8 @@ NULL
 	    fake_end:
 	    if (cur_eval) {
 		/* we've just finished A in /(??{A})B/; now continue with B */
-		st->u.eval.toggle_reg_flags
-			    = cur_eval->u.eval.toggle_reg_flags;
-		PL_reg_flags ^= st->u.eval.toggle_reg_flags; 
+                st->u.eval.saved_utf8_pat = is_utf8_pat;
+		is_utf8_pat = cur_eval->u.eval.saved_utf8_pat;
 
 		st->u.eval.prev_rex = rex_sv;		/* inner */
 
@@ -6644,7 +6639,8 @@ no_silent:
  * depth     - (for debugging) backtracking depth.
  */
 STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth)
+S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p,
+                I32 max, int depth, bool is_utf8_pat)
 {
     dVAR;
     char *scan;     /* Pointer to current position in target string */
@@ -6723,7 +6719,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
         }
 	break;
     case EXACT:
-        assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
 
 	c = (U8)*STRING(p);
 
@@ -6731,7 +6727,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
          * under UTF-8, or both target and pattern aren't UTF-8.  Note that we
          * can use UTF8_IS_INVARIANT() even if the pattern isn't UTF-8, as it's
          * true iff it doesn't matter if the argument is in UTF-8 or not */
-        if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! UTF_PATTERN)) {
+        if (UTF8_IS_INVARIANT(c) || (! utf8_target && ! is_utf8_pat)) {
             if (utf8_target && scan + max < loceol) {
                 /* We didn't adjust <loceol> because is UTF-8, but ok to do so,
                  * since here, to match at all, 1 char == 1 byte */
@@ -6741,7 +6737,7 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
 		scan++;
 	    }
 	}
-	else if (UTF_PATTERN) {
+	else if (is_utf8_pat) {
             if (utf8_target) {
                 STRLEN scan_char_len;
 
@@ -6803,23 +6799,25 @@ S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 ma
     case EXACTFU_SS:
     case EXACTFU_TRICKYFOLD:
     case EXACTFU:
-	utf8_flags = (UTF_PATTERN) ? FOLDEQ_S2_ALREADY_FOLDED : 0;
+	utf8_flags = is_utf8_pat ? FOLDEQ_S2_ALREADY_FOLDED : 0;
 
     do_exactf: {
         int c1, c2;
         U8 c1_utf8[UTF8_MAXBYTES+1], c2_utf8[UTF8_MAXBYTES+1];
 
-        assert(STR_LEN(p) == (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1);
+        assert(STR_LEN(p) == is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
 
-        if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8)) {
+        if (S_setup_EXACTISH_ST_c1_c2(aTHX_ p, &c1, c1_utf8, &c2, c2_utf8,
+                                        is_utf8_pat))
+        {
             if (c1 == CHRTEST_VOID) {
                 /* Use full Unicode fold matching */
                 char *tmpeol = PL_regeol;
-                STRLEN pat_len = (UTF_PATTERN) ? UTF8SKIP(STRING(p)) : 1;
+                STRLEN pat_len = is_utf8_pat ? UTF8SKIP(STRING(p)) : 1;
                 while (hardcount < max
                         && foldEQ_utf8_flags(scan, &tmpeol, 0, utf8_target,
                                              STRING(p), NULL, pat_len,
-                                             cBOOL(UTF_PATTERN), utf8_flags))
+                                             is_utf8_pat, utf8_flags))
                 {
                     scan = tmpeol;
                     tmpeol = PL_regeol;
diff --git a/regexp.h b/regexp.h
index 664915b0f6..035e2bab3b 100644
--- a/regexp.h
+++ b/regexp.h
@@ -575,6 +575,7 @@ typedef struct {
     SV *sv;
     char *ganch;
     char *cutpoint;
+    bool is_utf8_pat;
 } regmatch_info;
  
 
@@ -646,9 +647,7 @@ typedef struct regmatch_state {
 	    struct regmatch_state *prev_eval;
 	    struct regmatch_state *prev_curlyx;
 	    REGEXP	*prev_rex;
-	    U32		toggle_reg_flags; /* what bits in PL_reg_flags to
-					    flip when transitioning between
-					    inner and outer rexen */
+	    bool	saved_utf8_pat; /* saved copy of is_utf8_pat */
 	    CHECKPOINT	cp;	/* remember current savestack indexes */
 	    CHECKPOINT	lastcp;
 	    U32        close_paren; /* which close bracket is our end */
author	David Mitchell <davem@iabyn.com>	2012-12-25 18:09:32 +0000
committer	David Mitchell <davem@iabyn.com>	2012-12-25 18:09:32 +0000
commit	984e6dd18b2c93d4a6ae228bb9d2d98aadf7ca6e (patch)
tree	869b7de1f2b2401a7eda4134742ca35370778d07
parent	4fab19ce1519686c7468c2e8b79b34d7d9b7f5bc (diff)
download	perl-984e6dd18b2c93d4a6ae228bb9d2d98aadf7ca6e.tar.gz