summaryrefslogtreecommitdiff
path: root/regexp.h
diff options
context:
space:
mode:
author: David Mitchell <davem@iabyn.com> 2013-05-20 14:17:33 +0100
committer: David Mitchell <davem@iabyn.com> 2013-06-02 22:28:51 +0100
commit: 0603fe5cded5ad964b7ff06f91a5c2c244f93337 (patch)
tree: b6616deee0a3f0010670987085ff949e5a08c581 /regexp.h
parent: 8adc0f72b0398cece49d44d4acc0962d03543ea9 (diff)
download: perl-0603fe5cded5ad964b7ff06f91a5c2c244f93337.tar.gz
stop callers of rex engine using RX_MATCH_UTF8_set
The way that the regex engine knows that the match string is utf8 is currently a complete mess. It's partially signalled by the utf8 flag of the passed SV, but also by the RXf_MATCH_UTF8 flag in the regex itself, and the value of PL_reg_match_utf8.

Currently all the callers of the engine (such as pp_match, pp_split etc) initially use RX_MATCH_UTF8_set() before calling the engine. This sets both the RXf_MATCH_UTF8 flag on the regex, and PL_reg_match_utf8. Then the two entry points to the engine (regexec_flags() and re_intuit_start()) initially repeat the RX_MATCH_UTF8_set() themselves.

Remove the usage of RX_MATCH_UTF8_set() by the callers of the engine, and instead just rely on the engine to do it. Also, remove the "secret" setting of PL_reg_match_utf8 by RX_MATCH_UTF8_set(), and do it explicitly. This is a prelude to eliminating PL_reg_match_utf8.
Diffstat (limited to 'regexp.h')
-rw-r--r-- regexp.h 6
1 files changed, 3 insertions, 3 deletions
diff --git a/regexp.h b/regexp.h
index 6ec6214d61..cd2bad8769 100644
--- a/regexp.h
+++ b/regexp.h
@@ -406,7 +406,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RXf_CHECK_ALL (1<<(RXf_BASE_SHIFT+10))
/* UTF8 related */
-#define RXf_MATCH_UTF8 (1<<(RXf_BASE_SHIFT+11))
+#define RXf_MATCH_UTF8 (1<<(RXf_BASE_SHIFT+11)) /* $1 etc are utf8 */
/* Intuit related */
#define RXf_USE_INTUIT_NOML (1<<(RXf_BASE_SHIFT+12))
@@ -528,8 +528,8 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RX_MATCH_UTF8_on(prog) (RX_EXTFLAGS(prog) |= RXf_MATCH_UTF8)
#define RX_MATCH_UTF8_off(prog) (RX_EXTFLAGS(prog) &= ~RXf_MATCH_UTF8)
#define RX_MATCH_UTF8_set(prog, t) ((t) \
- ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \
- : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0)))
+ ? RX_MATCH_UTF8_on(prog) \
+ : RX_MATCH_UTF8_off(prog))
/* Whether the pattern stored at RX_WRAPPED is in UTF-8 */
#define RX_UTF8(prog) SvUTF8(prog)