From a62b1201c068dc7b099bcb7182e188c4d2fbf34c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 26 Dec 2010 10:31:16 -0700 Subject: Use multi-bit field for regex character set The /d, /l, and /u regex modifiers are mutually exclusive. This patch changes the field that stores the character set to use more than one bit, with an enum value identifying which character set is in effect. This data structure more closely reflects the fact that the modifiers are mutually exclusive, conserves bits, and is easier to extend. A small API is added to set and query the bit field. This patch is not source backwards-compatible for .xs code. A handful of CPAN programs are affected. --- pp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'pp.c') diff --git a/pp.c b/pp.c index df28740929..026eea11f0 100644 --- a/pp.c +++ b/pp.c @@ -5870,7 +5870,7 @@ PP(pp_split) DIE(aTHX_ "panic: pp_split"); rx = PM_GETRE(pm); - TAINT_IF((RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) && + TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5916,7 +5916,7 @@ PP(pp_split) while (*s == ' ' || is_utf8_space((U8*)s)) s += UTF8SKIP(s); } - else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (isSPACE_LC(*s)) s++; } @@ -5946,7 +5946,8 @@ PP(pp_split) else m += t; } - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (m < strend && !isSPACE_LC(*m)) ++m; } else { @@ -5978,7 +5979,8 @@ PP(pp_split) if (do_utf8) { while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) )) s += UTF8SKIP(s); - } else if (RX_EXTFLAGS(rx) & RXf_PMf_LOCALE) { + } + else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) { while (s < strend && isSPACE_LC(*s)) ++s; } else { -- cgit v1.2.1