summary | refs | log | tree | commit | diff
path: root/regcomp.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2014-02-02 12:20:42 -0700
committer Karl Williamson <public@khwilliamson.com> 2014-02-02 12:38:29 -0700
commit bfa0ee78b652802412c3cab86bb873ed67ea6550 (patch)
tree 59c400b9bff94ba274e312ef9a0b1b75454afa2e /regcomp.c
parent 02257115537194d7a3b36a956d5643069f78c54f (diff)
download perl-bfa0ee78b652802412c3cab86bb873ed67ea6550.tar.gz
'use utf8' should imply /u regex matching
This should be true even if the pattern isn't in utf8.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 14
1 files changed, 10 insertions, 4 deletions
diff --git a/regcomp.c b/regcomp.c
index ab5fc0409b..bcd159c572 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6289,7 +6289,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* ignore the utf8ness if the pattern is 0 length */
RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
- RExC_uni_semantics = 0;
+
+ /* 'use utf8' in the program indicates Unicode rules are wanted */
+ RExC_uni_semantics = (PL_hints & HINT_UTF8);
+
RExC_contains_locale = 0;
RExC_contains_i = 0;
pRExC_state->runtime_code_qr = NULL;
@@ -6340,10 +6343,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
if (initial_charset == REGEX_LOCALE_CHARSET) {
RExC_contains_locale = 1;
}
- else if (RExC_utf8 && initial_charset == REGEX_DEPENDS_CHARSET) {
+ else if ((RExC_utf8 || RExC_uni_semantics)
+ && initial_charset == REGEX_DEPENDS_CHARSET)
+ {
- /* Set to use unicode semantics if the pattern is in utf8 and has the
- * 'depends' charset specified, as it means unicode when utf8 */
+ /* Set to use unicode semantics if has the 'depends' charset specified,
+ * and either the pattern is in utf8 (as it means unicode when utf8),
+ * or we already know we want unicode rules */
set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
}