summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2014-02-02 12:20:42 -0700
committer Karl Williamson <public@khwilliamson.com> 2014-02-02 12:38:29 -0700
commit bfa0ee78b652802412c3cab86bb873ed67ea6550 (patch)
tree 59c400b9bff94ba274e312ef9a0b1b75454afa2e
parent 02257115537194d7a3b36a956d5643069f78c54f (diff)
download perl-bfa0ee78b652802412c3cab86bb873ed67ea6550.tar.gz
'use utf8' should imply /u regex matching
This should be true even if the pattern isn't in utf8.
-rw-r--r-- regcomp.c 14
-rw-r--r-- t/re/pat.t 6
2 files changed, 15 insertions, 5 deletions
diff --git a/regcomp.c b/regcomp.c
index ab5fc0409b..bcd159c572 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6289,7 +6289,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
/* ignore the utf8ness if the pattern is 0 length */
RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
- RExC_uni_semantics = 0;
+
+ /* 'use utf8' in the program indicates Unicode rules are wanted */
+ RExC_uni_semantics = (PL_hints & HINT_UTF8);
+
RExC_contains_locale = 0;
RExC_contains_i = 0;
pRExC_state->runtime_code_qr = NULL;
@@ -6340,10 +6343,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
if (initial_charset == REGEX_LOCALE_CHARSET) {
RExC_contains_locale = 1;
}
- else if (RExC_utf8 && initial_charset == REGEX_DEPENDS_CHARSET) {
+ else if ((RExC_utf8 || RExC_uni_semantics)
+ && initial_charset == REGEX_DEPENDS_CHARSET)
+ {
- /* Set to use unicode semantics if the pattern is in utf8 and has the
- * 'depends' charset specified, as it means unicode when utf8 */
+ /* Set to use unicode semantics if the pattern has the 'depends' charset
+ * specified, and either the pattern is in utf8 (as it means unicode when
+ * utf8), or we already know we want unicode rules */
set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
}
diff --git a/t/re/pat.t b/t/re/pat.t
index 91274e60c7..6211065498 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 712; # Update this when adding/deleting tests.
+plan tests => 713; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -1538,6 +1538,10 @@ EOP
like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
}
+ {
+ use utf8;
+ unlike("\xe0", qr/\W/, "'use utf8' implies /u");
+ }
} # End of sub run_tests