diff options
author | Karl Williamson <public@khwilliamson.com> | 2014-02-02 12:20:42 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2014-02-02 12:38:29 -0700 |
commit | bfa0ee78b652802412c3cab86bb873ed67ea6550 (patch) | |
tree | 59c400b9bff94ba274e312ef9a0b1b75454afa2e | |
parent | 02257115537194d7a3b36a956d5643069f78c54f (diff) | |
download | perl-bfa0ee78b652802412c3cab86bb873ed67ea6550.tar.gz |
'use utf8' should imply /u regex matching
This should be true even if the pattern isn't in utf8.
-rw-r--r-- | regcomp.c | 14 | ||||
-rw-r--r-- | t/re/pat.t | 6 |
2 files changed, 15 insertions, 5 deletions
@@ -6289,7 +6289,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
 
     /* ignore the utf8ness if the pattern is 0 length */
     RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
-    RExC_uni_semantics = 0;
+
+    /* 'use utf8' in the program indicates Unicode rules are wanted */
+    RExC_uni_semantics = (PL_hints & HINT_UTF8);
+
     RExC_contains_locale = 0;
     RExC_contains_i = 0;
     pRExC_state->runtime_code_qr = NULL;
@@ -6340,10 +6343,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     if (initial_charset == REGEX_LOCALE_CHARSET) {
         RExC_contains_locale = 1;
     }
-    else if (RExC_utf8 && initial_charset == REGEX_DEPENDS_CHARSET) {
+    else if ((RExC_utf8 || RExC_uni_semantics)
+             && initial_charset == REGEX_DEPENDS_CHARSET)
+    {
 
-        /* Set to use unicode semantics if the pattern is in utf8 and has the
-         * 'depends' charset specified, as it means unicode when utf8 */
+        /* Set to use unicode semantics if has the 'depends' charset specified,
+         * and either the pattern is in utf8 (as it means unicode when utf8),
+         * or we already know we want unicode rules */
         set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
     }
 
diff --git a/t/re/pat.t b/t/re/pat.t
index 91274e60c7..6211065498 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan tests => 712; # Update this when adding/deleting tests.
+plan tests => 713; # Update this when adding/deleting tests.
 
 run_tests() unless caller;
 
@@ -1538,6 +1538,10 @@ EOP
         like "\x{AA}", qr/a?[\W_]/d, "\\W with /d synthetic start class works";
     }
 
+    {
+        use utf8;
+        unlike("\xe0", qr/\W/, "'use utf8' implies /u");
+    }
 
 } # End of sub run_tests
 