diff options
author | Karl Williamson <public@khwilliamson.com> | 2014-02-02 12:20:42 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2014-02-02 12:38:29 -0700 |
commit | bfa0ee78b652802412c3cab86bb873ed67ea6550 (patch) | |
tree | 59c400b9bff94ba274e312ef9a0b1b75454afa2e /regcomp.c | |
parent | 02257115537194d7a3b36a956d5643069f78c54f (diff) | |
download | perl-bfa0ee78b652802412c3cab86bb873ed67ea6550.tar.gz |
'use utf8' should imply /u regex matching
This should be true even if the pattern isn't in utf8.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- | regcomp.c | 14 |
1 file changed, 10 insertions, 4 deletions
@@ -6289,7 +6289,10 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     /* ignore the utf8ness if the pattern is 0 length */
     RExC_utf8 = RExC_orig_utf8 = (plen == 0 || IN_BYTES) ? 0 : SvUTF8(pat);
-    RExC_uni_semantics = 0;
+
+    /* 'use utf8' in the program indicates Unicode rules are wanted */
+    RExC_uni_semantics = (PL_hints & HINT_UTF8);
+
     RExC_contains_locale = 0;
     RExC_contains_i = 0;
     pRExC_state->runtime_code_qr = NULL;
@@ -6340,10 +6343,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     if (initial_charset == REGEX_LOCALE_CHARSET) {
         RExC_contains_locale = 1;
     }
-    else if (RExC_utf8 && initial_charset == REGEX_DEPENDS_CHARSET) {
+    else if ((RExC_utf8 || RExC_uni_semantics)
+             && initial_charset == REGEX_DEPENDS_CHARSET)
+    {
 
-        /* Set to use unicode semantics if the pattern is in utf8 and has the
-         * 'depends' charset specified, as it means unicode when utf8 */
+        /* Set to use unicode semantics if has the 'depends' charset specified,
+         * and either the pattern is in utf8 (as it means unicode when utf8),
+         * or we already know we want unicode rules */
         set_regex_charset(&rx_flags, REGEX_UNICODE_CHARSET);
     }