diff options
author | Karl Williamson <khw@cpan.org> | 2018-10-28 21:24:22 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-11-16 10:06:57 -0700 |
commit | 39cd6e6a879395d79d698d684b3839f9423d1607 (patch) | |
tree | e3c5035bb445896f019655cae5877f04375d051c | |
parent | d0d8d0cb339722488ab4472815d936625b06704b (diff) | |
download | perl-39cd6e6a879395d79d698d684b3839f9423d1607.tar.gz |
regcomp.c: Make sure UTF-8 regex pattern uses /u
When a pattern is in UTF-8, Unicode rules should be selected. This
commit makes sure that this happens and that the displayable form of the
pattern shows /u.
I don't know of any bugs this fixes.
-rw-r--r-- | regcomp.c | 11 |
1 file changed, 9 insertions, 2 deletions
@@ -6967,7 +6967,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
      * properly wrapped with the right modifiers */
 
     bool has_p = ((RExC_rx->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
-    bool has_charset = (get_regex_charset(RExC_rx->extflags)
+    bool has_charset = RExC_utf8 || (get_regex_charset(RExC_rx->extflags)
                                                 != REGEX_DEPENDS_CHARSET);
 
     /* The caret is output if there are any defaults: if not all the STD
@@ -7011,7 +7011,14 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
     }
     if (has_charset) {
         STRLEN len;
-        const char* const name = get_regex_charset_name(RExC_rx->extflags, &len);
+        const char* name;
+
+        name = get_regex_charset_name(RExC_rx->extflags, &len);
+        if strEQ(name, DEPENDS_PAT_MODS) { /* /d under UTF-8 => /u */
+            assert(RExC_utf8);
+            name = UNICODE_PAT_MODS;
+            len = sizeof(UNICODE_PAT_MODS) - 1;
+        }
         Copy(name, p, len, char);
         p += len;
     }