summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2018-10-28 21:24:22 -0600
committer: Karl Williamson <khw@cpan.org> 2018-11-16 10:06:57 -0700
commit: 39cd6e6a879395d79d698d684b3839f9423d1607 (patch)
tree: e3c5035bb445896f019655cae5877f04375d051c
parent: d0d8d0cb339722488ab4472815d936625b06704b (diff)
download: perl-39cd6e6a879395d79d698d684b3839f9423d1607.tar.gz
regcomp.c: Make sure UTF-8 regex pattern uses /u
When a pattern is in UTF-8, Unicode rules should be selected. This commit makes sure that this happens and that the displayable form of the pattern shows /u. I don't know of any bugs this fixes.
-rw-r--r-- regcomp.c 11
1 file changed, 9 insertions, 2 deletions
diff --git a/regcomp.c b/regcomp.c
index 0a7940d0ef..3549619aae 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6967,7 +6967,7 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
* properly wrapped with the right modifiers */
bool has_p = ((RExC_rx->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY);
- bool has_charset = (get_regex_charset(RExC_rx->extflags)
+ bool has_charset = RExC_utf8 || (get_regex_charset(RExC_rx->extflags)
!= REGEX_DEPENDS_CHARSET);
/* The caret is output if there are any defaults: if not all the STD
@@ -7011,7 +7011,14 @@ S_set_regex_pv(pTHX_ RExC_state_t *pRExC_state, REGEXP *Rx)
}
if (has_charset) {
STRLEN len;
- const char* const name = get_regex_charset_name(RExC_rx->extflags, &len);
+ const char* name;
+
+ name = get_regex_charset_name(RExC_rx->extflags, &len);
+ if strEQ(name, DEPENDS_PAT_MODS) { /* /d under UTF-8 => /u */
+ assert(RExC_utf8);
+ name = UNICODE_PAT_MODS;
+ len = sizeof(UNICODE_PAT_MODS) - 1;
+ }
Copy(name, p, len, char);
p += len;
}