summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2018-12-23 13:33:07 -0700
committer: Karl Williamson <khw@cpan.org> 2018-12-26 12:50:37 -0700
commit: 627a7895564679975632d9b637b27e9c09d3d985 (patch)
tree: 9327f6dc100bff6d53d4cbac11f182be8a4156f2 /regexec.c
parent: aa419ff31a1e359d67cd44223a599ef9f276ca12 (diff)
download: perl-627a7895564679975632d9b637b27e9c09d3d985.tar.gz
Add regnode EXACTFUP, for problematic MICRO SIGN
If a non-UTF-8 pattern contains a MICRO SIGN, this special node is now created. MICRO SIGN is the only character that can be represented without UTF-8 but whose fold requires UTF-8, which causes some issues, so it has to be handled specially. When matching against a non-UTF-8 target string, the pattern is effectively folded; when the target is UTF-8, it is not. Creating this node allows a future commit to remove the special handling currently required for nodes that don't contain a MICRO SIGN.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 8
1 files changed, 8 insertions, 0 deletions
diff --git a/regexec.c b/regexec.c
index 13db8fd7b5..63d4a94f94 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2321,6 +2321,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
goto do_exactf_non_utf8;
case EXACTFU_SS:
+ case EXACTFUP: /* Problematic even though pattern isn't UTF-8. Use
+ full functionality normally not done except for
+ UTF-8 */
assert(! is_utf8_pat);
goto do_exactf_utf8;
@@ -4661,6 +4664,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
/* FALLTHROUGH */
case EXACTFAA:
case EXACTFU_SS:
+ case EXACTFUP:
case EXACTFU:
c2 = PL_fold_latin1[c1];
break;
@@ -6419,6 +6423,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
goto do_exactf;
case EXACTFU_SS: /* /\x{df}/iu */
+ case EXACTFUP: /* /foo/iu, and something is problematic in
+ 'foo' so can't take shortcuts. */
assert(! is_utf8_pat);
/* FALLTHROUGH */
case EXACTFU: /* /abc/iu */
@@ -6460,6 +6466,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
if ( utf8_target
|| is_utf8_pat
|| state_num == EXACTFU_SS
+ || state_num == EXACTFUP
|| (state_num == EXACTFL && IN_UTF8_CTYPE_LOCALE))
{
/* Either target or the pattern are utf8, or has the issue where
@@ -9361,6 +9368,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
/* FALLTHROUGH */
case EXACTFU_SS:
+ case EXACTFUP:
do_exactf: {
int c1, c2;