diff options
author | Karl Williamson <khw@cpan.org> | 2018-12-23 13:33:07 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-12-26 12:50:37 -0700 |
commit | 627a7895564679975632d9b637b27e9c09d3d985 (patch) | |
tree | 9327f6dc100bff6d53d4cbac11f182be8a4156f2 /regexec.c | |
parent | aa419ff31a1e359d67cd44223a599ef9f276ca12 (diff) | |
download | perl-627a7895564679975632d9b637b27e9c09d3d985.tar.gz |
Add regnode EXACTFUP, for problematic non-UTF-8 patterns
If a non-UTF-8 pattern contains a MICRO SIGN, this special node is now
created. MICRO SIGN is the only character that can itself be represented
without UTF-8 but whose fold requires UTF-8, which causes some issues, so
it has to be specially handled. When matching against a non-UTF-8 target
string, the pattern is effectively folded, but it is not folded when the
target is UTF-8. Creating this node allows a future commit to remove the
special handling currently required for the nodes that don't contain a
MICRO SIGN.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 8 |
1 files changed, 8 insertions, 0 deletions
@@ -2321,6 +2321,9 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, goto do_exactf_non_utf8; case EXACTFU_SS: + case EXACTFUP: /* Problematic even though pattern isn't UTF-8. Use + full functionality normally not done except for + UTF-8 */ assert(! is_utf8_pat); goto do_exactf_utf8; @@ -4661,6 +4664,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, /* FALLTHROUGH */ case EXACTFAA: case EXACTFU_SS: + case EXACTFUP: case EXACTFU: c2 = PL_fold_latin1[c1]; break; @@ -6419,6 +6423,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) goto do_exactf; case EXACTFU_SS: /* /\x{df}/iu */ + case EXACTFUP: /* /foo/iu, and something is problematic in + 'foo' so can't take shortcuts. */ assert(! is_utf8_pat); /* FALLTHROUGH */ case EXACTFU: /* /abc/iu */ @@ -6460,6 +6466,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) if ( utf8_target || is_utf8_pat || state_num == EXACTFU_SS + || state_num == EXACTFUP || (state_num == EXACTFL && IN_UTF8_CTYPE_LOCALE)) { /* Either target or the pattern are utf8, or has the issue where @@ -9361,6 +9368,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, /* FALLTHROUGH */ case EXACTFU_SS: + case EXACTFUP: do_exactf: { int c1, c2; |