diff options
author | Karl Williamson <khw@cpan.org> | 2018-11-17 15:51:19 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-11-27 09:54:46 -0700 |
commit | f6b4b99d2e584fbcd85eeed475eea10b87858e54 (patch) | |
tree | 8ecdbacbe8841a096baf9453e3afb486d530ecf1 /regexec.c | |
parent | 51fa1a74ca7dc85c6e81e990c50128380cac9da5 (diff) | |
download | perl-f6b4b99d2e584fbcd85eeed475eea10b87858e54.tar.gz |
Add regnode EXACT_ONLY8
This is a regnode that otherwise would be an EXACT except that it
contains a code point that requires UTF-8 to represent. Hence if the
target string isn't UTF-8, we know it can't possibly match, without
needing to try.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 21 |
1 files changed, 19 insertions, 2 deletions
@@ -4449,7 +4449,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
     U8 *pat = (U8*)STRING(text_node);
     U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
 
-    if (OP(text_node) == EXACT || OP(text_node) == EXACTL) {
+    if (   OP(text_node) == EXACT
+        || OP(text_node) == EXACT_ONLY8
+        || OP(text_node) == EXACTL)
+    {
 
         /* In an exact node, only one thing can be matched, that first
          * character. If both the pat and the target are UTF-8, we can just
@@ -6246,9 +6249,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
             if (utf8_target && UTF8_IS_ABOVE_LATIN1(*locinput)) {
                 _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
             }
+            goto do_exact;
+        case EXACT_ONLY8:
+            if (! utf8_target) {
+                sayNO;
+            }
             /* FALLTHROUGH */
         case EXACT: { /* /abc/ */
-            char *s = STRING(scan);
+            char *s;
+          do_exact:
+            s = STRING(scan);
             ln = STR_LEN(scan);
             if (utf8_target != is_utf8_pat) {
                 /* The target and the pattern have differing utf8ness. */
@@ -9184,8 +9194,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
         if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
             _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(scan, loceol);
         }
+        goto do_exact;
+
+    case EXACT_ONLY8:
+        if (! utf8_target) {
+            break;
+        }
         /* FALLTHROUGH */
     case EXACT:
+      do_exact:
         assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
 
         c = (U8)*STRING(p);