summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2018-11-17 15:51:19 -0700
committerKarl Williamson <khw@cpan.org>2018-11-27 09:54:46 -0700
commitf6b4b99d2e584fbcd85eeed475eea10b87858e54 (patch)
tree8ecdbacbe8841a096baf9453e3afb486d530ecf1 /regexec.c
parent51fa1a74ca7dc85c6e81e990c50128380cac9da5 (diff)
downloadperl-f6b4b99d2e584fbcd85eeed475eea10b87858e54.tar.gz
Add regnode EXACT_ONLY8
This regnode would otherwise be an EXACT, except that it contains a code point that requires UTF-8 to represent. Hence, if the target string isn't UTF-8, we know it can't possibly match, without needing to try.
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c21
1 files changed, 19 insertions, 2 deletions
diff --git a/regexec.c b/regexec.c
index 1756a95b7f..a69fdea857 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4449,7 +4449,10 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
U8 *pat = (U8*)STRING(text_node);
U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
- if (OP(text_node) == EXACT || OP(text_node) == EXACTL) {
+ if ( OP(text_node) == EXACT
+ || OP(text_node) == EXACT_ONLY8
+ || OP(text_node) == EXACTL)
+ {
/* In an exact node, only one thing can be matched, that first
* character. If both the pat and the target are UTF-8, we can just
@@ -6246,9 +6249,16 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
if (utf8_target && UTF8_IS_ABOVE_LATIN1(*locinput)) {
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
}
+ goto do_exact;
+ case EXACT_ONLY8:
+ if (! utf8_target) {
+ sayNO;
+ }
/* FALLTHROUGH */
case EXACT: { /* /abc/ */
- char *s = STRING(scan);
+ char *s;
+ do_exact:
+ s = STRING(scan);
ln = STR_LEN(scan);
if (utf8_target != is_utf8_pat) {
/* The target and the pattern have differing utf8ness. */
@@ -9184,8 +9194,15 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
if (utf8_target && UTF8_IS_ABOVE_LATIN1(*scan)) {
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(scan, loceol);
}
+ goto do_exact;
+
+ case EXACT_ONLY8:
+ if (! utf8_target) {
+ break;
+ }
/* FALLTHROUGH */
case EXACT:
+ do_exact:
assert(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
c = (U8)*STRING(p);