summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2021-01-02 15:42:25 -0700
committer: Karl Williamson <khw@cpan.org> 2021-01-03 05:56:55 -0700
commit: 46033c991aa390e9da2bdb8c8005b6806d7bffdc (patch)
tree: f368eb2be584b31ac38beb66e21d3331c0703ff9 /regexec.c
parent: 12c5822e63454624e46d26e95b1964592be3affe (diff)
download: perl-46033c991aa390e9da2bdb8c8005b6806d7bffdc.tar.gz
regexec.c: Fix assertion failure GH #18451
This was caused by copying too many characters for the size of the buffer. Only one character is needed.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c | 39
1 files changed, 26 insertions, 13 deletions
diff --git a/regexec.c b/regexec.c
index 3ca2288361..b46693e5ac 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4694,24 +4694,37 @@ S_setup_EXACTISH_ST(pTHX_ const regnode * const text_node,
*
* Everything generally matches at least itself. But if there is a
* UTF8ness mismatch, we have to convert to that of the target string. */
- if (utf8_pat == utf8_target || UTF8_IS_INVARIANT(*pat)) {
- lengths[0] = MIN(pat_len, C_ARRAY_LENGTH(matches[0]));
- Copy(pat, matches[0], lengths[0], U8);
+ if (UTF8_IS_INVARIANT(*pat)) { /* Immaterial if either is in UTF-8 */
+ matches[0][0] = pat[0];
+ lengths[0] = 1;
m->count++;
}
- else if (utf8_target) { /* target is UTF-8; pattern isn't */
- matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
- matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
- lengths[0] = 2;
- m->count++;
- }
- else { /* pattern is UTF-8, target isn't */
- if (UTF8_IS_DOWNGRADEABLE_START(*pat)) {
- matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
- lengths[0] = 1;
+ else if (utf8_target) {
+ if (utf8_pat) {
+ lengths[0] = UTF8SKIP(pat);
+ Copy(pat, matches[0], lengths[0], U8);
+ m->count++;
+ }
+ else { /* target is UTF-8, pattern isn't */
+ matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
+ matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
+ lengths[0] = 2;
m->count++;
}
}
+ else if (! utf8_pat) { /* Neither is UTF-8 */
+ matches[0][0] = pat[0];
+ lengths[0] = 1;
+ m->count++;
+ }
+ else /* target isn't UTF-8; pattern is. No match possible unless the
+ pattern's first character can fit in a byte */
+ if (UTF8_IS_DOWNGRADEABLE_START(*pat))
+ {
+ matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
+ lengths[0] = 1;
+ m->count++;
+ }
/* Here we have taken care of any necessary node-type changes */