diff options
author | Karl Williamson <khw@cpan.org> | 2021-01-02 15:42:25 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2021-01-03 05:56:55 -0700 |
commit | 46033c991aa390e9da2bdb8c8005b6806d7bffdc (patch) | |
tree | f368eb2be584b31ac38beb66e21d3331c0703ff9 /regexec.c | |
parent | 12c5822e63454624e46d26e95b1964592be3affe (diff) | |
download | perl-46033c991aa390e9da2bdb8c8005b6806d7bffdc.tar.gz |
regexec.c: Fix assertion failure GH #18451
This was caused by copying too many characters for the size of the
buffer. Only one character is needed.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 39 |
1 file changed, 26 insertions, 13 deletions
@@ -4694,24 +4694,37 @@ S_setup_EXACTISH_ST(pTHX_ const regnode * const text_node,
      *
      * Everything generally matches at least itself. But if there is a
      * UTF8ness mismatch, we have to convert to that of the target string. */
-    if (utf8_pat == utf8_target || UTF8_IS_INVARIANT(*pat)) {
-        lengths[0] = MIN(pat_len, C_ARRAY_LENGTH(matches[0]));
-        Copy(pat, matches[0], lengths[0], U8);
+    if (UTF8_IS_INVARIANT(*pat)) { /* Immaterial if either is in UTF-8 */
+        matches[0][0] = pat[0];
+        lengths[0] = 1;
         m->count++;
     }
-    else if (utf8_target) { /* target is UTF-8; pattern isn't */
-        matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
-        matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
-        lengths[0] = 2;
-        m->count++;
-    }
-    else { /* pattern is UTF-8, target isn't */
-        if (UTF8_IS_DOWNGRADEABLE_START(*pat)) {
-            matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
-            lengths[0] = 1;
+    else if (utf8_target) {
+        if (utf8_pat) {
+            lengths[0] = UTF8SKIP(pat);
+            Copy(pat, matches[0], lengths[0], U8);
+            m->count++;
+        }
+        else { /* target is UTF-8, pattern isn't */
+            matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
+            matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
+            lengths[0] = 2;
             m->count++;
         }
     }
+    else if (! utf8_pat) { /* Neither is UTF-8 */
+        matches[0][0] = pat[0];
+        lengths[0] = 1;
+        m->count++;
+    }
+    else /* target isn't UTF-8; pattern is. No match possible unless the
+            pattern's first character can fit in a byte */
+        if (UTF8_IS_DOWNGRADEABLE_START(*pat))
+    {
+        matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
+        lengths[0] = 1;
+        m->count++;
+    }
 
     /* Here we have taken care of any necessary node-type changes */
 