summaryrefslogtreecommitdiff
path: root/sv.c
diff options
context:
space:
mode:
author: Leon Timmermans <fawaka@gmail.com> 2021-04-19 18:49:52 +0200
committer: Leon Timmermans <fawaka@gmail.com> 2021-05-24 13:56:14 +0200
commit: e6e9dd290698d47a0db9e1d676d2b82e0bb0a52b (patch)
tree: e0122058bbf0ffb5df1de8d5af2088d6b98959f6 /sv.c
parent: 18e5156b1786a26cd70a7f8ff84c10956d93faf3 (diff)
download: perl-e6e9dd290698d47a0db9e1d676d2b82e0bb0a52b.tar.gz
Do not cache utf8 offsets for non-canonical lengths
In particular, if the requested length is beyond the end of the string, the clamped result should not be stored in the cache as the end.
Diffstat (limited to 'sv.c')
-rw-r--r-- sv.c 15
1 files changed, 11 insertions, 4 deletions
diff --git a/sv.c b/sv.c
index ceef9966da..eec33eb5ca 100644
--- a/sv.c
+++ b/sv.c
@@ -7225,7 +7225,8 @@ Perl_sv_len_utf8_nomg(pTHX_ SV * const sv)
offset. */
static STRLEN
S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send,
- STRLEN *const uoffset_p, bool *const at_end)
+ STRLEN *const uoffset_p, bool *const at_end,
+ bool* canonical_position)
{
const U8 *s = start;
STRLEN uoffset = *uoffset_p;
@@ -7245,6 +7246,9 @@ S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send,
it's actually a bounds error */
s = send;
}
+ /* If the unicode position is beyond the end, we return the end but
+ shouldn't cache that position */
+ *canonical_position = (uoffset == 0);
*uoffset_p -= uoffset;
return s - start;
}
@@ -7298,6 +7302,7 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. */
bool found = FALSE;
bool at_end = FALSE;
+ bool canonical_position = FALSE;
PERL_ARGS_ASSERT_SV_POS_U2B_CACHED;
@@ -7338,7 +7343,8 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
uoffset -= uoffset0;
boffset = boffset0
+ sv_pos_u2b_forwards(start + boffset0,
- send, &uoffset, &at_end);
+ send, &uoffset, &at_end,
+ &canonical_position);
uoffset += uoffset0;
}
}
@@ -7380,7 +7386,8 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
STRLEN real_boffset;
uoffset -= uoffset0;
real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0,
- send, &uoffset, &at_end);
+ send, &uoffset, &at_end,
+ &canonical_position);
uoffset += uoffset0;
if (found && PL_utf8cache < 0)
@@ -7389,7 +7396,7 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
boffset = real_boffset;
}
- if (PL_utf8cache && !SvGMAGICAL(sv) && SvPOK(sv)) {
+ if (PL_utf8cache && canonical_position && !SvGMAGICAL(sv) && SvPOK(sv)) {
if (at_end)
utf8_mg_len_cache_update(sv, mgp, uoffset);
else