diff options
author | Nicholas Clark <nick@ccl4.org> | 2010-07-11 20:11:10 +0100 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2011-09-02 08:18:56 -0700 |
commit | 3808f14a4da5927a92ec30527c260c01bad3f340 (patch) | |
tree | 640106947120036dc8124a1a5da525e958eaae96 | |
parent | ae38025c1617ab0606fba64e716a4820603b5f04 (diff) | |
download | perl-3808f14a4da5927a92ec30527c260c01bad3f340.tar.gz |
Avoid UTF-8 cache panics with offsets beyond the string. Fixes RT #75898.
Change S_sv_pos_u2b_forwards() to take a pointer to the (requested) UTF-8 offset,
and return the actual UTF-8 offset for the byte position returned. This ensures
that the cache is consistent with reality.
(cherry picked from commit 3e2d3818e517e0037c1ab6a482f31d50271f9e27)
-rw-r--r-- | embed.fnc | 4 | ||||
-rw-r--r-- | proto.h | 9 | ||||
-rw-r--r-- | sv.c | 22 | ||||
-rw-r--r-- | t/op/index.t | 11 |
4 files changed, 32 insertions, 14 deletions
@@ -1856,12 +1856,12 @@ s |int |sv_2iuv_non_preserve |NN SV *const sv sR |I32 |expect_number |NN char **const pattern # sn |STRLEN |sv_pos_u2b_forwards|NN const U8 *const start \ - |NN const U8 *const send|STRLEN uoffset + |NN const U8 *const send|NN STRLEN *const uoffset sn |STRLEN |sv_pos_u2b_midway|NN const U8 *const start \ |NN const U8 *send|STRLEN uoffset|const STRLEN uend s |STRLEN |sv_pos_u2b_cached|NN SV *const sv|NN MAGIC **const mgp \ |NN const U8 *const start|NN const U8 *const send \ - |const STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0 + |STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0 s |void |utf8_mg_pos_cache_update|NN SV *const sv|NN MAGIC **const mgp \ |const STRLEN byte|const STRLEN utf8|const STRLEN blen s |STRLEN |sv_pos_b2u_midway|NN const U8 *const s|NN const U8 *const target \ @@ -5731,11 +5731,12 @@ STATIC I32 S_expect_number(pTHX_ char **const pattern) assert(pattern) # -STATIC STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN uoffset) +STATIC STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN *const uoffset) __attribute__nonnull__(1) - __attribute__nonnull__(2); + __attribute__nonnull__(2) + __attribute__nonnull__(3); #define PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS \ - assert(start); assert(send) + assert(start); assert(send); assert(uoffset) STATIC STRLEN S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, STRLEN uoffset, const STRLEN uend) __attribute__nonnull__(1) @@ -5743,7 +5744,7 @@ STATIC STRLEN S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, STRLEN #define PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY \ assert(start); assert(send) -STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, const U8 *const send, const STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0) +STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, const U8 *const send, STRLEN uoffset, STRLEN uoffset0, STRLEN 
boffset0) __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3) @@ -6088,19 +6088,23 @@ Perl_sv_len_utf8(pTHX_ register SV *const sv) offset. */ static STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, - STRLEN uoffset) + STRLEN *const uoffset_p) { const U8 *s = start; + STRLEN uoffset = *uoffset_p; PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS; - while (s < send && uoffset--) + while (s < send && uoffset) { + --uoffset; s += UTF8SKIP(s); + } if (s > send) { /* This is the existing behaviour. Possibly it should be a croak, as it's actually a bounds error */ s = send; } + *uoffset_p -= uoffset; return s - start; } @@ -6147,7 +6151,7 @@ S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, created if necessary, and the found value offered to it for update. */ static STRLEN S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, - const U8 *const send, const STRLEN uoffset, + const U8 *const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0) { STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. 
*/ @@ -6189,9 +6193,11 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start uoffset - uoffset0, (*mgp)->mg_len - uoffset0); } else { + uoffset -= uoffset0; boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0, - send, uoffset - uoffset0); + send, &uoffset); + uoffset += uoffset0; } } else if (cache[2] < uoffset) { @@ -6229,9 +6235,11 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start } if (!found || PL_utf8cache < 0) { - const STRLEN real_boffset - = boffset0 + sv_pos_u2b_forwards(start + boffset0, - send, uoffset - uoffset0); + STRLEN real_boffset; + uoffset -= uoffset0; + real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0, + send, &uoffset); + uoffset += uoffset0; if (found && PL_utf8cache < 0) { if (real_boffset != boffset) { diff --git a/t/op/index.t b/t/op/index.t index 6cc3f428ea..aaf611b65b 100644 --- a/t/op/index.t +++ b/t/op/index.t @@ -7,7 +7,7 @@ BEGIN { } use strict; -plan( tests => 111 ); +plan( tests => 113 ); run_tests() unless caller; @@ -199,4 +199,13 @@ SKIP: { } } +{ + # RT#75898 + is(eval { utf8::upgrade($_ = " "); index $_, " ", 72 }, -1, + 'UTF-8 cache handles offset beyond the end of the string'); + $_ = "\x{100}BC"; + is(index($_, "C", 4), -1, + 'UTF-8 cache handles offset beyond the end of the string'); +} + } |