summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2010-07-11 20:11:10 +0100
committerFather Chrysostomos <sprout@cpan.org>2011-09-02 08:18:56 -0700
commit3808f14a4da5927a92ec30527c260c01bad3f340 (patch)
tree640106947120036dc8124a1a5da525e958eaae96
parentae38025c1617ab0606fba64e716a4820603b5f04 (diff)
downloadperl-3808f14a4da5927a92ec30527c260c01bad3f340.tar.gz
Avoid UTF-8 cache panics with offsets beyond the string. Fixes RT #75898.
Change S_sv_pos_u2b_forwards() to take a pointer to the (requested) UTF-8 offset, and return the actual UTF-8 offset for the byte position returned. This ensures that the cache is consistent with reality. (cherry picked from commit 3e2d3818e517e0037c1ab6a482f31d50271f9e27)
-rw-r--r--embed.fnc4
-rw-r--r--proto.h9
-rw-r--r--sv.c22
-rw-r--r--t/op/index.t11
4 files changed, 32 insertions, 14 deletions
diff --git a/embed.fnc b/embed.fnc
index e6a2fb71a7..4736d8dcbe 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1856,12 +1856,12 @@ s |int |sv_2iuv_non_preserve |NN SV *const sv
sR |I32 |expect_number |NN char **const pattern
#
sn |STRLEN |sv_pos_u2b_forwards|NN const U8 *const start \
- |NN const U8 *const send|STRLEN uoffset
+ |NN const U8 *const send|NN STRLEN *const uoffset
sn |STRLEN |sv_pos_u2b_midway|NN const U8 *const start \
|NN const U8 *send|STRLEN uoffset|const STRLEN uend
s |STRLEN |sv_pos_u2b_cached|NN SV *const sv|NN MAGIC **const mgp \
|NN const U8 *const start|NN const U8 *const send \
- |const STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0
+ |STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0
s |void |utf8_mg_pos_cache_update|NN SV *const sv|NN MAGIC **const mgp \
|const STRLEN byte|const STRLEN utf8|const STRLEN blen
s |STRLEN |sv_pos_b2u_midway|NN const U8 *const s|NN const U8 *const target \
diff --git a/proto.h b/proto.h
index d4f3cd27ba..62bdf6e2b7 100644
--- a/proto.h
+++ b/proto.h
@@ -5731,11 +5731,12 @@ STATIC I32 S_expect_number(pTHX_ char **const pattern)
assert(pattern)
#
-STATIC STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN uoffset)
+STATIC STRLEN S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN *const uoffset)
__attribute__nonnull__(1)
- __attribute__nonnull__(2);
+ __attribute__nonnull__(2)
+ __attribute__nonnull__(3);
#define PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS \
- assert(start); assert(send)
+ assert(start); assert(send); assert(uoffset)
STATIC STRLEN S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, STRLEN uoffset, const STRLEN uend)
__attribute__nonnull__(1)
@@ -5743,7 +5744,7 @@ STATIC STRLEN S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, STRLEN
#define PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY \
assert(start); assert(send)
-STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, const U8 *const send, const STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0)
+STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, const U8 *const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0)
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
__attribute__nonnull__(pTHX_3)
diff --git a/sv.c b/sv.c
index 8a4e964554..3551bcc6fc 100644
--- a/sv.c
+++ b/sv.c
@@ -6088,19 +6088,23 @@ Perl_sv_len_utf8(pTHX_ register SV *const sv)
offset. */
static STRLEN
S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send,
- STRLEN uoffset)
+ STRLEN *const uoffset_p)
{
const U8 *s = start;
+ STRLEN uoffset = *uoffset_p;
PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS;
- while (s < send && uoffset--)
+ while (s < send && uoffset) {
+ --uoffset;
s += UTF8SKIP(s);
+ }
if (s > send) {
/* This is the existing behaviour. Possibly it should be a croak, as
it's actually a bounds error */
s = send;
}
+ *uoffset_p -= uoffset;
return s - start;
}
@@ -6147,7 +6151,7 @@ S_sv_pos_u2b_midway(const U8 *const start, const U8 *send,
created if necessary, and the found value offered to it for update. */
static STRLEN
S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start,
- const U8 *const send, const STRLEN uoffset,
+ const U8 *const send, STRLEN uoffset,
STRLEN uoffset0, STRLEN boffset0)
{
STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy. */
@@ -6189,9 +6193,11 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
uoffset - uoffset0,
(*mgp)->mg_len - uoffset0);
} else {
+ uoffset -= uoffset0;
boffset = boffset0
+ sv_pos_u2b_forwards(start + boffset0,
- send, uoffset - uoffset0);
+ send, &uoffset);
+ uoffset += uoffset0;
}
}
else if (cache[2] < uoffset) {
@@ -6229,9 +6235,11 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
}
if (!found || PL_utf8cache < 0) {
- const STRLEN real_boffset
- = boffset0 + sv_pos_u2b_forwards(start + boffset0,
- send, uoffset - uoffset0);
+ STRLEN real_boffset;
+ uoffset -= uoffset0;
+ real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0,
+ send, &uoffset);
+ uoffset += uoffset0;
if (found && PL_utf8cache < 0) {
if (real_boffset != boffset) {
diff --git a/t/op/index.t b/t/op/index.t
index 6cc3f428ea..aaf611b65b 100644
--- a/t/op/index.t
+++ b/t/op/index.t
@@ -7,7 +7,7 @@ BEGIN {
}
use strict;
-plan( tests => 111 );
+plan( tests => 113 );
run_tests() unless caller;
@@ -199,4 +199,13 @@ SKIP: {
}
}
+{
+ # RT#75898
+ is(eval { utf8::upgrade($_ = " "); index $_, " ", 72 }, -1,
+ 'UTF-8 cache handles offset beyond the end of the string');
+ $_ = "\x{100}BC";
+ is(index($_, "C", 4), -1,
+ 'UTF-8 cache handles offset beyond the end of the string');
+}
+
}