summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2005-01-11 19:10:20 +0000
committerNicholas Clark <nick@ccl4.org>2005-01-11 19:10:20 +0000
commite609e5866582a76dcfe889f9b46e4909b2f0b543 (patch)
tree1598ae77df9042967af7d50d26903e5ae6e5212a /pp.c
parent57f5baf2d376469520fedfc328fdf51d005eafe3 (diff)
downloadperl-e609e5866582a76dcfe889f9b46e4909b2f0b543.tar.gz
Fix bug 32294 - index()/rindex() ignore UTF8 flag
(for cases of mixed UTF8/bytes) Test code based on bug report by John Gardiner Myers p4raw-id: //depot/perl@23782
Diffstat (limited to 'pp.c')
-rw-r--r--pp.c75
1 files changed, 70 insertions, 5 deletions
diff --git a/pp.c b/pp.c
index 69d8e18b49..f960c374f2 100644
--- a/pp.c
+++ b/pp.c
@@ -3190,12 +3190,15 @@ PP(pp_index)
dSP; dTARGET;
SV *big;
SV *little;
+ SV *temp = Nullsv;
I32 offset;
I32 retval;
char *tmps;
char *tmps2;
STRLEN biglen;
I32 arybase = PL_curcop->cop_arybase;
+ int big_utf8;
+ int little_utf8;
if (MAXARG < 3)
offset = 0;
@@ -3203,9 +3206,31 @@ PP(pp_index)
offset = POPi - arybase;
little = POPs;
big = POPs;
- tmps = SvPV(big, biglen);
- if (offset > 0 && DO_UTF8(big))
+ big_utf8 = DO_UTF8(big);
+ little_utf8 = DO_UTF8(little);
+ if (big_utf8 ^ little_utf8) {
+ /* One needs to be upgraded. */
+ SV *bytes = little_utf8 ? big : little;
+ STRLEN len;
+ char *p = SvPV(bytes, len);
+
+ temp = newSVpvn(p, len);
+
+ if (PL_encoding) {
+ sv_recode_to_utf8(temp, PL_encoding);
+ } else {
+ sv_utf8_upgrade(temp);
+ }
+ if (little_utf8) {
+ big = temp;
+ big_utf8 = TRUE;
+ } else {
+ little = temp;
+ }
+ }
+ if (big_utf8 && offset > 0)
sv_pos_u2b(big, &offset, 0);
+ tmps = SvPV(big, biglen);
if (offset < 0)
offset = 0;
else if (offset > (I32)biglen)
@@ -3215,8 +3240,10 @@ PP(pp_index)
retval = -1;
else
retval = tmps2 - tmps;
- if (retval > 0 && DO_UTF8(big))
+ if (retval > 0 && big_utf8)
sv_pos_b2u(big, &retval);
+ if (temp)
+ SvREFCNT_dec(temp);
PUSHi(retval + arybase);
RETURN;
}
@@ -3226,6 +3253,7 @@ PP(pp_rindex)
dSP; dTARGET;
SV *big;
SV *little;
+ SV *temp = Nullsv;
STRLEN blen;
STRLEN llen;
I32 offset;
@@ -3233,17 +3261,42 @@ PP(pp_rindex)
char *tmps;
char *tmps2;
I32 arybase = PL_curcop->cop_arybase;
+ int big_utf8;
+ int little_utf8;
if (MAXARG >= 3)
offset = POPi;
little = POPs;
big = POPs;
+ big_utf8 = DO_UTF8(big);
+ little_utf8 = DO_UTF8(little);
+ if (big_utf8 ^ little_utf8) {
+ /* One needs to be upgraded. */
+ SV *bytes = little_utf8 ? big : little;
+ STRLEN len;
+ char *p = SvPV(bytes, len);
+
+ temp = newSVpvn(p, len);
+
+ if (PL_encoding) {
+ sv_recode_to_utf8(temp, PL_encoding);
+ } else {
+ sv_utf8_upgrade(temp);
+ }
+ if (little_utf8) {
+ big = temp;
+ big_utf8 = TRUE;
+ } else {
+ little = temp;
+ }
+ }
tmps2 = SvPV(little, llen);
tmps = SvPV(big, blen);
+
if (MAXARG < 3)
offset = blen;
else {
- if (offset > 0 && DO_UTF8(big))
+ if (offset > 0 && big_utf8)
sv_pos_u2b(big, &offset, 0);
offset = offset - arybase + llen;
}
@@ -3256,8 +3309,10 @@ PP(pp_rindex)
retval = -1;
else
retval = tmps2 - tmps;
- if (retval > 0 && DO_UTF8(big))
+ if (retval > 0 && big_utf8)
sv_pos_b2u(big, &retval);
+ if (temp)
+ SvREFCNT_dec(temp);
PUSHi(retval + arybase);
RETURN;
}
@@ -4749,3 +4804,13 @@ PP(pp_threadsv)
{
DIE(aTHX_ "tried to access per-thread data in non-threaded perl");
}
+
+/*
+ * Local variables:
+ * c-indentation-style: bsd
+ * c-basic-offset: 4
+ * indent-tabs-mode: t
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+*/