summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Simon Cozens <simon@netthink.co.uk> 2000-07-21 04:37:29 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-07-29 22:39:56 +0000
commit: e01b9e885c888f1f2271506404f791df89bcc483 (patch)
tree: 706ec9010b7d941037c628a2af91e085dc295b56
parent: 37931a30a953ca3b2c9a3a768ac2f3df83e5aea6 (diff)
download: perl-e01b9e885c888f1f2271506404f791df89bcc483.tar.gz
Do not upgrade SVs to UTF-8 just because they participate in eq or cmp.

Reported, with a suggested fix, in:
Subject: [ID 20000720.009] sv_eq UTF8 bug
Message-Id: <20000721043729.30081.qmail@othersideofthe.earth.li>

Exercise for the kind reader: should we or should we not cache the UTF-8 conversion alongside the SV (as magic, the way the strxfrm()ed version is cached under "use locale")? Argue both for and against.

p4raw-id: //depot/perl@6465
-rw-r--r-- sv.c 128
1 files changed, 72 insertions, 56 deletions
diff --git a/sv.c b/sv.c
index e248d144b0..20b387c96a 100644
--- a/sv.c
+++ b/sv.c
@@ -4104,38 +4104,51 @@ identical.
*/
I32
-Perl_sv_eq(pTHX_ register SV *str1, register SV *str2)
+Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
{
char *pv1;
STRLEN cur1;
char *pv2;
STRLEN cur2;
+ I32 eq = 0;
+ bool pv1tmp = FALSE;
+ bool pv2tmp = FALSE;
- if (!str1) {
+ if (!sv1) {
pv1 = "";
cur1 = 0;
}
else
- pv1 = SvPV(str1, cur1);
+ pv1 = SvPV(sv1, cur1);
- if (cur1) {
- if (!str2)
- return 0;
- if (SvUTF8(str1) != SvUTF8(str2) && !IN_BYTE) {
- if (SvUTF8(str1)) {
- sv_utf8_upgrade(str2);
- }
- else {
- sv_utf8_upgrade(str1);
- }
+ if (!sv2){
+ pv2 = "";
+ cur2 = 0;
+ }
+ else
+ pv2 = SvPV(sv2, cur2);
+
+ /* do not utf8ize the comparands as a side-effect */
+ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE && 0) {
+ if (SvUTF8(sv1)) {
+ pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
+ pv2tmp = TRUE;
+ }
+ else {
+ pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
+ pv1tmp = TRUE;
}
}
- pv2 = SvPV(str2, cur2);
- if (cur1 != cur2)
- return 0;
+ if (cur1 == cur2)
+ eq = memEQ(pv1, pv2, cur1);
+
+ if (pv1tmp)
+ Safefree(pv1);
+ if (pv2tmp)
+ Safefree(pv2);
- return memEQ(pv1, pv2, cur1);
+ return eq;
}
/*
@@ -4149,59 +4162,62 @@ C<sv2>.
*/
I32
-Perl_sv_cmp(pTHX_ register SV *str1, register SV *str2)
+Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
{
STRLEN cur1, cur2;
char *pv1, *pv2;
- I32 retval;
+ I32 cmp;
+ bool pv1tmp = FALSE;
+ bool pv2tmp = FALSE;
- if (str1) {
- pv1 = SvPV(str1, cur1);
- }
- else {
+ if (!sv1) {
+ pv1 = "";
cur1 = 0;
}
+ else
+ pv1 = SvPV(sv1, cur1);
- if (str2) {
- if (SvPOK(str2)) {
- if (SvPOK(str1) && SvUTF8(str1) != SvUTF8(str2) && !IN_BYTE) {
- /* must upgrade other to UTF8 first */
- if (SvUTF8(str1)) {
- sv_utf8_upgrade(str2);
- }
- else {
- sv_utf8_upgrade(str1);
- /* refresh pointer and length */
- pv1 = SvPVX(str1);
- cur1 = SvCUR(str1);
- }
- }
- pv2 = SvPVX(str2);
- cur2 = SvCUR(str2);
- }
- else {
- pv2 = sv_2pv(str2, &cur2);
- }
- }
- else {
+ if (!sv2){
+ pv2 = "";
cur2 = 0;
}
+ else
+ pv2 = SvPV(sv2, cur2);
- if (!cur1)
- return cur2 ? -1 : 0;
+ /* do not utf8ize the comparands as a side-effect */
+ if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTE) {
+ if (SvUTF8(sv1)) {
+ pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
+ pv2tmp = TRUE;
+ }
+ else {
+ pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
+ pv1tmp = TRUE;
+ }
+ }
- if (!cur2)
- return 1;
+ if (!cur1) {
+ cmp = cur2 ? -1 : 0;
+ } else if (!cur2) {
+ cmp = 1;
+ } else {
+ I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
- retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
+ if (retval) {
+ cmp = retval < 0 ? -1 : 1;
+ } else if (cur1 == cur2) {
+ cmp = 0;
+ } else {
+ cmp = cur1 < cur2 ? -1 : 1;
+ }
+ }
- if (retval)
- return retval < 0 ? -1 : 1;
+ if (pv1tmp)
+ Safefree(pv1);
+ if (pv2tmp)
+ Safefree(pv2);
- if (cur1 == cur2)
- return 0;
- else
- return cur1 < cur2 ? -1 : 1;
+ return cmp;
}
/*