summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Zakharevich <ilya@math.berkeley.edu>2002-03-03 21:31:04 -0500
committerAbhijit Menon-Sen <ams@wiw.org>2002-03-04 08:40:46 +0000
commit33abfa168074aa6ecde00f854b79a49b4c7fc776 (patch)
tree22b4bc180b4848f30cac711cedc72237922494c6
parent663de3a7d27b2a0a7665749c377c3843be52f2c3 (diff)
downloadperl-33abfa168074aa6ecde00f854b79a49b4c7fc776.tar.gz
sv_cmp and friends
Message-Id: <20020304023103.A14140@math.ohio-state.edu> p4raw-link: @14577 on //depot/perl: 0ad5258ff3f3328f321188cbb4fcd6a74b365431 p4raw-id: //depot/perl@14985
-rw-r--r--embed.fnc1
-rw-r--r--sv.c81
-rw-r--r--util.c33
3 files changed, 56 insertions, 59 deletions
diff --git a/embed.fnc b/embed.fnc
index a94654f123..2853071e33 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -426,6 +426,7 @@ Ap |void |markstack_grow
#if defined(USE_LOCALE_COLLATE)
p |char* |mem_collxfrm |const char* s|STRLEN len|STRLEN* xlen
#endif
+Apd |int |memcmp_byte_utf8 |char *sbyte|STRLEN lbyte|char *sutf|STRLEN lutf
Afp |SV* |mess |const char* pat|...
Ap |SV* |vmess |const char* pat|va_list* args
p |void |qerror |SV* err
diff --git a/sv.c b/sv.c
index 27150d6706..f893fa6bd8 100644
--- a/sv.c
+++ b/sv.c
@@ -5349,7 +5349,6 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
char *pv2;
STRLEN cur2;
I32 eq = 0;
- char *tpv = Nullch;
if (!sv1) {
pv1 = "";
@@ -5365,35 +5364,13 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2)
else
pv2 = SvPV(sv2, cur2);
- /* do not utf8ize the comparands as a side-effect */
- if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
- bool is_utf8 = TRUE;
- /* UTF-8ness differs */
-
- if (SvUTF8(sv1)) {
- /* sv1 is the UTF-8 one , If is equal it must be downgrade-able */
- char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8);
- if (pv != pv1)
- pv1 = tpv = pv;
- }
- else {
- /* sv2 is the UTF-8 one , If is equal it must be downgrade-able */
- char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8);
- if (pv != pv2)
- pv2 = tpv = pv;
- }
- if (is_utf8) {
- /* Downgrade not possible - cannot be eq */
- return FALSE;
- }
- }
-
- if (cur1 == cur2)
- eq = memEQ(pv1, pv2, cur1);
+ if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES)
+ eq = (cur1 == cur2) && memEQ(pv1, pv2, cur1);
+ else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */
+ eq = !memcmp_byte_utf8(pv2, cur2, pv1, cur1);
+ else
+ eq = !memcmp_byte_utf8(pv1, cur1, pv2, cur2);
- if (tpv != Nullch)
- Safefree(tpv);
-
return eq;
}
@@ -5413,9 +5390,7 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
{
STRLEN cur1, cur2;
char *pv1, *pv2;
- I32 cmp;
- bool pv1tmp = FALSE;
- bool pv2tmp = FALSE;
+ I32 retval;
if (!sv1) {
pv1 = "";
@@ -5431,40 +5406,28 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2)
else
pv2 = SvPV(sv2, cur2);
- /* do not utf8ize the comparands as a side-effect */
- if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) {
- if (SvUTF8(sv1)) {
- pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2);
- pv2tmp = TRUE;
- }
- else {
- pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1);
- pv1tmp = TRUE;
- }
- }
-
if (!cur1) {
- cmp = cur2 ? -1 : 0;
+ return cur2 ? -1 : 0;
} else if (!cur2) {
- cmp = 1;
- } else {
- I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
+ return 1;
+ } else if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) {
+ retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2);
if (retval) {
- cmp = retval < 0 ? -1 : 1;
+ return retval < 0 ? -1 : 1;
} else if (cur1 == cur2) {
- cmp = 0;
- } else {
- cmp = cur1 < cur2 ? -1 : 1;
+ return 0;
+ } else {
+ return cur1 < cur2 ? -1 : 1;
}
- }
-
- if (pv1tmp)
- Safefree(pv1);
- if (pv2tmp)
- Safefree(pv2);
+ } else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */
+ retval = -memcmp_byte_utf8(pv2, cur2, pv1, cur1);
+ else
+ retval = memcmp_byte_utf8(pv1, cur1, pv2, cur2);
- return cmp;
+ if (retval) /* CURs taken into account already */
+ return retval < 0 ? -1 : 1;
+ return 0;
}
/*
diff --git a/util.c b/util.c
index a9f9ade8f6..4dc86764a7 100644
--- a/util.c
+++ b/util.c
@@ -4346,5 +4346,38 @@ Perl_sv_nounlocking(pTHX_ SV *sv)
{
}
+/*
+=for apidoc memcmp_byte_utf8
+
+Similar to memcmp(), but the first string is bytes (one code point each)
+and the second is utf8; compares code points.  Returns 0 if equal, <0 if
+the byte string sorts first, >0 otherwise (a proper prefix sorts first).
+Utf8 that does not decode to a code point <= 0xFF sorts after any byte.
+=cut
+*/
+int
+Perl_memcmp_byte_utf8(pTHX_ char *sb, STRLEN lbyte, char *su, STRLEN lutf)
+{
+    U8 *sbyte = (U8*)sb, *ebyte = sbyte + lbyte;
+    U8 *sutf = (U8*)su, *eutf = sutf + lutf;
+
+    while (sbyte < ebyte) {
+        U8 c;                       /* next utf8 char, set only if <= 0xFF */
+        if (sutf >= eutf)
+            return 1;               /* utf one shorter */
+        if (*sutf < 0x80)           /* ASCII is encoded as itself */
+            c = *sutf++;
+        else if ((*sutf & 0xFE) == 0xC2 && eutf - sutf >= 2
+                 && (sutf[1] & 0xC0) == 0x80) { /* 0x80..0xFF: 0xC3 0xBF is 0xFF */
+            c = (U8)(((sutf[0] & 0x03) << 6) | (sutf[1] & 0x3F));
+            sutf += 2;
+        } else
+            return -1;              /* utf char above 0xFF, or malformed */
+        if (*sbyte != c)
+            return (int)*sbyte - (int)c;
+        sbyte++;
+    }
+    return sutf >= eutf ? 0 : -1;   /* -1: byte one shorter */
+}