diff options
author | Ilya Zakharevich <ilya@math.berkeley.edu> | 2002-03-03 21:31:04 -0500 |
---|---|---|
committer | Abhijit Menon-Sen <ams@wiw.org> | 2002-03-04 08:40:46 +0000 |
commit | 33abfa168074aa6ecde00f854b79a49b4c7fc776 (patch) | |
tree | 22b4bc180b4848f30cac711cedc72237922494c6 | |
parent | 663de3a7d27b2a0a7665749c377c3843be52f2c3 (diff) | |
download | perl-33abfa168074aa6ecde00f854b79a49b4c7fc776.tar.gz |
sv_cmp and friends
Message-Id: <20020304023103.A14140@math.ohio-state.edu>
p4raw-link: @14577 on //depot/perl: 0ad5258ff3f3328f321188cbb4fcd6a74b365431
p4raw-id: //depot/perl@14985
-rw-r--r-- | embed.fnc | 1 | ||||
-rw-r--r-- | sv.c | 81 | ||||
-rw-r--r-- | util.c | 33 |
3 files changed, 56 insertions, 59 deletions
@@ -426,6 +426,7 @@ Ap |void |markstack_grow #if defined(USE_LOCALE_COLLATE) p |char* |mem_collxfrm |const char* s|STRLEN len|STRLEN* xlen #endif +Apd |int |memcmp_byte_utf8 |char *sbyte|STRLEN lbyte|char *sutf|STRLEN lutf Afp |SV* |mess |const char* pat|... Ap |SV* |vmess |const char* pat|va_list* args p |void |qerror |SV* err @@ -5349,7 +5349,6 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) char *pv2; STRLEN cur2; I32 eq = 0; - char *tpv = Nullch; if (!sv1) { pv1 = ""; @@ -5365,35 +5364,13 @@ Perl_sv_eq(pTHX_ register SV *sv1, register SV *sv2) else pv2 = SvPV(sv2, cur2); - /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { - bool is_utf8 = TRUE; - /* UTF-8ness differs */ - - if (SvUTF8(sv1)) { - /* sv1 is the UTF-8 one , If is equal it must be downgrade-able */ - char *pv = (char*)bytes_from_utf8((U8*)pv1, &cur1, &is_utf8); - if (pv != pv1) - pv1 = tpv = pv; - } - else { - /* sv2 is the UTF-8 one , If is equal it must be downgrade-able */ - char *pv = (char *)bytes_from_utf8((U8*)pv2, &cur2, &is_utf8); - if (pv != pv2) - pv2 = tpv = pv; - } - if (is_utf8) { - /* Downgrade not possible - cannot be eq */ - return FALSE; - } - } - - if (cur1 == cur2) - eq = memEQ(pv1, pv2, cur1); + if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) + eq = (cur1 == cur2) && memEQ(pv1, pv2, cur1); + else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */ + eq = !memcmp_byte_utf8(pv2, cur2, pv1, cur1); + else + eq = !memcmp_byte_utf8(pv1, cur1, pv2, cur2); - if (tpv != Nullch) - Safefree(tpv); - return eq; } @@ -5413,9 +5390,7 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) { STRLEN cur1, cur2; char *pv1, *pv2; - I32 cmp; - bool pv1tmp = FALSE; - bool pv2tmp = FALSE; + I32 retval; if (!sv1) { pv1 = ""; @@ -5431,40 +5406,28 @@ Perl_sv_cmp(pTHX_ register SV *sv1, register SV *sv2) else pv2 = SvPV(sv2, cur2); - /* do not utf8ize the comparands as a side-effect */ - if (cur1 && cur2 && SvUTF8(sv1) != SvUTF8(sv2) && !IN_BYTES) { - if (SvUTF8(sv1)) { - pv2 = (char*)bytes_to_utf8((U8*)pv2, &cur2); - pv2tmp = TRUE; - } - else { - pv1 = (char*)bytes_to_utf8((U8*)pv1, &cur1); - pv1tmp = TRUE; - } - } - if (!cur1) { - cmp = cur2 ? -1 : 0; + return cur2 ? -1 : 0; } else if (!cur2) { - cmp = 1; - } else { - I32 retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); + return 1; + } else if (SvUTF8(sv1) == SvUTF8(sv2) || IN_BYTES) { + retval = memcmp((void*)pv1, (void*)pv2, cur1 < cur2 ? cur1 : cur2); if (retval) { - cmp = retval < 0 ? -1 : 1; + return retval < 0 ? -1 : 1; } else if (cur1 == cur2) { - cmp = 0; - } else { - cmp = cur1 < cur2 ? -1 : 1; + return 0; + } else { + return cur1 < cur2 ? -1 : 1; } - } - - if (pv1tmp) - Safefree(pv1); - if (pv2tmp) - Safefree(pv2); + } else if (SvUTF8(sv1)) /* do not utf8ize the comparands as a side-effect */ + retval = -memcmp_byte_utf8(pv2, cur2, pv1, cur1); + else + retval = memcmp_byte_utf8(pv1, cur1, pv2, cur2); - return cmp; + if (retval) /* CURs taken into account already */ + return retval < 0 ? -1 : 1; + return 0; } /* @@ -4346,5 +4346,38 @@ Perl_sv_nounlocking(pTHX_ SV *sv) { } +/* +=for apidoc memcmp_byte_utf8 + +Similar to memcmp(), but the first string is with bytes, the second +with utf8. Takes into account that the lengths may be different. +=cut +*/ +int +Perl_memcmp_byte_utf8(pTHX_ char *sb, STRLEN lbyte, char *su, STRLEN lutf) +{ + U8 *sbyte = (U8*)sb; + U8 *sutf = (U8*)su; + U8 *ebyte = sbyte + lbyte; + U8 *eutf = sutf + lutf; + + while (sbyte < ebyte) { + if (sutf >= eutf) + return 1; /* utf one shorter */ + if (*sbyte < 128) { + if (*sbyte != *sutf) + return *sbyte - *sutf; + sbyte++; sutf++; /* CONTINUE */ + } else if ((*sutf & 0x3F) == (*sbyte >> 6)) { /* byte 0xFF: 0xC3 BF */ + if ((sutf[1] & 0x3F) != (*sbyte & 0x3F)) + return (*sbyte & 0x3F) - (*sutf & 0x3F); + sbyte++, sutf += 2; /* CONTINUE */ + } else + return (*sbyte >> 6) - (*sutf & 0x3F); + } + if (sutf >= eutf) + return 0; + return -1; /* byte one shorter */ +} |