diff options
author | Nicholas Clark <nick@ccl4.org> | 2010-11-11 16:08:43 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2010-11-11 16:08:43 +0000 |
commit | fed3ba5d6b9222e6e73844680734b059e616c86b (patch) | |
tree | c8a449308b28520170011d015883c39c887fb9e8 /sv.c | |
parent | 08a6f934b8306af074a22b05f6de14f564a9da18 (diff) | |
download | perl-fed3ba5d6b9222e6e73844680734b059e616c86b.tar.gz |
Add Perl_bytes_cmp_utf8() to compare character sequences in different encodings
Convert sv_eq_flags() and sv_cmp_flags() to use it.
Previously, to compare two strings of characters, where one was in UTF-8, and
one was not, you had to either:
1: Upgrade the second to UTF-8
2: Compare the resulting octet sequence
3: Free the temporary UTF-8 string
or:
1: Attempt to downgrade the first to bytes. If it can't be, they aren't equal
2: Else compare the resulting octet sequence
3: Free the temporary byte string
Which for the general case involves a malloc()/free() and at least two O(n)
scans per comparison.
Whereas this approach has no allocation, a single O(n) scan, which terminates
as early as the best case for the second approach.
Diffstat (limited to 'sv.c')
-rw-r--r-- | sv.c | 33 |
1 files changed, 12 insertions, 21 deletions
@@ -7044,28 +7044,15 @@ Perl_sv_eq_flags(pTHX_ register SV *sv1, register SV *sv2, const U32 flags) } } else { - bool is_utf8 = TRUE; - if (SvUTF8(sv1)) { - /* sv1 is the UTF-8 one, - * if is equal it must be downgrade-able */ - char * const pv = (char*)bytes_from_utf8((const U8*)pv1, - &cur1, &is_utf8); - if (pv != pv1) - pv1 = tpv = pv; + /* sv1 is the UTF-8 one */ + return bytes_cmp_utf8((const U8*)pv2, cur2, + (const U8*)pv1, cur1) == 0; } else { - /* sv2 is the UTF-8 one, - * if is equal it must be downgrade-able */ - char * const pv = (char *)bytes_from_utf8((const U8*)pv2, - &cur2, &is_utf8); - if (pv != pv2) - pv2 = tpv = pv; - } - if (is_utf8) { - /* Downgrade not possible - cannot be eq */ - assert (tpv == 0); - return FALSE; + /* sv2 is the UTF-8 one */ + return bytes_cmp_utf8((const U8*)pv1, cur1, + (const U8*)pv2, cur2) == 0; } } } @@ -7140,7 +7127,9 @@ Perl_sv_cmp_flags(pTHX_ register SV *const sv1, register SV *const sv2, pv2 = SvPV_const(svrecode, cur2); } else { - pv2 = tpv = (char*)bytes_to_utf8((const U8*)pv2, &cur2); + const int retval = -bytes_cmp_utf8((const U8*)pv2, cur2, + (const U8*)pv1, cur1); + return retval ? retval < 0 ? -1 : +1 : 0; } } else { @@ -7150,7 +7139,9 @@ Perl_sv_cmp_flags(pTHX_ register SV *const sv1, register SV *const sv2, pv1 = SvPV_const(svrecode, cur1); } else { - pv1 = tpv = (char*)bytes_to_utf8((const U8*)pv1, &cur1); + const int retval = bytes_cmp_utf8((const U8*)pv1, cur1, + (const U8*)pv2, cur2); + return retval ? retval < 0 ? -1 : +1 : 0; } } } |