summaryrefslogtreecommitdiff
path: root/sv.c
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2010-11-11 16:08:43 +0000
committerNicholas Clark <nick@ccl4.org>2010-11-11 16:08:43 +0000
commitfed3ba5d6b9222e6e73844680734b059e616c86b (patch)
treec8a449308b28520170011d015883c39c887fb9e8 /sv.c
parent08a6f934b8306af074a22b05f6de14f564a9da18 (diff)
downloadperl-fed3ba5d6b9222e6e73844680734b059e616c86b.tar.gz
Add Perl_bytes_cmp_utf8() to compare character sequences in different encodings
Convert sv_eq_flags() and sv_cmp_flags() to use it. Previously, to compare two strings of characters, where one was in UTF-8 and one was not, you had to either: 1: Upgrade the second to UTF-8; 2: Compare the resulting octet sequence; 3: Free the temporary UTF-8 string; or: 1: Attempt to downgrade the first to bytes (if it can't be, they aren't equal); 2: Else compare the resulting octet sequence; 3: Free the temporary byte string. Which for the general case involves a malloc()/free() and at least two O(n) scans per comparison. Whereas this approach has no allocation, and a single O(n) scan, which terminates as early as the best case for the second approach.
Diffstat (limited to 'sv.c')
-rw-r--r--sv.c33
1 files changed, 12 insertions, 21 deletions
diff --git a/sv.c b/sv.c
index 27b4bd6fa8..a1ca186b6f 100644
--- a/sv.c
+++ b/sv.c
@@ -7044,28 +7044,15 @@ Perl_sv_eq_flags(pTHX_ register SV *sv1, register SV *sv2, const U32 flags)
}
}
else {
- bool is_utf8 = TRUE;
-
if (SvUTF8(sv1)) {
- /* sv1 is the UTF-8 one,
- * if is equal it must be downgrade-able */
- char * const pv = (char*)bytes_from_utf8((const U8*)pv1,
- &cur1, &is_utf8);
- if (pv != pv1)
- pv1 = tpv = pv;
+ /* sv1 is the UTF-8 one */
+ return bytes_cmp_utf8((const U8*)pv2, cur2,
+ (const U8*)pv1, cur1) == 0;
}
else {
- /* sv2 is the UTF-8 one,
- * if is equal it must be downgrade-able */
- char * const pv = (char *)bytes_from_utf8((const U8*)pv2,
- &cur2, &is_utf8);
- if (pv != pv2)
- pv2 = tpv = pv;
- }
- if (is_utf8) {
- /* Downgrade not possible - cannot be eq */
- assert (tpv == 0);
- return FALSE;
+ /* sv2 is the UTF-8 one */
+ return bytes_cmp_utf8((const U8*)pv1, cur1,
+ (const U8*)pv2, cur2) == 0;
}
}
}
@@ -7140,7 +7127,9 @@ Perl_sv_cmp_flags(pTHX_ register SV *const sv1, register SV *const sv2,
pv2 = SvPV_const(svrecode, cur2);
}
else {
- pv2 = tpv = (char*)bytes_to_utf8((const U8*)pv2, &cur2);
+ const int retval = -bytes_cmp_utf8((const U8*)pv2, cur2,
+ (const U8*)pv1, cur1);
+ return retval ? retval < 0 ? -1 : +1 : 0;
}
}
else {
@@ -7150,7 +7139,9 @@ Perl_sv_cmp_flags(pTHX_ register SV *const sv1, register SV *const sv2,
pv1 = SvPV_const(svrecode, cur1);
}
else {
- pv1 = tpv = (char*)bytes_to_utf8((const U8*)pv1, &cur1);
+ const int retval = bytes_cmp_utf8((const U8*)pv1, cur1,
+ (const U8*)pv2, cur2);
+ return retval ? retval < 0 ? -1 : +1 : 0;
}
}
}