summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2010-11-11 16:08:43 +0000
committerNicholas Clark <nick@ccl4.org>2010-11-11 16:08:43 +0000
commitfed3ba5d6b9222e6e73844680734b059e616c86b (patch)
treec8a449308b28520170011d015883c39c887fb9e8 /ext
parent08a6f934b8306af074a22b05f6de14f564a9da18 (diff)
downloadperl-fed3ba5d6b9222e6e73844680734b059e616c86b.tar.gz
Add Perl_bytes_cmp_utf8() to compare character sequences in different encodings
Convert sv_eq_flags() and sv_cmp_flags() to use it. Previously, to compare two strings of characters, where was was in UTF-8, and one was not, you had to either: 1: Upgrade the second to UTF-8 2: Compare the resulting octet sequence 3: Free the temporary UTF-8 string or: 1: Attempt to downgrade the first to bytes. If it can't be, they aren't equal 2: Else compare the resulting octet sequence 3: Free the temporary byte string Which for the general case involves a malloc()/free() and at least two O(n) scans per comparison. Whereas this approach has no allocation, a single O(n) scan, which terminates as early as the best case for the second approach.
Diffstat (limited to 'ext')
-rw-r--r--ext/XS-APItest/APItest.xs18
-rw-r--r--ext/XS-APItest/t/utf8.t27
2 files changed, 45 insertions, 0 deletions
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs
index 23dd9637c6..285fedffe5 100644
--- a/ext/XS-APItest/APItest.xs
+++ b/ext/XS-APItest/APItest.xs
@@ -913,6 +913,24 @@ INCLUDE: const-xs.inc
INCLUDE: numeric.xs
+MODULE = XS::APItest::utf8 PACKAGE = XS::APItest::utf8
+
+int
+bytes_cmp_utf8(bytes, utf8)
+ SV *bytes
+ SV *utf8
+ PREINIT:
+ const U8 *b;
+ STRLEN blen;
+ const U8 *u;
+ STRLEN ulen;
+ CODE:
+ b = (const U8 *)SvPVbyte(bytes, blen);
+ u = (const U8 *)SvPVbyte(utf8, ulen);
+ RETVAL = bytes_cmp_utf8(b, blen, u, ulen);
+ OUTPUT:
+ RETVAL
+
MODULE = XS::APItest:Overload PACKAGE = XS::APItest::Overload
SV *
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
new file mode 100644
index 0000000000..9ad99f27f1
--- /dev/null
+++ b/ext/XS-APItest/t/utf8.t
@@ -0,0 +1,27 @@
+#!perl -w
+
+use strict;
+use Test::More;
+
+use XS::APItest;
+
+foreach ([0, '', '', 'empty'],
+ [0, 'N', 'N', '1 char'],
+ [1, 'NN', 'N', '1 char substring'],
+ [-2, 'Perl', 'Rules', 'different'],
+ [0, chr 163, chr 163, 'pound sign'],
+ [1, chr (163) . 10, chr (163) . 1, '10 pounds is more than 1 pound'],
+ [1, chr(163) . chr(163), chr 163, '2 pound signs are more than 1'],
+ [-2, ' $!', " \x{1F42B}!", 'Camels are worth more than 1 dollar'],
+ [-1, '!', "!\x{1F42A}", 'Initial substrings match'],
+ ) {
+ my ($expect, $left, $right, $desc) = @$_;
+ my $copy = $right;
+ utf8::encode($copy);
+ is(bytes_cmp_utf8($left, $copy), $expect, $desc);
+ next if $right =~ tr/\0-\377//c;
+ utf8::encode($left);
+ is(bytes_cmp_utf8($right, $left), -$expect, "$desc reversed");
+}
+
+done_testing;