summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorNicholas Clark <nick@ccl4.org>2009-10-18 21:06:06 +0100
committerNicholas Clark <nick@ccl4.org>2009-10-18 22:10:36 +0100
commit30685b5659009a95642202219acc6ded18f74dbc (patch)
treef3f76ff8fe1aa5f396b1b9ff3034bcff78019cf9 /ext
parent8f3d5996a665cf70e12a836b95e184e9ab628251 (diff)
downloadperl-30685b5659009a95642202219acc6ded18f74dbc.tar.gz
Expose utf16_to_utf8{,reversed} via XS::APItest, and provide some basic tests.
Diffstat (limited to 'ext')
-rw-r--r--ext/XS-APItest/APItest.pm4
-rw-r--r--ext/XS-APItest/APItest.xs29
-rw-r--r--ext/XS-APItest/t/utf16_to_utf8.t49
3 files changed, 80 insertions, 2 deletions
diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm
index 12d0a03e01..c40e4b8a4b 100644
--- a/ext/XS-APItest/APItest.pm
+++ b/ext/XS-APItest/APItest.pm
@@ -23,10 +23,10 @@ our @EXPORT = qw( print_double print_int print_long
my_cxt_getint my_cxt_getsv my_cxt_setint my_cxt_setsv
sv_setsv_cow_hashkey_core sv_setsv_cow_hashkey_notcore
rmagical_cast rmagical_flags
- DPeek
+ DPeek utf16_to_utf8 utf16_to_utf8_reversed
);
-our $VERSION = '0.15';
+our $VERSION = '0.16';
use vars '$WARNINGS_ON_BOOTSTRAP';
use vars map "\$${_}_called_PP", qw(BEGIN UNITCHECK CHECK INIT END);
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs
index 7e7f78b211..4eac4a61df 100644
--- a/ext/XS-APItest/APItest.xs
+++ b/ext/XS-APItest/APItest.xs
@@ -892,3 +892,32 @@ void
END()
CODE:
sv_inc(get_sv("XS::APItest::END_called", GV_ADD|GV_ADDMULTI));
+
+void
+utf16_to_utf8 (sv, ...)
+ SV* sv
+ ALIAS:
+ utf16_to_utf8_reversed = 1
+ PREINIT:
+ STRLEN len;
+ U8 *source;
+ SV *dest;
+ I32 got; /* Gah, badly thought out APIs */
+ CODE:
+ source = (U8 *)SvPVbyte(sv, len);
+ /* Optionally only convert part of the buffer. */
+ if (items > 1) {
+ len = SvUV(ST(1));
+ }
+ /* Mortalise this right now, as we'll be testing croak()s */
+ dest = sv_2mortal(newSV(len * 3 / 2 + 1));
+ if (ix) {
+ utf16_to_utf8_reversed(source, SvPVX(dest), len, &got);
+ } else {
+ utf16_to_utf8(source, SvPVX(dest), len, &got);
+ }
+ SvCUR_set(dest, got);
+ SvPVX(dest)[got] = '\0';
+ SvPOK_on(dest);
+ ST(0) = dest;
+ XSRETURN(1);
diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t
new file mode 100644
index 0000000000..3da3d7d12b
--- /dev/null
+++ b/ext/XS-APItest/t/utf16_to_utf8.t
@@ -0,0 +1,49 @@
+#!perl -w
+
+use strict;
+use Test::More 'no_plan';
+use Encode;
+
+use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed);
+
+for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) {
+ my $chr = chr $ord;
+ for my $prefix ('', "\0", 'Perl rules') {
+ for my $suffix ('', "\0", "Moo!") {
+ my $string = $prefix . $chr . $suffix;
+ my $name = sprintf "for chr $ord prefix %d, suffix %d",
+ length $prefix, length $suffix;
+ my $as_utf8 = encode('UTF-8', $string);
+ is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8,
+ "utf16_to_utf8 $name");
+ is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8,
+ "utf16_to_utf8_reversed $name");
+ }
+ }
+}
+
+# Currently this is special-cased to work. Should it?
+
+is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8');
+
+# But anything else is fatal
+
+my $got = eval {utf16_to_utf8('N')};
+like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics');
+is($got, undef, 'hence eval returns undef');
+
+for (["\xD8\0\0\0", 'NULs'],
+ ["\xD8\0\xD8\0", '2 Lows'],
+ ) {
+ my ($malformed, $name) = @$_;
+ $got = eval {utf16_to_utf8($malformed)};
+ like($@, qr/^Malformed UTF-16 surrogate at/,
+ "Malformed surrogate $name croaks for utf16_to_utf8");
+ is($got, undef, 'hence eval returns undef');
+
+ $malformed =~ s/(.)(.)/$2$1/gs;
+ $got = eval {utf16_to_utf8_reversed($malformed)};
+ like($@, qr/^Malformed UTF-16 surrogate at/,
+ "Malformed surrogate $name croaks for utf16_to_utf8_reversed");
+ is($got, undef, 'hence eval returns undef');
+}