diff options
author | Nicholas Clark <nick@ccl4.org> | 2009-10-18 21:06:06 +0100 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2009-10-18 22:10:36 +0100 |
commit | 30685b5659009a95642202219acc6ded18f74dbc (patch) | |
tree | f3f76ff8fe1aa5f396b1b9ff3034bcff78019cf9 /ext | |
parent | 8f3d5996a665cf70e12a836b95e184e9ab628251 (diff) | |
download | perl-30685b5659009a95642202219acc6ded18f74dbc.tar.gz |
Expose utf16_to_utf8{,reversed} via XS::APItest, and provide some basic tests.
Diffstat (limited to 'ext')
-rw-r--r-- | ext/XS-APItest/APItest.pm | 4 | ||||
-rw-r--r-- | ext/XS-APItest/APItest.xs | 29 | ||||
-rw-r--r-- | ext/XS-APItest/t/utf16_to_utf8.t | 49 |
3 files changed, 80 insertions, 2 deletions
diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 12d0a03e01..c40e4b8a4b 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -23,10 +23,10 @@ our @EXPORT = qw( print_double print_int print_long my_cxt_getint my_cxt_getsv my_cxt_setint my_cxt_setsv sv_setsv_cow_hashkey_core sv_setsv_cow_hashkey_notcore rmagical_cast rmagical_flags - DPeek + DPeek utf16_to_utf8 utf16_to_utf8_reversed ); -our $VERSION = '0.15'; +our $VERSION = '0.16'; use vars '$WARNINGS_ON_BOOTSTRAP'; use vars map "\$${_}_called_PP", qw(BEGIN UNITCHECK CHECK INIT END); diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 7e7f78b211..4eac4a61df 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -892,3 +892,32 @@ void END() CODE: sv_inc(get_sv("XS::APItest::END_called", GV_ADD|GV_ADDMULTI)); + +void +utf16_to_utf8 (sv, ...) + SV* sv + ALIAS: + utf16_to_utf8_reversed = 1 + PREINIT: + STRLEN len; + U8 *source; + SV *dest; + I32 got; /* Gah, badly thought out APIs */ + CODE: + source = (U8 *)SvPVbyte(sv, len); + /* Optionally only convert part of the buffer. */ + if (items > 1) { + len = SvUV(ST(1)); + } + /* Mortalise this right now, as we'll be testing croak()s */ + dest = sv_2mortal(newSV(len * 3 / 2 + 1)); + if (ix) { + utf16_to_utf8_reversed(source, SvPVX(dest), len, &got); + } else { + utf16_to_utf8(source, SvPVX(dest), len, &got); + } + SvCUR_set(dest, got); + SvPVX(dest)[got] = '\0'; + SvPOK_on(dest); + ST(0) = dest; + XSRETURN(1); diff --git a/ext/XS-APItest/t/utf16_to_utf8.t b/ext/XS-APItest/t/utf16_to_utf8.t new file mode 100644 index 0000000000..3da3d7d12b --- /dev/null +++ b/ext/XS-APItest/t/utf16_to_utf8.t @@ -0,0 +1,49 @@ +#!perl -w + +use strict; +use Test::More 'no_plan'; +use Encode; + +use XS::APItest qw(utf16_to_utf8 utf16_to_utf8_reversed); + +for my $ord (0, 10, 13, 78, 255, 256, 0xD7FF, 0xE000, 0x10000) { + my $chr = chr $ord; + for my $prefix ('', "\0", 'Perl rules') { + for my $suffix ('', "\0", "Moo!") { + my $string = $prefix . $chr . $suffix; + my $name = sprintf "for chr $ord prefix %d, suffix %d", + length $prefix, length $suffix; + my $as_utf8 = encode('UTF-8', $string); + is(utf16_to_utf8(encode('UTF-16BE', $string)), $as_utf8, + "utf16_to_utf8 $name"); + is(utf16_to_utf8_reversed(encode('UTF-16LE', $string)), $as_utf8, + "utf16_to_utf8_reversed $name"); + } + } +} + +# Currently this is special-cased to work. Should it? + +is(utf16_to_utf8("\0"), "\0", 'Short string to utf16_to_utf8'); + +# But anything else is fatal + +my $got = eval {utf16_to_utf8('N')}; +like($@, qr/^panic: utf16_to_utf8: odd bytelen 1 at/, 'Odd byte length panics'); +is($got, undef, 'hence eval returns undef'); + +for (["\xD8\0\0\0", 'NULs'], + ["\xD8\0\xD8\0", '2 Lows'], + ) { + my ($malformed, $name) = @$_; + $got = eval {utf16_to_utf8($malformed)}; + like($@, qr/^Malformed UTF-16 surrogate at/, + "Malformed surrogate $name croaks for utf16_to_utf8"); + is($got, undef, 'hence eval returns undef'); + + $malformed =~ s/(.)(.)/$2$1/gs; + $got = eval {utf16_to_utf8_reversed($malformed)}; + like($@, qr/^Malformed UTF-16 surrogate at/, + "Malformed surrogate $name croaks for utf16_to_utf8_reversed"); + is($got, undef, 'hence eval returns undef'); +} |