summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2016-09-08 11:34:15 -0600
committerKarl Williamson <khw@cpan.org>2016-09-17 21:10:50 -0600
commit3d56ecbe82b99d21cf2f5e67297d4236e38b282d (patch)
treeb85011159de9be80373f9e27744593f7248849db
parentf1c999a79ad93bb81cbb7b1bec96e06c33773b81 (diff)
downloadperl-3d56ecbe82b99d21cf2f5e67297d4236e38b282d.tar.gz
Add tests for is_valid_partial_utf8_char_flags()
-rw-r--r--ext/XS-APItest/APItest.xs9
-rw-r--r--ext/XS-APItest/t/utf8.t120
2 files changed, 128 insertions, 1 deletions
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs
index d2c1c33e30..b20206e5d6 100644
--- a/ext/XS-APItest/APItest.xs
+++ b/ext/XS-APItest/APItest.xs
@@ -5327,6 +5327,15 @@ test_isUTF8_CHAR(char *s, STRLEN len)
OUTPUT:
RETVAL
+IV
+test_is_utf8_valid_partial_char_flags(char *s, STRLEN len, U32 flags)
+ CODE:
+ /* RETVAL should be bool, but making it IV allows us to test it
+ * returning 0 or 1 */
+ RETVAL = is_utf8_valid_partial_char_flags((U8 *) s, (U8 *) s + len, flags);
+ OUTPUT:
+ RETVAL
+
UV
test_toLOWER(UV ord)
CODE:
diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t
index 735febaf01..c909ebbfe9 100644
--- a/ext/XS-APItest/t/utf8.t
+++ b/ext/XS-APItest/t/utf8.t
@@ -338,7 +338,26 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) }
"Verify UTF8_SKIP(chr $hex_n) is $uvchr_skip_should_be");
use bytes;
- for (my $j = 0; $j < length $n_chr; $j++) {
+ my $byte_length = length $n_chr;
+ for (my $j = 0; $j < $byte_length; $j++) {
+ undef @warnings;
+
+ if ($j == $byte_length - 1) {
+ my $ret = test_is_utf8_valid_partial_char_flags($n_chr, $byte_length, 0);
+ is($ret, 0, " Verify is_utf8_valid_partial_char_flags(" . display_bytes($n_chr) . ") returns 0 for full character");
+ }
+ else {
+ my $bytes_so_far = substr($n_chr, 0, $j + 1);
+ my $ret = test_is_utf8_valid_partial_char_flags($bytes_so_far, $j + 1, 0);
+ is($ret, 1, " Verify is_utf8_valid_partial_char_flags(" . display_bytes($bytes_so_far) . ") returns 1");
+ }
+
+ unless (is(scalar @warnings, 0,
+ " Verify is_utf8_valid_partial_char_flags generated no warnings"))
+ {
+ diag "The warnings were: " . join(", ", @warnings);
+ }
+
my $b = substr($n_chr, $j, 1);
my $hex_b = sprintf("\"\\x%02x\"", ord $b);
@@ -715,6 +734,52 @@ foreach my $test (@malformations) {
diag "The warnings were: " . join(", ", @warnings);
}
+ for my $j (1 .. $length - 1) {
+ my $partial = substr($bytes, 0, $j);
+
+ undef @warnings;
+
+ $ret = test_is_utf8_valid_partial_char_flags($bytes, $j, 0);
+ my $ret_should_be = 0;
+ my $comment = "";
+ if ($testname =~ /premature|short/ && $j < 2) {
+ $ret_should_be = 1;
+ $comment = ", but need 2 bytes to discern:";
+ }
+ elsif ($testname =~ /overlong/ && $length > 2) {
+ if ($length <= 7 && $j < 2) {
+ $ret_should_be = 1;
+ $comment = ", but need 2 bytes to discern:";
+ }
+ elsif ($length > 7 && $j < 7) {
+ $ret_should_be = 1;
+ $comment = ", but need 7 bytes to discern:";
+ }
+ }
+ elsif ($testname =~ /overflow/ && $testname !~ /first byte/) {
+ if (isASCII) {
+ if ($j < (($is64bit) ? 3 : 2)) {
+ $comment = ", but need $j bytes to discern:";
+ $ret_should_be = 1;
+ }
+ }
+ else {
+ if ($j < (($is64bit) ? 2 : 8)) {
+ $comment = ", but need $j bytes to discern:";
+ $ret_should_be = 1;
+ }
+ }
+ }
+ is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags("
+ . display_bytes($partial)
+ . ")$comment returns $ret_should_be");
+ unless (is(scalar @warnings, 0,
+ "$testname: is_utf8_valid_partial_char_flags() generated no warnings"))
+ {
+ diag "The warnings were: " . join(", ", @warnings);
+ }
+ }
+
# Test what happens when this malformation is not allowed
undef @warnings;
@@ -1174,6 +1239,59 @@ foreach my $test (@tests) {
{
diag "The warnings were: " . join(", ", @warnings);
}
+
+ # Test partial character handling, for each byte not a full character
+ for my $j (1.. $length - 1) {
+
+ # Skip the test for the interaction between overflow and above-31
+ # bit. It is really testing other things than the partial
+ # character tests, for which other tests in this file are
+ # sufficient
+ last if $testname =~ /overflow/;
+
+ foreach my $disallow_flag (0, $disallow_flags) {
+ my $partial = substr($bytes, 0, $j);
+ my $ret_should_be;
+ my $comment;
+ if ($disallow_flag) {
+ $ret_should_be = 0;
+ $comment = "disallowed";
+ }
+ else {
+ $ret_should_be = 1;
+ $comment = "allowed";
+ }
+
+ if ($disallow_flag) {
+ if ($testname =~ /non-character/) {
+ $ret_should_be = 1;
+ $comment .= ", but but need full char to discern";
+ }
+ elsif ($testname =~ /surrogate/) {
+ if ($j < 2) {
+ $ret_should_be = 1;
+ $comment .= ", but need 2 bytes to discern";
+ }
+ }
+ elsif ($testname =~ /first non_unicode/ && $j < 2) {
+ $ret_should_be = 1;
+ $comment .= ", but need 2 bytes to discern";
+ }
+ }
+
+ undef @warnings;
+
+ $ret = test_is_utf8_valid_partial_char_flags($partial, $j, $disallow_flag);
+ is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags("
+ . display_bytes($partial)
+ . "), $comment: returns $ret_should_be");
+ unless (is(scalar @warnings, 0,
+ "$testname: is_utf8_valid_partial_char_flags() generated no warnings"))
+ {
+ diag "The warnings were: " . join(", ", @warnings);
+ }
+ }
+ }
}
# This is more complicated than the malformations tested earlier, as there