diff options
author | Karl Williamson <khw@cpan.org> | 2016-09-08 11:34:15 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-09-17 21:10:50 -0600 |
commit | 3d56ecbe82b99d21cf2f5e67297d4236e38b282d (patch) | |
tree | b85011159de9be80373f9e27744593f7248849db | |
parent | f1c999a79ad93bb81cbb7b1bec96e06c33773b81 (diff) | |
download | perl-3d56ecbe82b99d21cf2f5e67297d4236e38b282d.tar.gz |
Add tests for is_valid_partial_utf8_char_flags()
-rw-r--r-- | ext/XS-APItest/APItest.xs | 9 | ||||
-rw-r--r-- | ext/XS-APItest/t/utf8.t | 120 |
2 files changed, 128 insertions, 1 deletions
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index d2c1c33e30..b20206e5d6 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -5327,6 +5327,15 @@ test_isUTF8_CHAR(char *s, STRLEN len) OUTPUT: RETVAL +IV +test_is_utf8_valid_partial_char_flags(char *s, STRLEN len, U32 flags) + CODE: + /* RETVAL should be bool, but making it IV allows us to test it + * returning 0 or 1 */ + RETVAL = is_utf8_valid_partial_char_flags((U8 *) s, (U8 *) s + len, flags); + OUTPUT: + RETVAL + UV test_toLOWER(UV ord) CODE: diff --git a/ext/XS-APItest/t/utf8.t b/ext/XS-APItest/t/utf8.t index 735febaf01..c909ebbfe9 100644 --- a/ext/XS-APItest/t/utf8.t +++ b/ext/XS-APItest/t/utf8.t @@ -338,7 +338,26 @@ for my $u (sort { utf8::unicode_to_native($a) <=> utf8::unicode_to_native($b) } "Verify UTF8_SKIP(chr $hex_n) is $uvchr_skip_should_be"); use bytes; - for (my $j = 0; $j < length $n_chr; $j++) { + my $byte_length = length $n_chr; + for (my $j = 0; $j < $byte_length; $j++) { + undef @warnings; + + if ($j == $byte_length - 1) { + my $ret = test_is_utf8_valid_partial_char_flags($n_chr, $byte_length, 0); + is($ret, 0, " Verify is_utf8_valid_partial_char_flags(" . display_bytes($n_chr) . ") returns 0 for full character"); + } + else { + my $bytes_so_far = substr($n_chr, 0, $j + 1); + my $ret = test_is_utf8_valid_partial_char_flags($bytes_so_far, $j + 1, 0); + is($ret, 1, " Verify is_utf8_valid_partial_char_flags(" . display_bytes($bytes_so_far) . ") returns 1"); + } + + unless (is(scalar @warnings, 0, + " Verify is_utf8_valid_partial_char_flags generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + my $b = substr($n_chr, $j, 1); my $hex_b = sprintf("\"\\x%02x\"", ord $b); @@ -715,6 +734,52 @@ foreach my $test (@malformations) { diag "The warnings were: " . join(", ", @warnings); } + for my $j (1 .. $length - 1) { + my $partial = substr($bytes, 0, $j); + + undef @warnings; + + $ret = test_is_utf8_valid_partial_char_flags($bytes, $j, 0); + my $ret_should_be = 0; + my $comment = ""; + if ($testname =~ /premature|short/ && $j < 2) { + $ret_should_be = 1; + $comment = ", but need 2 bytes to discern:"; + } + elsif ($testname =~ /overlong/ && $length > 2) { + if ($length <= 7 && $j < 2) { + $ret_should_be = 1; + $comment = ", but need 2 bytes to discern:"; + } + elsif ($length > 7 && $j < 7) { + $ret_should_be = 1; + $comment = ", but need 7 bytes to discern:"; + } + } + elsif ($testname =~ /overflow/ && $testname !~ /first byte/) { + if (isASCII) { + if ($j < (($is64bit) ? 3 : 2)) { + $comment = ", but need $j bytes to discern:"; + $ret_should_be = 1; + } + } + else { + if ($j < (($is64bit) ? 2 : 8)) { + $comment = ", but need $j bytes to discern:"; + $ret_should_be = 1; + } + } + } + is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags(" + . display_bytes($partial) + . ")$comment returns $ret_should_be"); + unless (is(scalar @warnings, 0, + "$testname: is_utf8_valid_partial_char_flags() generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + } + # Test what happens when this malformation is not allowed undef @warnings; @@ -1174,6 +1239,59 @@ foreach my $test (@tests) { { diag "The warnings were: " . join(", ", @warnings); } + + # Test partial character handling, for each byte not a full character + for my $j (1.. $length - 1) { + + # Skip the test for the interaction between overflow and above-31 + # bit. It is really testing other things than the partial + # character tests, for which other tests in this file are + # sufficient + last if $testname =~ /overflow/; + + foreach my $disallow_flag (0, $disallow_flags) { + my $partial = substr($bytes, 0, $j); + my $ret_should_be; + my $comment; + if ($disallow_flag) { + $ret_should_be = 0; + $comment = "disallowed"; + } + else { + $ret_should_be = 1; + $comment = "allowed"; + } + + if ($disallow_flag) { + if ($testname =~ /non-character/) { + $ret_should_be = 1; + $comment .= ", but but need full char to discern"; + } + elsif ($testname =~ /surrogate/) { + if ($j < 2) { + $ret_should_be = 1; + $comment .= ", but need 2 bytes to discern"; + } + } + elsif ($testname =~ /first non_unicode/ && $j < 2) { + $ret_should_be = 1; + $comment .= ", but need 2 bytes to discern"; + } + } + + undef @warnings; + + $ret = test_is_utf8_valid_partial_char_flags($partial, $j, $disallow_flag); + is($ret, $ret_should_be, "$testname: is_utf8_valid_partial_char_flags(" + . display_bytes($partial) + . "), $comment: returns $ret_should_be"); + unless (is(scalar @warnings, 0, + "$testname: is_utf8_valid_partial_char_flags() generated no warnings")) + { + diag "The warnings were: " . join(", ", @warnings); + } + } + } } # This is more complicated than the malformations tested earlier, as there |