diff options
author | Karl Williamson <khw@cpan.org> | 2016-08-28 22:04:16 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-08-31 20:32:37 -0600 |
commit | 2b6852c008f43c765471849e5576c5425c5d9e23 (patch) | |
tree | 3cdd8f02788e9061d090782ed6be525887204a92 /pp_sys.c | |
parent | 4dab108fb5e7e21a547733bb00ddb5d8bffd936d (diff) | |
download | perl-2b6852c008f43c765471849e5576c5425c5d9e23.tar.gz |
Use new is_utf8_valid_partial_char()
This new function can be used in the implementation of the file test
operators, -B and -T, to see if the whole fixed-length buffer is valid
UTF-8. Previously, if all bytes were valid UTF-8 except for the bytes at
the end that could have been a partial character, it assumed the whole
thing was UTF-8 without checking that those trailing bytes really were a
valid start of a character. This improves the prediction slightly.
Diffstat (limited to 'pp_sys.c')
-rw-r--r-- | pp_sys.c | 8 |
1 files changed, 4 insertions, 4 deletions
@@ -3559,10 +3559,10 @@ PP(pp_fttext) const U8 *ep; /* Here contains a variant under UTF-8 . See if the entire string is - * UTF-8. But the buffer may end in a partial character, so consider - * it UTF-8 if the first non-UTF8 char is an ending partial */ - if (is_utf8_string_loc((U8 *) s, len, &ep) - || ep + UTF8SKIP(ep) > (U8 *) (s + len)) + * UTF-8. But the buffer may end in a partial character, so if it + * failed, see if the failure was due just to that */ + if ( is_utf8_string_loc((U8 *) s, len, &ep) + || is_utf8_valid_partial_char(ep, (U8 *) s + len)) { if (PL_op->op_type == OP_FTTEXT) { FT_RETURNYES; |