summary | refs | log | tree | commit | diff
path: root/pp_sys.c
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2016-08-28 22:04:16 -0600
committer Karl Williamson <khw@cpan.org> 2016-08-31 20:32:37 -0600
commit 2b6852c008f43c765471849e5576c5425c5d9e23 (patch)
tree 3cdd8f02788e9061d090782ed6be525887204a92 /pp_sys.c
parent 4dab108fb5e7e21a547733bb00ddb5d8bffd936d (diff)
download perl-2b6852c008f43c765471849e5576c5425c5d9e23.tar.gz
Use new is_utf8_valid_partial_char()
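This new function can be used in the implementation of the file test operators, -B and -T, to see if the whole fixed-length buffer is valid UTF-8. Previously, if all bytes were valid UTF-8 except for the bytes at the end of the buffer that could have been a partial character, the whole buffer was assumed to be UTF-8; the old test only checked that the length implied by the first offending byte ran past the end of the buffer. Now those trailing bytes are themselves checked for being a valid partial character. This improves the prediction slightly.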
Diffstat (limited to 'pp_sys.c')
-rw-r--r-- pp_sys.c 8
1 file changed, 4 insertions, 4 deletions
diff --git a/pp_sys.c b/pp_sys.c
index 7e13e38dac..a198d4e525 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -3559,10 +3559,10 @@ PP(pp_fttext)
const U8 *ep;
/* Here contains a variant under UTF-8 . See if the entire string is
- * UTF-8. But the buffer may end in a partial character, so consider
- * it UTF-8 if the first non-UTF8 char is an ending partial */
- if (is_utf8_string_loc((U8 *) s, len, &ep)
- || ep + UTF8SKIP(ep) > (U8 *) (s + len))
+ * UTF-8. But the buffer may end in a partial character, so if it
+ * failed, see if the failure was due just to that */
+ if ( is_utf8_string_loc((U8 *) s, len, &ep)
+ || is_utf8_valid_partial_char(ep, (U8 *) s + len))
{
if (PL_op->op_type == OP_FTTEXT) {
FT_RETURNYES;