summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alex Vandiver <alex@chmrr.net> 2009-05-30 12:38:28 -0400
committer: Rafael Garcia-Suarez <rgarciasuarez@gmail.com> 2009-06-06 09:13:21 +0200
commit: 8e91ec7fd4ffa8cda89687c8214203a3b3e8a0c8 (patch)
tree: e1c27005a0b29bf12bd3aa78da4920736e1d3572
parent: f699e9507ccd6209d4f240b42b00f92c8cf2f2b0 (diff)
download: perl-8e91ec7fd4ffa8cda89687c8214203a3b3e8a0c8.tar.gz
Faster utf8_length method -- fixes [RT#50250]
UTF8SKIP appears to be a rather slow call, so use UTF8_IS_INVARIANT to avoid it whenever possible. Also defer the malformed-UTF-8 check until after the loop: an overrun past the end of the buffer can be detected once, after the loop terminates (e != s), instead of on every pass through the loop.
-rw-r--r-- utf8.c 28
1 files changed, 15 insertions, 13 deletions
diff --git a/utf8.c b/utf8.c
index 4f4c3eaffe..b5a380962a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -682,7 +682,6 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e)
{
dVAR;
STRLEN len = 0;
- U8 t = 0;
PERL_ARGS_ASSERT_UTF8_LENGTH;
@@ -693,20 +692,23 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e)
if (e < s)
goto warn_and_return;
while (s < e) {
- t = UTF8SKIP(s);
- if (e - s < t) {
- warn_and_return:
- if (ckWARN_d(WARN_UTF8)) {
- if (PL_op)
- Perl_warner(aTHX_ packWARN(WARN_UTF8),
+ if (!UTF8_IS_INVARIANT(*s))
+ s += UTF8SKIP(s);
+ else
+ s++;
+ len++;
+ }
+
+ if (e != s) {
+ len--;
+ warn_and_return:
+ if (ckWARN_d(WARN_UTF8)) {
+ if (PL_op)
+ Perl_warner(aTHX_ packWARN(WARN_UTF8),
"%s in %s", unees, OP_DESC(PL_op));
- else
- Perl_warner(aTHX_ packWARN(WARN_UTF8), unees);
- }
- return len;
+ else
+ Perl_warner(aTHX_ packWARN(WARN_UTF8), unees);
}
- s += t;
- len++;
}
return len;