diff options
author | Alex Vandiver <alex@chmrr.net> | 2009-05-30 12:38:28 -0400 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2009-06-06 09:13:21 +0200 |
commit | 8e91ec7fd4ffa8cda89687c8214203a3b3e8a0c8 (patch) | |
tree | e1c27005a0b29bf12bd3aa78da4920736e1d3572 | |
parent | f699e9507ccd6209d4f240b42b00f92c8cf2f2b0 (diff) | |
download | perl-8e91ec7fd4ffa8cda89687c8214203a3b3e8a0c8.tar.gz |
Faster utf8_length method -- fixes [RT#50250]
UTF8SKIP appears to be a rather slow call; use UTF8_IS_INVARIANT to
avoid calling it whenever possible. We also defer the malformed UTF-8
check until after the loop, since it can be performed once after the
loop terminates instead of on every pass through the loop.
-rw-r--r-- | utf8.c | 28 |
1 files changed, 15 insertions, 13 deletions
@@ -682,7 +682,6 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e) { dVAR; STRLEN len = 0; - U8 t = 0; PERL_ARGS_ASSERT_UTF8_LENGTH; @@ -693,20 +692,23 @@ Perl_utf8_length(pTHX_ const U8 *s, const U8 *e) if (e < s) goto warn_and_return; while (s < e) { - t = UTF8SKIP(s); - if (e - s < t) { - warn_and_return: - if (ckWARN_d(WARN_UTF8)) { - if (PL_op) - Perl_warner(aTHX_ packWARN(WARN_UTF8), + if (!UTF8_IS_INVARIANT(*s)) + s += UTF8SKIP(s); + else + s++; + len++; + } + + if (e != s) { + len--; + warn_and_return: + if (ckWARN_d(WARN_UTF8)) { + if (PL_op) + Perl_warner(aTHX_ packWARN(WARN_UTF8), "%s in %s", unees, OP_DESC(PL_op)); - else - Perl_warner(aTHX_ packWARN(WARN_UTF8), unees); - } - return len; + else + Perl_warner(aTHX_ packWARN(WARN_UTF8), unees); } - s += t; - len++; } return len; |