Perl_sv_len_utf8 can use the UTF-8 offset cache to reduce its linear scan.

Previously, if the scalar's character length wasn't yet known but a cached offset partway through the string was, that offset was ignored and the linear UTF-8 scan covered the entire length of the scalar.
author: Nicholas Clark <nick@ccl4.org> 2010-07-12 13:09:28 +0100
committer: Nicholas Clark <nick@ccl4.org> 2010-07-12 13:43:19 +0100
commit: 6ef2ab89d2567e144b289574a2e087dd7eec7894 (patch)
tree: 9707a41660cfeafb4945777c9b61bab320d0e92d /sv.c
parent: 0d7caf4cc74eb29a95f71af5a91fef30ca848e41 (diff)
download: perl-6ef2ab89d2567e144b289574a2e087dd7eec7894.tar.gz
1 file changed, 12 insertions, 2 deletions
diff --git a/sv.c b/sv.c
index 3a25abcf10..6cfb488c19 100644
--- a/sv.c
+++ b/sv.c
@@ -6047,8 +6047,18 @@ Perl_sv_len_utf8(pTHX_ register SV *const sv)
 	    STRLEN ulen;
 	    MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
 
-	    if (mg && mg->mg_len != -1) {
-		ulen = mg->mg_len;
+	    if (mg && (mg->mg_len != -1 || mg->mg_ptr)) {
+		if (mg->mg_len != -1)
+		    ulen = mg->mg_len;
+		else {
+		    /* We can use the offset cache for a headstart.
+		       The longer value is stored in the first pair.  */
+		    STRLEN *cache = (STRLEN *) mg->mg_ptr;
+
+		    ulen = cache[0] + Perl_utf8_length(aTHX_ s + cache[1],
+						       s + len);
+		}
+		
 		if (PL_utf8cache < 0) {
 		    const STRLEN real = Perl_utf8_length(aTHX_ s, s + len);
 		    if (real != ulen) {
author	Nicholas Clark <nick@ccl4.org>	2010-07-12 13:09:28 +0100
committer	Nicholas Clark <nick@ccl4.org>	2010-07-12 13:43:19 +0100
commit	6ef2ab89d2567e144b289574a2e087dd7eec7894 (patch)
tree	9707a41660cfeafb4945777c9b61bab320d0e92d /sv.c
parent	0d7caf4cc74eb29a95f71af5a91fef30ca848e41 (diff)
download	perl-6ef2ab89d2567e144b289574a2e087dd7eec7894.tar.gz