summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.h4
-rwxr-xr-xembed.pl1
-rw-r--r--objXSUB.h4
-rw-r--r--perlapi.c7
-rw-r--r--proto.h1
-rw-r--r--sv.c18
-rw-r--r--utf8.c29
7 files changed, 52 insertions, 12 deletions
diff --git a/embed.h b/embed.h
index 7bb132d59a..1301e3e7fa 100644
--- a/embed.h
+++ b/embed.h
@@ -725,6 +725,7 @@
#define utilize Perl_utilize
#define utf16_to_utf8 Perl_utf16_to_utf8
#define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#define utf8_length Perl_utf8_length
#define utf8_distance Perl_utf8_distance
#define utf8_hop Perl_utf8_hop
#define utf8_to_bytes Perl_utf8_to_bytes
@@ -2186,6 +2187,7 @@
#define utilize(a,b,c,d,e) Perl_utilize(aTHX_ a,b,c,d,e)
#define utf16_to_utf8(a,b,c,d) Perl_utf16_to_utf8(aTHX_ a,b,c,d)
#define utf16_to_utf8_reversed(a,b,c,d) Perl_utf16_to_utf8_reversed(aTHX_ a,b,c,d)
+#define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b)
#define utf8_distance(a,b) Perl_utf8_distance(aTHX_ a,b)
#define utf8_hop(a,b) Perl_utf8_hop(aTHX_ a,b)
#define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
@@ -4284,6 +4286,8 @@
#define utf16_to_utf8 Perl_utf16_to_utf8
#define Perl_utf16_to_utf8_reversed CPerlObj::Perl_utf16_to_utf8_reversed
#define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#define Perl_utf8_length CPerlObj::Perl_utf8_length
+#define utf8_length Perl_utf8_length
#define Perl_utf8_distance CPerlObj::Perl_utf8_distance
#define utf8_distance Perl_utf8_distance
#define Perl_utf8_hop CPerlObj::Perl_utf8_hop
diff --git a/embed.pl b/embed.pl
index a19c4398c0..b8abef3a58 100755
--- a/embed.pl
+++ b/embed.pl
@@ -2070,6 +2070,7 @@ p |void |unshare_hek |HEK* hek
p |void |utilize |int aver|I32 floor|OP* version|OP* id|OP* arg
Ap |U8* |utf16_to_utf8 |U8* p|U8 *d|I32 bytelen|I32 *newlen
Ap |U8* |utf16_to_utf8_reversed|U8* p|U8 *d|I32 bytelen|I32 *newlen
+Ap |STRLEN |utf8_length |U8* s|U8 *e
Ap |I32 |utf8_distance |U8 *a|U8 *b
Ap |U8* |utf8_hop |U8 *s|I32 off
ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len
diff --git a/objXSUB.h b/objXSUB.h
index 5827b7225c..88eb400f69 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -1853,6 +1853,10 @@
#define Perl_utf16_to_utf8_reversed pPerl->Perl_utf16_to_utf8_reversed
#undef utf16_to_utf8_reversed
#define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#undef Perl_utf8_length
+#define Perl_utf8_length pPerl->Perl_utf8_length
+#undef utf8_length
+#define utf8_length Perl_utf8_length
#undef Perl_utf8_distance
#define Perl_utf8_distance pPerl->Perl_utf8_distance
#undef utf8_distance
diff --git a/perlapi.c b/perlapi.c
index a9dd2f070d..a2e73e4bd0 100644
--- a/perlapi.c
+++ b/perlapi.c
@@ -3350,6 +3350,13 @@ Perl_utf16_to_utf8_reversed(pTHXo_ U8* p, U8 *d, I32 bytelen, I32 *newlen)
return ((CPerlObj*)pPerl)->Perl_utf16_to_utf8_reversed(p, d, bytelen, newlen);
}
+#undef Perl_utf8_length
+STRLEN
+Perl_utf8_length(pTHXo_ U8* s, U8 *e)
+{
+ return ((CPerlObj*)pPerl)->Perl_utf8_length(s, e);
+}
+
#undef Perl_utf8_distance
I32
Perl_utf8_distance(pTHXo_ U8 *a, U8 *b)
diff --git a/proto.h b/proto.h
index 052346d782..91b7f86d10 100644
--- a/proto.h
+++ b/proto.h
@@ -805,6 +805,7 @@ PERL_CALLCONV void Perl_unshare_hek(pTHX_ HEK* hek);
PERL_CALLCONV void Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* id, OP* arg);
PERL_CALLCONV U8* Perl_utf16_to_utf8(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
+PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ U8* s, U8 *e);
PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off);
PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
diff --git a/sv.c b/sv.c
index 375b9564fb..e193bc52ed 100644
--- a/sv.c
+++ b/sv.c
@@ -3994,26 +3994,20 @@ UTF8 bytes as a single character.
STRLEN
Perl_sv_len_utf8(pTHX_ register SV *sv)
{
- U8 *s;
- U8 *send;
- STRLEN len;
-
if (!sv)
return 0;
#ifdef NOTYET
if (SvGMAGICAL(sv))
- len = mg_length(sv);
+ return mg_length(sv);
else
#endif
- s = (U8*)SvPV(sv, len);
- send = s + len;
- len = 0;
- while (s < send) {
- s += UTF8SKIP(s);
- len++;
+ {
+ STRLEN len; /* byte length of the string buffer, filled in by SvPV */
+ U8 *s = (U8*)SvPV(sv, len);
+
+ return Perl_utf8_length(aTHX_ s, s + len); /* direct Perl_ call: pass the context, as the utf8_length() macro does */
}
- return len;
}
void
diff --git a/utf8.c b/utf8.c
index f1b80a43b7..fc625dc464 100644
--- a/utf8.c
+++ b/utf8.c
@@ -353,6 +353,35 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
return Perl_utf8_to_uv(aTHX_ s, (STRLEN)-1, retlen, 0);
}
+/*
+=for apidoc A|STRLEN|utf8_length|U8* s|U8 *e
+
+Return the length, in characters, of the UTF-8 encoded string that starts at
+C<s> and ends just before C<e>. If C<e E<lt> s>, or if the last character
+would extend past C<e>, return C<(STRLEN)-1> (the return type is STRLEN).
+
+=cut
+*/
+
+STRLEN
+Perl_utf8_length(pTHX_ U8* s, U8* e)
+{
+ STRLEN len = 0;
+
+ if (e < s)
+ return (STRLEN)-1; /* reversed range */
+ while (s < e) {
+ STRLEN t = UTF8SKIP(s); /* bytes to skip to reach the next character */
+
+ if ((STRLEN)(e - s) < t) /* s < e here, so e - s is positive */
+ return (STRLEN)-1; /* character at s would run past e */
+ s += t;
+ len++;
+ }
+
+ return len;
+}
+
/* utf8_distance(a,b) returns the number of UTF8 characters between
the pointers a and b */