diff options
-rw-r--r-- | embed.h | 4 | ||||
-rwxr-xr-x | embed.pl | 1 | ||||
-rw-r--r-- | objXSUB.h | 4 | ||||
-rw-r--r-- | perlapi.c | 7 | ||||
-rw-r--r-- | proto.h | 1 | ||||
-rw-r--r-- | sv.c | 18 | ||||
-rw-r--r-- | utf8.c | 29 |
7 files changed, 52 insertions, 12 deletions
@@ -725,6 +725,7 @@
 #define utilize Perl_utilize
 #define utf16_to_utf8 Perl_utf16_to_utf8
 #define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#define utf8_length Perl_utf8_length
 #define utf8_distance Perl_utf8_distance
 #define utf8_hop Perl_utf8_hop
 #define utf8_to_bytes Perl_utf8_to_bytes
@@ -2186,6 +2187,7 @@
 #define utilize(a,b,c,d,e) Perl_utilize(aTHX_ a,b,c,d,e)
 #define utf16_to_utf8(a,b,c,d) Perl_utf16_to_utf8(aTHX_ a,b,c,d)
 #define utf16_to_utf8_reversed(a,b,c,d) Perl_utf16_to_utf8_reversed(aTHX_ a,b,c,d)
+#define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b)
 #define utf8_distance(a,b) Perl_utf8_distance(aTHX_ a,b)
 #define utf8_hop(a,b) Perl_utf8_hop(aTHX_ a,b)
 #define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
@@ -4284,6 +4286,8 @@
 #define utf16_to_utf8 Perl_utf16_to_utf8
 #define Perl_utf16_to_utf8_reversed CPerlObj::Perl_utf16_to_utf8_reversed
 #define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#define Perl_utf8_length CPerlObj::Perl_utf8_length
+#define utf8_length Perl_utf8_length
 #define Perl_utf8_distance CPerlObj::Perl_utf8_distance
 #define utf8_distance Perl_utf8_distance
 #define Perl_utf8_hop CPerlObj::Perl_utf8_hop
@@ -2070,6 +2070,7 @@ p |void |unshare_hek |HEK* hek
 p |void |utilize |int aver|I32 floor|OP* version|OP* id|OP* arg
 Ap |U8* |utf16_to_utf8 |U8* p|U8 *d|I32 bytelen|I32 *newlen
 Ap |U8* |utf16_to_utf8_reversed|U8* p|U8 *d|I32 bytelen|I32 *newlen
+Ap |STRLEN |utf8_length |U8* s|U8 *e
 Ap |I32 |utf8_distance |U8 *a|U8 *b
 Ap |U8* |utf8_hop |U8 *s|I32 off
 ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len
@@ -1853,6 +1853,10 @@
 #define Perl_utf16_to_utf8_reversed pPerl->Perl_utf16_to_utf8_reversed
 #undef utf16_to_utf8_reversed
 #define utf16_to_utf8_reversed Perl_utf16_to_utf8_reversed
+#undef Perl_utf8_length
+#define Perl_utf8_length pPerl->Perl_utf8_length
+#undef utf8_length
+#define utf8_length Perl_utf8_length
 #undef Perl_utf8_distance
 #define Perl_utf8_distance pPerl->Perl_utf8_distance
 #undef utf8_distance
@@ -3350,6
+3350,13 @@ Perl_utf16_to_utf8_reversed(pTHXo_ U8* p, U8 *d, I32 bytelen, I32 *newlen)
     return ((CPerlObj*)pPerl)->Perl_utf16_to_utf8_reversed(p, d, bytelen, newlen);
 }
 
+#undef Perl_utf8_length
+STRLEN
+Perl_utf8_length(pTHXo_ U8* s, U8 *e)
+{
+    return ((CPerlObj*)pPerl)->Perl_utf8_length(s, e);
+}
+
 #undef Perl_utf8_distance
 I32
 Perl_utf8_distance(pTHXo_ U8 *a, U8 *b)
@@ -805,6 +805,7 @@ PERL_CALLCONV void Perl_unshare_hek(pTHX_ HEK* hek);
 PERL_CALLCONV void Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* id, OP* arg);
 PERL_CALLCONV U8* Perl_utf16_to_utf8(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
 PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen);
+PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ U8* s, U8 *e);
 PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b);
 PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off);
 PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len);
@@ -3994,26 +3994,21 @@ UTF8 bytes as a single character.
 STRLEN
 Perl_sv_len_utf8(pTHX_ register SV *sv)
 {
-    U8 *s;
-    U8 *send;
-    STRLEN len;
-
     if (!sv)
         return 0;
 
 #ifdef NOTYET
     if (SvGMAGICAL(sv))
-        len = mg_length(sv);
+        return mg_length(sv);
     else
 #endif
-        s = (U8*)SvPV(sv, len);
-    send = s + len;
-    len = 0;
-    while (s < send) {
-        s += UTF8SKIP(s);
-        len++;
+    {
+        STRLEN len;
+        U8 *s = (U8*)SvPV(sv, len);
+
+        /* Perl_utf8_length is declared with pTHX_, so a direct call must pass aTHX_. */
+        return Perl_utf8_length(aTHX_ s, s + len);
     }
-    return len;
 }
 
 void
@@ -353,6 +353,35 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen)
     return Perl_utf8_to_uv(aTHX_ s, (STRLEN)-1, retlen, 0);
 }
 
+/*
+=for apidoc A|STRLEN|utf8_length|U8* s|U8 *e
+
+Return the length of the UTF-8 char encoded string C<s> in characters.
+Stops at C<e>.  If C<e E<lt> s> or if the scan would end up
+past C<e>, returns C<(STRLEN)-1>.
+
+=cut
+*/
+
+STRLEN
+Perl_utf8_length(pTHX_ U8* s, U8* e)
+{
+    const STRLEN failed = (STRLEN)-1;   /* the "-1" result, spelled in the return type */
+    STRLEN nchars;
+
+    if (e < s)                          /* end pointer precedes start pointer */
+        return failed;
+    for (nchars = 0; s < e; nchars++) { /* one pass per sequence of UTF8SKIP(s) bytes */
+        const STRLEN width = UTF8SKIP(s);
+
+        if (e - s < width)              /* this sequence would extend past e */
+            return failed;
+        s += width;
+    }
+
+    return nchars;
+}
+
 /* utf8_distance(a,b) returns the number of UTF8 characters between
    the pointers a and b */
 