diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2000-11-26 19:01:05 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-11-26 19:01:05 +0000 |
commit | 02eb7b47b8a6793752e5b001af6e62c374b2c440 (patch) | |
tree | 07c212ac1a760e5468e7b769b861a1fe00d96718 | |
parent | 21477fb41342ef2f0f21af5ef95caf64eee65dee (diff) | |
download | perl-02eb7b47b8a6793752e5b001af6e62c374b2c440.tar.gz |
Make utf8_length() and utf8_distance() (the latter of which
is unused at the moment) less forgiving about bad UTF-8.
p4raw-id: //depot/perl@7869
-rwxr-xr-x | embed.pl | 2 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | utf8.c | 28 |
3 files changed, 21 insertions, 11 deletions
@@ -2072,7 +2072,7 @@ p |void |utilize |int aver|I32 floor|OP* version|OP* id|OP* arg Ap |U8* |utf16_to_utf8 |U8* p|U8 *d|I32 bytelen|I32 *newlen Ap |U8* |utf16_to_utf8_reversed|U8* p|U8 *d|I32 bytelen|I32 *newlen Ap |STRLEN |utf8_length |U8* s|U8 *e -Ap |I32 |utf8_distance |U8 *a|U8 *b +Ap |IV |utf8_distance |U8 *a|U8 *b Ap |U8* |utf8_hop |U8 *s|I32 off ApM |U8* |utf8_to_bytes |U8 *s|STRLEN *len ApM |U8* |bytes_to_utf8 |U8 *s|STRLEN *len @@ -807,7 +807,7 @@ PERL_CALLCONV void Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* id, PERL_CALLCONV U8* Perl_utf16_to_utf8(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen); PERL_CALLCONV U8* Perl_utf16_to_utf8_reversed(pTHX_ U8* p, U8 *d, I32 bytelen, I32 *newlen); PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ U8* s, U8 *e); -PERL_CALLCONV I32 Perl_utf8_distance(pTHX_ U8 *a, U8 *b); +PERL_CALLCONV IV Perl_utf8_distance(pTHX_ U8 *a, U8 *b); PERL_CALLCONV U8* Perl_utf8_hop(pTHX_ U8 *s, I32 off); PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len); PERL_CALLCONV U8* Perl_bytes_to_utf8(pTHX_ U8 *s, STRLEN *len); @@ -357,8 +357,8 @@ Perl_utf8_to_uv_simple(pTHX_ U8* s, STRLEN* retlen) =for apidoc|utf8_length|U8 *s|U8 *e Return the length of the UTF-8 char encoded string C<s> in characters. -Stops at string C<e>. If C<e E<lt> s> or if the scan would end up -past C<e>, return -1. +Stops at C<e> (inclusive). If C<e E<lt> s> or if the scan would end +up past C<e>, croaks. 
=cut */ @@ -369,12 +369,12 @@ Perl_utf8_length(pTHX_ U8* s, U8* e) STRLEN len = 0; if (e < s) - return -1; + Perl_croak(aTHX_ "panic: utf8_length: unexpected end"); while (s < e) { - STRLEN t = UTF8SKIP(s); + U8 t = UTF8SKIP(s); if (e - s < t) - return -1; + Perl_croak(aTHX_ "panic: utf8_length: unaligned end"); s += t; len++; } @@ -385,22 +385,32 @@ Perl_utf8_length(pTHX_ U8* s, U8* e) /* utf8_distance(a,b) returns the number of UTF8 characters between the pointers a and b */ -I32 +IV Perl_utf8_distance(pTHX_ U8 *a, U8 *b) { - I32 off = 0; + IV off = 0; + if (a < b) { while (a < b) { - a += UTF8SKIP(a); + U8 c = UTF8SKIP(a); + + if (b - a < c) + Perl_croak(aTHX_ "panic: utf8_distance: unaligned end"); + a += c; off--; } } else { while (b < a) { - b += UTF8SKIP(b); + U8 c = UTF8SKIP(b); + + if (a - b < c) + Perl_croak(aTHX_ "panic: utf8_distance: unaligned end"); + b += c; off++; } } + return off; } |