diff options
author | Jarkko Hietaniemi <jhi@iki.fi> | 2003-09-10 06:54:02 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2003-09-10 06:54:02 +0000 |
commit | 81cd54e3d8dc0f62b7c4bf5206036c9493ef5300 (patch) | |
tree | 6145c24cb1216bb566d3ae381a9dd9c55fba98ad /utf8.c | |
parent | e944adaebcc9a91185478dbc0f0fe933f108b22d (diff) | |
download | perl-81cd54e3d8dc0f62b7c4bf5206036c9493ef5300.tar.gz |
A new UTF-8 API, Perl_is_utf8_string_loc(), a variant
of Perl_is_utf8_string().
p4raw-id: //depot/perl@21152
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 49 |
1 files changed, 49 insertions, 0 deletions
@@ -257,6 +257,55 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) } /* +=for apidoc A|bool|is_utf8_string_loc|U8 *s|STRLEN len|U8 **p + +Like is_ut8_string but store the location of the failure in +the last argument. + +=cut +*/ + +bool +Perl_is_utf8_string_loc(pTHX_ U8 *s, STRLEN len, U8 **p) +{ + U8* x = s; + U8* send; + STRLEN c; + + if (!len) + len = strlen((char *)s); + send = s + len; + + while (x < send) { + /* Inline the easy bits of is_utf8_char() here for speed... */ + if (UTF8_IS_INVARIANT(*x)) + c = 1; + else if (!UTF8_IS_START(*x)) { + if (p) + *p = x; + return FALSE; + } + else { + /* ... and call is_utf8_char() only if really needed. */ + c = is_utf8_char(x); + if (!c) { + if (p) + *p = x; + return FALSE; + } + } + x += c; + } + if (x != send) { + if (p) + *p = x; + return FALSE; + } + + return TRUE; +} + +/* =for apidoc A|UV|utf8n_to_uvuni|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags Bottom level UTF-8 decode routine. |