From 81cd54e3d8dc0f62b7c4bf5206036c9493ef5300 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Wed, 10 Sep 2003 06:54:02 +0000 Subject: A new UTF-8 API, Perl_is_utf8_string_loc(), a variant of Perl_utf8_is_string(). p4raw-id: //depot/perl@21152 --- utf8.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'utf8.c') diff --git a/utf8.c b/utf8.c index 21d0f08a19..ad8758e3d7 100644 --- a/utf8.c +++ b/utf8.c @@ -256,6 +256,55 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) return TRUE; } +/* +=for apidoc A|bool|is_utf8_string_loc|U8 *s|STRLEN len|U8 **p + +Like is_ut8_string but store the location of the failure in +the last argument. + +=cut +*/ + +bool +Perl_is_utf8_string_loc(pTHX_ U8 *s, STRLEN len, U8 **p) +{ + U8* x = s; + U8* send; + STRLEN c; + + if (!len) + len = strlen((char *)s); + send = s + len; + + while (x < send) { + /* Inline the easy bits of is_utf8_char() here for speed... */ + if (UTF8_IS_INVARIANT(*x)) + c = 1; + else if (!UTF8_IS_START(*x)) { + if (p) + *p = x; + return FALSE; + } + else { + /* ... and call is_utf8_char() only if really needed. */ + c = is_utf8_char(x); + if (!c) { + if (p) + *p = x; + return FALSE; + } + } + x += c; + } + if (x != send) { + if (p) + *p = x; + return FALSE; + } + + return TRUE; +} + /* =for apidoc A|UV|utf8n_to_uvuni|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags -- cgit v1.2.1