summaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2003-09-10 06:54:02 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2003-09-10 06:54:02 +0000
commit81cd54e3d8dc0f62b7c4bf5206036c9493ef5300 (patch)
tree6145c24cb1216bb566d3ae381a9dd9c55fba98ad /utf8.c
parente944adaebcc9a91185478dbc0f0fe933f108b22d (diff)
downloadperl-81cd54e3d8dc0f62b7c4bf5206036c9493ef5300.tar.gz
A new UTF-8 API, Perl_is_utf8_string_loc(), a variant
of Perl_is_utf8_string(). p4raw-id: //depot/perl@21152
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c49
1 files changed, 49 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 21d0f08a19..ad8758e3d7 100644
--- a/utf8.c
+++ b/utf8.c
@@ -257,6 +257,55 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
}
/*
+=for apidoc A|bool|is_utf8_string_loc|U8 *s|STRLEN len|U8 **p
+
+Like is_utf8_string() but stores the location of the failure in
+the last argument.
+
+=cut
+*/
+
+bool
+Perl_is_utf8_string_loc(pTHX_ U8 *s, STRLEN len, U8 **p)
+{
+    /* Checks that s[0..len-1] is well-formed UTF-8; if len is 0 the
+     * length is taken from strlen(s).  Returns TRUE on success and leaves
+     * *p untouched.  On failure returns FALSE and, when p is non-NULL,
+     * sets *p to the first byte of the offending character. */
+    U8 *x = s;
+    U8 *send;
+    STRLEN c;
+
+    if (!len)
+        len = strlen((char *)s);
+    send = s + len;
+
+    while (x < send) {
+        /* Inline the easy bits of is_utf8_char() here for speed... */
+        if (UTF8_IS_INVARIANT(*x))
+            c = 1;
+        else if (!UTF8_IS_START(*x))
+            c = 0;      /* not a valid first byte: fail right here */
+        else
+            /* ... and call is_utf8_char() only if really needed. */
+            c = is_utf8_char(x);
+        /* Reject a malformed character and also one whose claimed length
+         * runs past send.  Testing before advancing keeps x inside the
+         * buffer, so *p names the start of the truncated character
+         * rather than an address beyond the end of the string.
+         * NOTE(review): is_utf8_char() gets no length, so it may still
+         * look at bytes after send -- confirm against its definition. */
+        if (!c || c > (STRLEN)(send - x)) {
+            if (p)
+                *p = x;
+            return FALSE;
+        }
+        x += c;
+    }
+    return TRUE;    /* the loop can only end with x == send */
+}
+
+/*
=for apidoc A|UV|utf8n_to_uvuni|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags
Bottom level UTF-8 decode routine.