summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2003-09-10 06:54:02 +0000
committerJarkko Hietaniemi <jhi@iki.fi>2003-09-10 06:54:02 +0000
commit81cd54e3d8dc0f62b7c4bf5206036c9493ef5300 (patch)
tree6145c24cb1216bb566d3ae381a9dd9c55fba98ad
parente944adaebcc9a91185478dbc0f0fe933f108b22d (diff)
downloadperl-81cd54e3d8dc0f62b7c4bf5206036c9493ef5300.tar.gz
A new UTF-8 API, Perl_is_utf8_string_loc(), a variant
of Perl_utf8_is_string(). p4raw-id: //depot/perl@21152
-rw-r--r--embed.fnc1
-rw-r--r--embed.h2
-rw-r--r--global.sym1
-rw-r--r--proto.h1
-rw-r--r--utf8.c49
5 files changed, 54 insertions, 0 deletions
diff --git a/embed.fnc b/embed.fnc
index 26d3bd5bbc..ca5014339e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -337,6 +337,7 @@ Ap |bool |is_uni_punct_lc|UV c
Ap |bool |is_uni_xdigit_lc|UV c
Apd |STRLEN |is_utf8_char |U8 *p
Apd |bool |is_utf8_string |U8 *s|STRLEN len
+Apd |bool |is_utf8_string_loc|U8 *s|STRLEN len|U8 **p
Ap |bool |is_utf8_alnum |U8 *p
Ap |bool |is_utf8_alnumc |U8 *p
Ap |bool |is_utf8_idfirst|U8 *p
diff --git a/embed.h b/embed.h
index 873cb04158..5d0e52d34d 100644
--- a/embed.h
+++ b/embed.h
@@ -409,6 +409,7 @@
#define is_uni_xdigit_lc Perl_is_uni_xdigit_lc
#define is_utf8_char Perl_is_utf8_char
#define is_utf8_string Perl_is_utf8_string
+#define is_utf8_string_loc Perl_is_utf8_string_loc
#define is_utf8_alnum Perl_is_utf8_alnum
#define is_utf8_alnumc Perl_is_utf8_alnumc
#define is_utf8_idfirst Perl_is_utf8_idfirst
@@ -2899,6 +2900,7 @@
#define is_uni_xdigit_lc(a) Perl_is_uni_xdigit_lc(aTHX_ a)
#define is_utf8_char(a) Perl_is_utf8_char(aTHX_ a)
#define is_utf8_string(a,b) Perl_is_utf8_string(aTHX_ a,b)
+#define is_utf8_string_loc(a,b,c) Perl_is_utf8_string_loc(aTHX_ a,b,c)
#define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a)
#define is_utf8_alnumc(a) Perl_is_utf8_alnumc(aTHX_ a)
#define is_utf8_idfirst(a) Perl_is_utf8_idfirst(aTHX_ a)
diff --git a/global.sym b/global.sym
index 34961986af..83bec56dc7 100644
--- a/global.sym
+++ b/global.sym
@@ -204,6 +204,7 @@ Perl_is_uni_punct_lc
Perl_is_uni_xdigit_lc
Perl_is_utf8_char
Perl_is_utf8_string
+Perl_is_utf8_string_loc
Perl_is_utf8_alnum
Perl_is_utf8_alnumc
Perl_is_utf8_idfirst
diff --git a/proto.h b/proto.h
index 97ae843332..b6a584ca45 100644
--- a/proto.h
+++ b/proto.h
@@ -316,6 +316,7 @@ PERL_CALLCONV bool Perl_is_uni_punct_lc(pTHX_ UV c);
PERL_CALLCONV bool Perl_is_uni_xdigit_lc(pTHX_ UV c);
PERL_CALLCONV STRLEN Perl_is_utf8_char(pTHX_ U8 *p);
PERL_CALLCONV bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len);
+PERL_CALLCONV bool Perl_is_utf8_string_loc(pTHX_ U8 *s, STRLEN len, U8 **p);
PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ U8 *p);
PERL_CALLCONV bool Perl_is_utf8_alnumc(pTHX_ U8 *p);
PERL_CALLCONV bool Perl_is_utf8_idfirst(pTHX_ U8 *p);
diff --git a/utf8.c b/utf8.c
index 21d0f08a19..ad8758e3d7 100644
--- a/utf8.c
+++ b/utf8.c
@@ -257,6 +257,55 @@ Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
}
/*
+=for apidoc A|bool|is_utf8_string_loc|U8 *s|STRLEN len|U8 **p
+
+Like is_ut8_string but store the location of the failure in
+the last argument.
+
+=cut
+*/
+
+bool
+Perl_is_utf8_string_loc(pTHX_ U8 *s, STRLEN len, U8 **p)
+{
+ U8* x = s;
+ U8* send;
+ STRLEN c;
+
+ if (!len)
+ len = strlen((char *)s);
+ send = s + len;
+
+ while (x < send) {
+ /* Inline the easy bits of is_utf8_char() here for speed... */
+ if (UTF8_IS_INVARIANT(*x))
+ c = 1;
+ else if (!UTF8_IS_START(*x)) {
+ if (p)
+ *p = x;
+ return FALSE;
+ }
+ else {
+ /* ... and call is_utf8_char() only if really needed. */
+ c = is_utf8_char(x);
+ if (!c) {
+ if (p)
+ *p = x;
+ return FALSE;
+ }
+ }
+ x += c;
+ }
+ if (x != send) {
+ if (p)
+ *p = x;
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/*
=for apidoc A|UV|utf8n_to_uvuni|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags
Bottom level UTF-8 decode routine.