summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--embed.h4
-rwxr-xr-xembed.pl1
-rw-r--r--global.sym1
-rw-r--r--objXSUB.h4
-rwxr-xr-xperlapi.c7
-rw-r--r--pod/perlapi.pod24
-rw-r--r--proto.h1
-rw-r--r--utf8.c24
8 files changed, 59 insertions, 7 deletions
diff --git a/embed.h b/embed.h
index 4c95dc2ff4..fa199fbe15 100644
--- a/embed.h
+++ b/embed.h
@@ -305,6 +305,7 @@
#define to_uni_title_lc Perl_to_uni_title_lc
#define to_uni_lower_lc Perl_to_uni_lower_lc
#define is_utf8_char Perl_is_utf8_char
+#define is_utf8_string Perl_is_utf8_string
#define is_utf8_alnum Perl_is_utf8_alnum
#define is_utf8_alnumc Perl_is_utf8_alnumc
#define is_utf8_idfirst Perl_is_utf8_idfirst
@@ -1758,6 +1759,7 @@
#define to_uni_title_lc(a) Perl_to_uni_title_lc(aTHX_ a)
#define to_uni_lower_lc(a) Perl_to_uni_lower_lc(aTHX_ a)
#define is_utf8_char(a) Perl_is_utf8_char(aTHX_ a)
+#define is_utf8_string(a,b) Perl_is_utf8_string(aTHX_ a,b)
#define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a)
#define is_utf8_alnumc(a) Perl_is_utf8_alnumc(aTHX_ a)
#define is_utf8_idfirst(a) Perl_is_utf8_idfirst(aTHX_ a)
@@ -3445,6 +3447,8 @@
#define to_uni_lower_lc Perl_to_uni_lower_lc
#define Perl_is_utf8_char CPerlObj::Perl_is_utf8_char
#define is_utf8_char Perl_is_utf8_char
+#define Perl_is_utf8_string CPerlObj::Perl_is_utf8_string
+#define is_utf8_string Perl_is_utf8_string
#define Perl_is_utf8_alnum CPerlObj::Perl_is_utf8_alnum
#define is_utf8_alnum Perl_is_utf8_alnum
#define Perl_is_utf8_alnumc CPerlObj::Perl_is_utf8_alnumc
diff --git a/embed.pl b/embed.pl
index 5b63a35a7c..96603be691 100755
--- a/embed.pl
+++ b/embed.pl
@@ -1620,6 +1620,7 @@ Ap |U32 |to_uni_upper_lc|U32 c
Ap |U32 |to_uni_title_lc|U32 c
Ap |U32 |to_uni_lower_lc|U32 c
Ap |int |is_utf8_char |U8 *p
+Ap |bool |is_utf8_string |U8 *s|STRLEN len
Ap |bool |is_utf8_alnum |U8 *p
Ap |bool |is_utf8_alnumc |U8 *p
Ap |bool |is_utf8_idfirst|U8 *p
diff --git a/global.sym b/global.sym
index 8a9b00890f..1f03877bdc 100644
--- a/global.sym
+++ b/global.sym
@@ -185,6 +185,7 @@ Perl_to_uni_upper_lc
Perl_to_uni_title_lc
Perl_to_uni_lower_lc
Perl_is_utf8_char
+Perl_is_utf8_string
Perl_is_utf8_alnum
Perl_is_utf8_alnumc
Perl_is_utf8_idfirst
diff --git a/objXSUB.h b/objXSUB.h
index ad5aa092fa..e30258bc0f 100644
--- a/objXSUB.h
+++ b/objXSUB.h
@@ -707,6 +707,10 @@
#define Perl_is_utf8_char pPerl->Perl_is_utf8_char
#undef is_utf8_char
#define is_utf8_char Perl_is_utf8_char
+#undef Perl_is_utf8_string
+#define Perl_is_utf8_string pPerl->Perl_is_utf8_string
+#undef is_utf8_string
+#define is_utf8_string Perl_is_utf8_string
#undef Perl_is_utf8_alnum
#define Perl_is_utf8_alnum pPerl->Perl_is_utf8_alnum
#undef is_utf8_alnum
diff --git a/perlapi.c b/perlapi.c
index ccb7c8fc32..29428d587b 100755
--- a/perlapi.c
+++ b/perlapi.c
@@ -1326,6 +1326,13 @@ Perl_is_utf8_char(pTHXo_ U8 *p)
return ((CPerlObj*)pPerl)->Perl_is_utf8_char(p);
}
+#undef Perl_is_utf8_string /* drop any macro of this name so the function below can be defined */
+bool
+Perl_is_utf8_string(pTHXo_ U8 *s, STRLEN len)
+{
+ return ((CPerlObj*)pPerl)->Perl_is_utf8_string(s, len); /* forward unchanged to the CPerlObj method of the same name */
+}
+
#undef Perl_is_utf8_alnum
bool
Perl_is_utf8_alnum(pTHXo_ U8 *p)
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index 1e488097b1..86ad5bd1bb 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -2282,19 +2282,19 @@ false, defined or undefined. Does not handle 'get' magic.
=for hackers
Found in file sv.h
-=item svtype
+=item SvTYPE
-An enum of flags for Perl types. These are found in the file B<sv.h>
-in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV. See C<svtype>.
+
+ svtype SvTYPE(SV* sv)
=for hackers
Found in file sv.h
-=item SvTYPE
-
-Returns the type of the SV. See C<svtype>.
+=item svtype
- svtype SvTYPE(SV* sv)
+An enum of flags for Perl types. These are found in the file B<sv.h>
+in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
=for hackers
Found in file sv.h
@@ -2939,6 +2939,16 @@ Converts the specified character to uppercase.
=for hackers
Found in file handy.h
+=item is_utf8_string
+
+Returns true if the first C<len> bytes of the given string form a valid UTF8
+string, false otherwise.
+
+ bool is_utf8_string(U8 *s, STRLEN len)
+
+=for hackers
+Found in file utf8.c
+
=item utf8_to_bytes
Converts a string C<s> of length C<len> from UTF8 into ASCII encoding.
diff --git a/proto.h b/proto.h
index c1d9a66fec..fc34840da8 100644
--- a/proto.h
+++ b/proto.h
@@ -367,6 +367,7 @@ PERL_CALLCONV U32 Perl_to_uni_upper_lc(pTHX_ U32 c);
PERL_CALLCONV U32 Perl_to_uni_title_lc(pTHX_ U32 c);
PERL_CALLCONV U32 Perl_to_uni_lower_lc(pTHX_ U32 c);
PERL_CALLCONV int Perl_is_utf8_char(pTHX_ U8 *p);
+PERL_CALLCONV bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len);
PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ U8 *p);
PERL_CALLCONV bool Perl_is_utf8_alnumc(pTHX_ U8 *p);
PERL_CALLCONV bool Perl_is_utf8_idfirst(pTHX_ U8 *p);
diff --git a/utf8.c b/utf8.c
index 9bb89a4ca6..666ec3476a 100644
--- a/utf8.c
+++ b/utf8.c
@@ -134,6 +134,30 @@ Perl_is_utf8_char(pTHX_ U8 *s)
return len;
}
+/*
+=for apidoc A|bool|is_utf8_string|U8 *s|STRLEN len
+
+Returns true if the first C<len> bytes of the given string form a valid UTF8
+string, false otherwise.
+
+=cut
+*/
+
+bool
+Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len)
+{
+ U8* x=s; /* cursor: start of the character currently being checked */
+ U8* send=s+len; /* one past the last byte that is to be validated */
+ int c; /* value returned by is_utf8_char() for the character at x */
+ while (x < send) {
+ c = is_utf8_char(x); /* NOTE(review): no bound is passed here; confirm is_utf8_char cannot read past send */
+ x += c; /* step over the character just examined */
+ if (!c || x > send) /* zero result, or the character extends beyond send */
+ return 0;
+ }
+ return 1; /* loop finished without a failure; also reached when len is 0 */
+}
+
UV
Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen)
{