diff options
-rw-r--r-- | embed.h | 4 | ||||
-rwxr-xr-x | embed.pl | 1 | ||||
-rw-r--r-- | global.sym | 1 | ||||
-rw-r--r-- | objXSUB.h | 4 | ||||
-rwxr-xr-x | perlapi.c | 7 | ||||
-rw-r--r-- | pod/perlapi.pod | 24 | ||||
-rw-r--r-- | proto.h | 1 | ||||
-rw-r--r-- | utf8.c | 24 |
8 files changed, 59 insertions, 7 deletions
@@ -305,6 +305,7 @@ #define to_uni_title_lc Perl_to_uni_title_lc #define to_uni_lower_lc Perl_to_uni_lower_lc #define is_utf8_char Perl_is_utf8_char +#define is_utf8_string Perl_is_utf8_string #define is_utf8_alnum Perl_is_utf8_alnum #define is_utf8_alnumc Perl_is_utf8_alnumc #define is_utf8_idfirst Perl_is_utf8_idfirst @@ -1758,6 +1759,7 @@ #define to_uni_title_lc(a) Perl_to_uni_title_lc(aTHX_ a) #define to_uni_lower_lc(a) Perl_to_uni_lower_lc(aTHX_ a) #define is_utf8_char(a) Perl_is_utf8_char(aTHX_ a) +#define is_utf8_string(a,b) Perl_is_utf8_string(aTHX_ a,b) #define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a) #define is_utf8_alnumc(a) Perl_is_utf8_alnumc(aTHX_ a) #define is_utf8_idfirst(a) Perl_is_utf8_idfirst(aTHX_ a) @@ -3445,6 +3447,8 @@ #define to_uni_lower_lc Perl_to_uni_lower_lc #define Perl_is_utf8_char CPerlObj::Perl_is_utf8_char #define is_utf8_char Perl_is_utf8_char +#define Perl_is_utf8_string CPerlObj::Perl_is_utf8_string +#define is_utf8_string Perl_is_utf8_string #define Perl_is_utf8_alnum CPerlObj::Perl_is_utf8_alnum #define is_utf8_alnum Perl_is_utf8_alnum #define Perl_is_utf8_alnumc CPerlObj::Perl_is_utf8_alnumc @@ -1620,6 +1620,7 @@ Ap |U32 |to_uni_upper_lc|U32 c Ap |U32 |to_uni_title_lc|U32 c Ap |U32 |to_uni_lower_lc|U32 c Ap |int |is_utf8_char |U8 *p +Ap |bool |is_utf8_string |U8 *s|STRLEN len Ap |bool |is_utf8_alnum |U8 *p Ap |bool |is_utf8_alnumc |U8 *p Ap |bool |is_utf8_idfirst|U8 *p diff --git a/global.sym b/global.sym index 8a9b00890f..1f03877bdc 100644 --- a/global.sym +++ b/global.sym @@ -185,6 +185,7 @@ Perl_to_uni_upper_lc Perl_to_uni_title_lc Perl_to_uni_lower_lc Perl_is_utf8_char +Perl_is_utf8_string Perl_is_utf8_alnum Perl_is_utf8_alnumc Perl_is_utf8_idfirst @@ -707,6 +707,10 @@ #define Perl_is_utf8_char pPerl->Perl_is_utf8_char #undef is_utf8_char #define is_utf8_char Perl_is_utf8_char +#undef Perl_is_utf8_string +#define Perl_is_utf8_string pPerl->Perl_is_utf8_string +#undef is_utf8_string +#define is_utf8_string Perl_is_utf8_string #undef Perl_is_utf8_alnum #define Perl_is_utf8_alnum pPerl->Perl_is_utf8_alnum #undef is_utf8_alnum @@ -1326,6 +1326,13 @@ Perl_is_utf8_char(pTHXo_ U8 *p) return ((CPerlObj*)pPerl)->Perl_is_utf8_char(p); } +#undef Perl_is_utf8_string +bool +Perl_is_utf8_string(pTHXo_ U8 *s, STRLEN len) +{ + return ((CPerlObj*)pPerl)->Perl_is_utf8_string(s, len); +} + #undef Perl_is_utf8_alnum bool Perl_is_utf8_alnum(pTHXo_ U8 *p) diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 1e488097b1..86ad5bd1bb 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -2282,19 +2282,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B<sv.h> -in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. +Returns the type of the SV. See C<svtype>. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C<svtype>. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B<sv.h> +in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. =for hackers Found in file sv.h @@ -2939,6 +2939,16 @@ Converts the specified character to uppercase. =for hackers Found in file handy.h +=item U8 *s + +Returns true if first C<len> bytes of the given string form valid a UTF8 +string, false otherwise. + + bool_utf8_string U8 *s(STRLEN len) + +=for hackers +Found in file utf8.c + =item utf8_to_bytes Converts a string C<s> of length C<len> from UTF8 into ASCII encoding. @@ -367,6 +367,7 @@ PERL_CALLCONV U32 Perl_to_uni_upper_lc(pTHX_ U32 c); PERL_CALLCONV U32 Perl_to_uni_title_lc(pTHX_ U32 c); PERL_CALLCONV U32 Perl_to_uni_lower_lc(pTHX_ U32 c); PERL_CALLCONV int Perl_is_utf8_char(pTHX_ U8 *p); +PERL_CALLCONV bool Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len); PERL_CALLCONV bool Perl_is_utf8_alnum(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_alnumc(pTHX_ U8 *p); PERL_CALLCONV bool Perl_is_utf8_idfirst(pTHX_ U8 *p); @@ -134,6 +134,30 @@ Perl_is_utf8_char(pTHX_ U8 *s) return len; } +/* +=for apidoc Am|bool_utf8_string|U8 *s|STRLEN len + +Returns true if first C<len> bytes of the given string form valid a UTF8 +string, false otherwise. + +=cut +*/ + +bool +Perl_is_utf8_string(pTHX_ U8 *s, STRLEN len) +{ + U8* x=s; + U8* send=s+len; + int c; + while (x < send) { + c = is_utf8_char(x); + x += c; + if (!c || x > send) + return 0; + } + return 1; +} + UV Perl_utf8_to_uv(pTHX_ U8* s, I32* retlen) { |