diff options
-rw-r--r-- | cpan/Encode/Encode.pm | 2 | ||||
-rw-r--r-- | cpan/Encode/Encode.xs | 2 | ||||
-rw-r--r-- | embed.fnc | 6 | ||||
-rw-r--r-- | embed.h | 6 | ||||
-rw-r--r-- | mathoms.c | 96 | ||||
-rw-r--r-- | proto.h | 14 | ||||
-rw-r--r-- | utf8.c | 100 |
7 files changed, 119 insertions, 107 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm index 6f8d85949a..3487daabe1 100644 --- a/cpan/Encode/Encode.pm +++ b/cpan/Encode/Encode.pm @@ -7,7 +7,7 @@ use warnings; use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG}; our $VERSION; BEGIN { - $VERSION = sprintf "%d.%02d", q$Revision: 3.02 $ =~ /(\d+)/g; + $VERSION = sprintf "%d.%02d", q$Revision: 3.03 $ =~ /(\d+)/g; require XSLoader; XSLoader::load( __PACKAGE__, $VERSION ); } diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs index 0d66ec36d9..823917ca71 100644 --- a/cpan/Encode/Encode.xs +++ b/cpan/Encode/Encode.xs @@ -231,7 +231,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 * if (dir == enc->f_utf8) { STRLEN clen; UV ch = - utf8n_to_uvuni(s+slen, (tlen-sdone-slen), + utf8n_to_uvchr(s+slen, (tlen-sdone-slen), &clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); /* if non-representable multibyte prefix at end of current buffer - break*/ if (clen > tlen - sdone - slen) break; @@ -2067,7 +2067,7 @@ AxTp |U8* |bytes_from_utf8_loc|NN const U8 *s \ |NN bool *is_utf8p \ |NULLOK const U8 ** first_unconverted Apxd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *lenp -ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen +ApdDb |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen CbpdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen CbpD |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen AMpd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen @@ -2098,7 +2098,7 @@ CTp |UV |_utf8n_to_uvchr_msgs_helper \ |NULLOK U32 * errors \ |NULLOK AV ** msgs CipTRd |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen -Cdp |UV |utf8n_to_uvuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags +CdbDp |UV |utf8n_to_uvuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags Adm |U8* |uvchr_to_utf8 |NN U8 *d|UV uv Cp |U8* |uvuni_to_utf8 |NN U8 *d|UV uv @@ -2106,7 +2106,7 @@ Adm |U8* |uvchr_to_utf8_flags |NN U8 *d|UV uv|UV flags Admx |U8* |uvchr_to_utf8_flags_msgs|NN U8 *d|UV uv|UV flags|NULLOK HV ** msgs CMpd |U8* |uvoffuni_to_utf8_flags |NN U8 *d|UV uv|const UV flags Cp |U8* |uvoffuni_to_utf8_flags_msgs|NN U8 *d|UV uv|const UV flags|NULLOK HV** msgs -Cdp |U8* |uvuni_to_utf8_flags |NN U8 *d|UV uv|UV flags +CdpbD |U8* |uvuni_to_utf8_flags |NN U8 *d|UV uv|UV flags Apd |char* |pv_uni_display |NN SV *dsv|NN const U8 *spv|STRLEN len|STRLEN pvlim|UV flags ApdR |char* |sv_uni_display |NN SV *dsv|NN SV *ssv|STRLEN pvlim|UV flags EXpR |Size_t |_inverse_folds |const UV cp \ @@ -723,17 +723,23 @@ #define utf8_hop_safe Perl_utf8_hop_safe #define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b) #define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b) +#ifndef NO_MATHOMS #define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b) +#endif #define utf8_to_uvchr_buf_helper(a,b,c) Perl_utf8_to_uvchr_buf_helper(aTHX_ a,b,c) #ifndef NO_MATHOMS #define utf8_to_uvuni(a,b) Perl_utf8_to_uvuni(aTHX_ a,b) #endif #define utf8_to_uvuni_buf(a,b,c) Perl_utf8_to_uvuni_buf(aTHX_ a,b,c) #define utf8n_to_uvchr_msgs Perl_utf8n_to_uvchr_msgs +#ifndef NO_MATHOMS #define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d) +#endif #define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d) #define uvuni_to_utf8(a,b) Perl_uvuni_to_utf8(aTHX_ a,b) +#ifndef NO_MATHOMS #define uvuni_to_utf8_flags(a,b,c) Perl_uvuni_to_utf8_flags(aTHX_ a,b,c) +#endif #define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr #ifndef NO_MATHOMS #define valid_utf8_to_uvuni(a,b) Perl_valid_utf8_to_uvuni(aTHX_ a,b) @@ -1321,6 +1321,102 @@ Perl_sv_2pvbyte(pTHX_ SV *sv, STRLEN *const lp) return sv_2pvbyte(sv, lp); } +U8 * +Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) +{ + PERL_ARGS_ASSERT_UVUNI_TO_UTF8; + + return uvoffuni_to_utf8_flags(d, uv, 0); +} + +/* +=for apidoc utf8n_to_uvuni + +Instead use L<perlapi/utf8_to_uvchr_buf>, or rarely, L<perlapi/utf8n_to_uvchr>. + +This function was useful for code that wanted to handle both EBCDIC and +ASCII platforms with Unicode properties, but starting in Perl v5.20, the +distinctions between the platforms have mostly been made invisible to most +code, so this function is quite unlikely to be what you want. If you do need +this precise functionality, use instead +C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|perlapi/utf8_to_uvchr_buf>> +or C<L<NATIVE_TO_UNI(utf8n_to_uvchr(...))|perlapi/utf8n_to_uvchr>>. + +=cut +*/ + +UV +Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) +{ + PERL_ARGS_ASSERT_UTF8N_TO_UVUNI; + + return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags)); +} + +/* +=for apidoc uvuni_to_utf8_flags + +Instead you almost certainly want to use L<perlapi/uvchr_to_utf8> or +L<perlapi/uvchr_to_utf8_flags>. + +This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>, +which itself, while not deprecated, should be used only in isolated +circumstances. These functions were useful for code that wanted to handle +both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl +v5.20, the distinctions between the platforms have mostly been made invisible +to most code, so this function is quite unlikely to be what you want. + +=cut +*/ + +U8 * +Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) +{ + PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS; + + return uvoffuni_to_utf8_flags(d, uv, flags); +} + +/* +=for apidoc utf8_to_uvchr + +Returns the native code point of the first character in the string C<s> +which is assumed to be in UTF-8 encoding; C<retlen> will be set to the +length, in bytes, of that character. + +Some, but not all, UTF-8 malformations are detected, and in fact, some +malformed input could cause reading beyond the end of the input buffer, which +is why this function is deprecated. Use L</utf8_to_uvchr_buf> instead. + +If C<s> points to one of the detected malformations, and UTF8 warnings are +enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't +C<NULL>) to -1. If those warnings are off, the computed value if well-defined (or +the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen> +is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the +next possible position in C<s> that could begin a non-malformed character. +See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned. + +=cut +*/ + +UV +Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) +{ + PERL_ARGS_ASSERT_UTF8_TO_UVCHR; + + /* This function is unsafe if malformed UTF-8 input is given it, which is + * why the function is deprecated. If the first byte of the input + * indicates that there are more bytes remaining in the sequence that forms + * the character than there are in the input buffer, it can read past the + * end. But we can make it safe if the input string happens to be + * NUL-terminated, as many strings in Perl are, by refusing to read past a + * NUL, which is what UTF8_CHK_SKIP() does. A NUL indicates the start of + * the next character anyway. If the input isn't NUL-terminated, the + * function remains unsafe, as it always has been. */ + + return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen); +} + GCC_DIAG_RESTORE #endif /* NO_MATHOMS */ @@ -3909,10 +3909,12 @@ PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ const U8* s, const U8 *e) PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp); #define PERL_ARGS_ASSERT_UTF8_TO_BYTES \ assert(s); assert(lenp) +#ifndef NO_MATHOMS PERL_CALLCONV UV Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) __attribute__deprecated__; #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \ assert(s) +#endif PERL_CALLCONV UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen); #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF \ @@ -3945,9 +3947,13 @@ PERL_STATIC_INLINE UV Perl_utf8n_to_uvchr_msgs(const U8 *s, STRLEN curlen, STRLE #define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS \ assert(s) #endif -PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags); +#ifndef NO_MATHOMS +PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) + __attribute__deprecated__; #define PERL_ARGS_ASSERT_UTF8N_TO_UVUNI \ assert(s) +#endif + PERL_CALLCONV void Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* idop, OP* arg); #define PERL_ARGS_ASSERT_UTILIZE \ assert(idop) @@ -3966,9 +3972,13 @@ PERL_CALLCONV U8* Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV PERL_CALLCONV U8* Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv); #define PERL_ARGS_ASSERT_UVUNI_TO_UTF8 \ assert(d) -PERL_CALLCONV U8* Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags); +#ifndef NO_MATHOMS +PERL_CALLCONV U8* Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) + __attribute__deprecated__; #define PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS \ assert(d) +#endif + #ifndef PERL_NO_INLINE_FUNCTIONS PERL_STATIC_INLINE UV Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen) __attribute__warn_unused_result__; @@ -4437,106 +4437,6 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, return 1; } -/* XXX The next two functions should likely be moved to mathoms.c once all - * occurrences of them are removed from the core; some cpan-upstream modules - * still use them */ - -U8 * -Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) -{ - PERL_ARGS_ASSERT_UVUNI_TO_UTF8; - - return uvoffuni_to_utf8_flags(d, uv, 0); -} - -/* -=for apidoc utf8n_to_uvuni - -Instead use L<perlapi/utf8_to_uvchr_buf>, or rarely, L<perlapi/utf8n_to_uvchr>. - -This function was useful for code that wanted to handle both EBCDIC and -ASCII platforms with Unicode properties, but starting in Perl v5.20, the -distinctions between the platforms have mostly been made invisible to most -code, so this function is quite unlikely to be what you want. If you do need -this precise functionality, use instead -C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|perlapi/utf8_to_uvchr_buf>> -or C<L<NATIVE_TO_UNI(utf8n_to_uvchr(...))|perlapi/utf8n_to_uvchr>>. - -=cut -*/ - -UV -Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) -{ - PERL_ARGS_ASSERT_UTF8N_TO_UVUNI; - - return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags)); -} - -/* -=for apidoc uvuni_to_utf8_flags - -Instead you almost certainly want to use L<perlapi/uvchr_to_utf8> or -L<perlapi/uvchr_to_utf8_flags>. - -This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>, -which itself, while not deprecated, should be used only in isolated -circumstances. These functions were useful for code that wanted to handle -both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl -v5.20, the distinctions between the platforms have mostly been made invisible -to most code, so this function is quite unlikely to be what you want. - -=cut -*/ - -U8 * -Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) -{ - PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS; - - return uvoffuni_to_utf8_flags(d, uv, flags); -} - -/* -=for apidoc utf8_to_uvchr - -Returns the native code point of the first character in the string C<s> -which is assumed to be in UTF-8 encoding; C<retlen> will be set to the -length, in bytes, of that character. - -Some, but not all, UTF-8 malformations are detected, and in fact, some -malformed input could cause reading beyond the end of the input buffer, which -is why this function is deprecated. Use L</utf8_to_uvchr_buf> instead. - -If C<s> points to one of the detected malformations, and UTF8 warnings are -enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't -C<NULL>) to -1. If those warnings are off, the computed value if well-defined (or -the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen> -is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the -next possible position in C<s> that could begin a non-malformed character. -See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned. - -=cut -*/ - -UV -Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) -{ - PERL_ARGS_ASSERT_UTF8_TO_UVCHR; - - /* This function is unsafe if malformed UTF-8 input is given it, which is - * why the function is deprecated. If the first byte of the input - * indicates that there are more bytes remaining in the sequence that forms - * the character than there are in the input buffer, it can read past the - * end. But we can make it safe if the input string happens to be - * NUL-terminated, as many strings in Perl are, by refusing to read past a - * NUL, which is what UTF8_CHK_SKIP() does. A NUL indicates the start of - * the next character anyway. If the input isn't NUL-terminated, the - * function remains unsafe, as it always has been. */ - - return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen); -} - /* * ex: set ts=8 sts=4 sw=4 et: */ |