diff options
author | Karl Williamson <khw@cpan.org> | 2022-06-28 09:33:22 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2022-06-28 10:10:20 -0600 |
commit | 31798a685edf1feafb2f251ac5096a193518cf2e (patch) | |
tree | edc4f929ac8e1e188b6ad03f72ca837334f79274 | |
parent | b179236d59d4d0be43d9307d30b9e97609c9f96d (diff) | |
download | perl-31798a685edf1feafb2f251ac5096a193518cf2e.tar.gz |
Partially revert remove deprecated functions"
This partially reverts commit 7008caa915ad99e650acf2aea40612b5e48b7ba2.
Another portion was reverted by
f847c0b367a25dc6028136ddc7085a7301da7a82.
It turns out that two more of the removed functions are still used in
cpan in several places. Revert their removal for now until a better
alternative than any existing is in place and backported in ppport.h.
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | embed.h | 6 | ||||
-rw-r--r-- | mathoms.c | 75 | ||||
-rw-r--r-- | proto.h | 14 |
4 files changed, 97 insertions, 0 deletions
@@ -2593,6 +2593,8 @@ CxTdp |U8* |bytes_from_utf8_loc|NN const U8 *s \ |NN bool *is_utf8p \ |NULLOK const U8 ** first_unconverted Apxd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *lenp +ApdDb |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen +CbpdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen AMpd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen Cip |UV |utf8_to_uvchr_buf_helper|NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen p |bool |check_utf8_print |NN const U8 *s|const STRLEN len @@ -703,7 +703,13 @@ #define utf8_hop_safe Perl_utf8_hop_safe #define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b) #define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b) +#ifndef NO_MATHOMS +#define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b) +#endif #define utf8_to_uvchr_buf_helper(a,b,c) Perl_utf8_to_uvchr_buf_helper(aTHX_ a,b,c) +#ifndef NO_MATHOMS +#define utf8_to_uvuni(a,b) Perl_utf8_to_uvuni(aTHX_ a,b) +#endif #define utf8n_to_uvchr_msgs Perl_utf8n_to_uvchr_msgs #ifndef NO_MATHOMS #define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d) @@ -821,6 +821,40 @@ Perl_is_utf8_char_buf(const U8 *buf, const U8* buf_end) return isUTF8_CHAR(buf, buf_end); } +/* +=for apidoc_section $unicode +=for apidoc utf8_to_uvuni + +Returns the Unicode code point of the first character in the string C<s> +which is assumed to be in UTF-8 encoding; C<retlen> will be set to the +length, in bytes, of that character. + +Some, but not all, UTF-8 malformations are detected, and in fact, some +malformed input could cause reading beyond the end of the input buffer, which +is one reason why this function is deprecated. The other is that only in +extremely limited circumstances should the Unicode versus native code point be +of any interest to you. + +If C<s> points to one of the detected malformations, and UTF8 warnings are +enabled, zero is returned and C<*retlen> is set (if C<retlen> doesn't point to +NULL) to -1. If those warnings are off, the computed value if well-defined (or +the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen> +is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the +next possible position in C<s> that could begin a non-malformed character. +See L<perlapi/utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned. + +=cut +*/ + +UV +Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) +{ + PERL_UNUSED_CONTEXT; + PERL_ARGS_ASSERT_UTF8_TO_UVUNI; + + return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen)); +} + /* return ptr to little string in big string, NULL if not found */ /* The original version of this routine was donated by Corey Satten. */ @@ -895,6 +929,47 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags) return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags)); } +/* +=for apidoc_section $unicode +=for apidoc utf8_to_uvchr + +Returns the native code point of the first character in the string C<s> +which is assumed to be in UTF-8 encoding; C<retlen> will be set to the +length, in bytes, of that character. + +Some, but not all, UTF-8 malformations are detected, and in fact, some +malformed input could cause reading beyond the end of the input buffer, which +is why this function is deprecated. Use L</utf8_to_uvchr_buf> instead. + +If C<s> points to one of the detected malformations, and UTF8 warnings are +enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't +C<NULL>) to -1. If those warnings are off, the computed value if well-defined (or +the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen> +is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the +next possible position in C<s> that could begin a non-malformed character. +See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned. + +=cut +*/ + +UV +Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) +{ + PERL_ARGS_ASSERT_UTF8_TO_UVCHR; + + /* This function is unsafe if malformed UTF-8 input is given it, which is + * why the function is deprecated. If the first byte of the input + * indicates that there are more bytes remaining in the sequence that forms + * the character than there are in the input buffer, it can read past the + * end. But we can make it safe if the input string happens to be + * NUL-terminated, as many strings in Perl are, by refusing to read past a + * NUL, which is what UTF8_CHK_SKIP() does. A NUL indicates the start of + * the next character anyway. If the input isn't NUL-terminated, the + * function remains unsafe, as it always has been. */ + + return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen); +} + GCC_DIAG_RESTORE #endif /* NO_MATHOMS */ @@ -4401,6 +4401,13 @@ PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp); PERL_CALLCONV U8* Perl_utf8_to_utf16_base(pTHX_ U8* s, U8 *d, Size_t bytelen, Size_t *newlen, const bool high, const bool low); #define PERL_ARGS_ASSERT_UTF8_TO_UTF16_BASE \ assert(s); assert(d); assert(newlen) +#ifndef NO_MATHOMS +PERL_CALLCONV UV Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) + __attribute__deprecated__; +#define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \ + assert(s) +#endif + PERL_CALLCONV UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen); #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF \ assert(s); assert(send) @@ -4409,6 +4416,13 @@ PERL_STATIC_INLINE UV Perl_utf8_to_uvchr_buf_helper(pTHX_ const U8 *s, const U8 #define PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF_HELPER \ assert(s); assert(send) #endif +#ifndef NO_MATHOMS +PERL_CALLCONV UV Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) + __attribute__deprecated__; +#define PERL_ARGS_ASSERT_UTF8_TO_UVUNI \ + assert(s) +#endif + PERL_CALLCONV UV Perl_utf8n_to_uvchr(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags); #define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR \ assert(s) |