summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cpan/Encode/Encode.pm2
-rw-r--r--cpan/Encode/Encode.xs2
-rw-r--r--embed.fnc6
-rw-r--r--embed.h6
-rw-r--r--mathoms.c96
-rw-r--r--proto.h14
-rw-r--r--utf8.c100
7 files changed, 119 insertions, 107 deletions
diff --git a/cpan/Encode/Encode.pm b/cpan/Encode/Encode.pm
index 6f8d85949a..3487daabe1 100644
--- a/cpan/Encode/Encode.pm
+++ b/cpan/Encode/Encode.pm
@@ -7,7 +7,7 @@ use warnings;
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
our $VERSION;
BEGIN {
- $VERSION = sprintf "%d.%02d", q$Revision: 3.02 $ =~ /(\d+)/g;
+ $VERSION = sprintf "%d.%02d", q$Revision: 3.03 $ =~ /(\d+)/g;
require XSLoader;
XSLoader::load( __PACKAGE__, $VERSION );
}
diff --git a/cpan/Encode/Encode.xs b/cpan/Encode/Encode.xs
index 0d66ec36d9..823917ca71 100644
--- a/cpan/Encode/Encode.xs
+++ b/cpan/Encode/Encode.xs
@@ -231,7 +231,7 @@ encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, U8 *
if (dir == enc->f_utf8) {
STRLEN clen;
UV ch =
- utf8n_to_uvuni(s+slen, (tlen-sdone-slen),
+ utf8n_to_uvchr(s+slen, (tlen-sdone-slen),
&clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY);
/* if non-representable multibyte prefix at end of current buffer - break*/
if (clen > tlen - sdone - slen) break;
diff --git a/embed.fnc b/embed.fnc
index e8ed8938be..51c15e8560 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2067,7 +2067,7 @@ AxTp |U8* |bytes_from_utf8_loc|NN const U8 *s \
|NN bool *is_utf8p \
|NULLOK const U8 ** first_unconverted
Apxd |U8* |bytes_to_utf8 |NN const U8 *s|NN STRLEN *lenp
-ApdD |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
+ApdDb |UV |utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
CbpdD |UV |utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
CbpD |UV |valid_utf8_to_uvuni |NN const U8 *s|NULLOK STRLEN *retlen
AMpd |UV |utf8_to_uvchr_buf |NN const U8 *s|NN const U8 *send|NULLOK STRLEN *retlen
@@ -2098,7 +2098,7 @@ CTp |UV |_utf8n_to_uvchr_msgs_helper \
|NULLOK U32 * errors \
|NULLOK AV ** msgs
CipTRd |UV |valid_utf8_to_uvchr |NN const U8 *s|NULLOK STRLEN *retlen
-Cdp |UV |utf8n_to_uvuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
+CdbDp |UV |utf8n_to_uvuni|NN const U8 *s|STRLEN curlen|NULLOK STRLEN *retlen|U32 flags
Adm |U8* |uvchr_to_utf8 |NN U8 *d|UV uv
Cp |U8* |uvuni_to_utf8 |NN U8 *d|UV uv
@@ -2106,7 +2106,7 @@ Adm |U8* |uvchr_to_utf8_flags |NN U8 *d|UV uv|UV flags
Admx |U8* |uvchr_to_utf8_flags_msgs|NN U8 *d|UV uv|UV flags|NULLOK HV ** msgs
CMpd |U8* |uvoffuni_to_utf8_flags |NN U8 *d|UV uv|const UV flags
Cp |U8* |uvoffuni_to_utf8_flags_msgs|NN U8 *d|UV uv|const UV flags|NULLOK HV** msgs
-Cdp |U8* |uvuni_to_utf8_flags |NN U8 *d|UV uv|UV flags
+CdpbD |U8* |uvuni_to_utf8_flags |NN U8 *d|UV uv|UV flags
Apd |char* |pv_uni_display |NN SV *dsv|NN const U8 *spv|STRLEN len|STRLEN pvlim|UV flags
ApdR |char* |sv_uni_display |NN SV *dsv|NN SV *ssv|STRLEN pvlim|UV flags
EXpR |Size_t |_inverse_folds |const UV cp \
diff --git a/embed.h b/embed.h
index 6d0346af0b..8152b16555 100644
--- a/embed.h
+++ b/embed.h
@@ -723,17 +723,23 @@
#define utf8_hop_safe Perl_utf8_hop_safe
#define utf8_length(a,b) Perl_utf8_length(aTHX_ a,b)
#define utf8_to_bytes(a,b) Perl_utf8_to_bytes(aTHX_ a,b)
+#ifndef NO_MATHOMS
#define utf8_to_uvchr(a,b) Perl_utf8_to_uvchr(aTHX_ a,b)
+#endif
#define utf8_to_uvchr_buf_helper(a,b,c) Perl_utf8_to_uvchr_buf_helper(aTHX_ a,b,c)
#ifndef NO_MATHOMS
#define utf8_to_uvuni(a,b) Perl_utf8_to_uvuni(aTHX_ a,b)
#endif
#define utf8_to_uvuni_buf(a,b,c) Perl_utf8_to_uvuni_buf(aTHX_ a,b,c)
#define utf8n_to_uvchr_msgs Perl_utf8n_to_uvchr_msgs
+#ifndef NO_MATHOMS
#define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d)
+#endif
#define uvoffuni_to_utf8_flags_msgs(a,b,c,d) Perl_uvoffuni_to_utf8_flags_msgs(aTHX_ a,b,c,d)
#define uvuni_to_utf8(a,b) Perl_uvuni_to_utf8(aTHX_ a,b)
+#ifndef NO_MATHOMS
#define uvuni_to_utf8_flags(a,b,c) Perl_uvuni_to_utf8_flags(aTHX_ a,b,c)
+#endif
#define valid_utf8_to_uvchr Perl_valid_utf8_to_uvchr
#ifndef NO_MATHOMS
#define valid_utf8_to_uvuni(a,b) Perl_valid_utf8_to_uvuni(aTHX_ a,b)
diff --git a/mathoms.c b/mathoms.c
index b0122f95ca..a0728f6913 100644
--- a/mathoms.c
+++ b/mathoms.c
@@ -1321,6 +1321,102 @@ Perl_sv_2pvbyte(pTHX_ SV *sv, STRLEN *const lp)
return sv_2pvbyte(sv, lp);
}
+U8 *
+Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
+{
+ PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
+
+ return uvoffuni_to_utf8_flags(d, uv, 0);
+}
+
+/*
+=for apidoc utf8n_to_uvuni
+
+Instead use L<perlapi/utf8_to_uvchr_buf>, or rarely, L<perlapi/utf8n_to_uvchr>.
+
+This function was useful for code that wanted to handle both EBCDIC and
+ASCII platforms with Unicode properties, but starting in Perl v5.20, the
+distinctions between the platforms have mostly been made invisible to most
+code, so this function is quite unlikely to be what you want. If you do need
+this precise functionality, use instead
+C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|perlapi/utf8_to_uvchr_buf>>
+or C<L<NATIVE_TO_UNI(utf8n_to_uvchr(...))|perlapi/utf8n_to_uvchr>>.
+
+=cut
+*/
+
+UV
+Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+{
+ PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
+
+ return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags));
+}
+
+/*
+=for apidoc uvuni_to_utf8_flags
+
+Instead you almost certainly want to use L<perlapi/uvchr_to_utf8> or
+L<perlapi/uvchr_to_utf8_flags>.
+
+This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>,
+which itself, while not deprecated, should be used only in isolated
+circumstances. These functions were useful for code that wanted to handle
+both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl
+v5.20, the distinctions between the platforms have mostly been made invisible
+to most code, so this function is quite unlikely to be what you want.
+
+=cut
+*/
+
+U8 *
+Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+{
+ PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
+
+ return uvoffuni_to_utf8_flags(d, uv, flags);
+}
+
+/*
+=for apidoc utf8_to_uvchr
+
+Returns the native code point of the first character in the string C<s>
+which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
+length, in bytes, of that character.
+
+Some, but not all, UTF-8 malformations are detected, and in fact, some
+malformed input could cause reading beyond the end of the input buffer, which
+is why this function is deprecated. Use L</utf8_to_uvchr_buf> instead.
+
+If C<s> points to one of the detected malformations, and UTF8 warnings are
+enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't
+C<NULL>) to -1. If those warnings are off, the computed value if well-defined (or
+the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
+is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
+next possible position in C<s> that could begin a non-malformed character.
+See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned.
+
+=cut
+*/
+
+UV
+Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
+{
+ PERL_ARGS_ASSERT_UTF8_TO_UVCHR;
+
+ /* This function is unsafe if malformed UTF-8 input is given it, which is
+ * why the function is deprecated. If the first byte of the input
+ * indicates that there are more bytes remaining in the sequence that forms
+ * the character than there are in the input buffer, it can read past the
+ * end. But we can make it safe if the input string happens to be
+ * NUL-terminated, as many strings in Perl are, by refusing to read past a
+ * NUL, which is what UTF8_CHK_SKIP() does. A NUL indicates the start of
+ * the next character anyway. If the input isn't NUL-terminated, the
+ * function remains unsafe, as it always has been. */
+
+ return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen);
+}
+
GCC_DIAG_RESTORE
#endif /* NO_MATHOMS */
diff --git a/proto.h b/proto.h
index 266ba5983b..de52524d01 100644
--- a/proto.h
+++ b/proto.h
@@ -3909,10 +3909,12 @@ PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ const U8* s, const U8 *e)
PERL_CALLCONV U8* Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp);
#define PERL_ARGS_ASSERT_UTF8_TO_BYTES \
assert(s); assert(lenp)
+#ifndef NO_MATHOMS
PERL_CALLCONV UV Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
__attribute__deprecated__;
#define PERL_ARGS_ASSERT_UTF8_TO_UVCHR \
assert(s)
+#endif
PERL_CALLCONV UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen);
#define PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF \
@@ -3945,9 +3947,13 @@ PERL_STATIC_INLINE UV Perl_utf8n_to_uvchr_msgs(const U8 *s, STRLEN curlen, STRLE
#define PERL_ARGS_ASSERT_UTF8N_TO_UVCHR_MSGS \
assert(s)
#endif
-PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags);
+#ifndef NO_MATHOMS
+PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+ __attribute__deprecated__;
#define PERL_ARGS_ASSERT_UTF8N_TO_UVUNI \
assert(s)
+#endif
+
PERL_CALLCONV void Perl_utilize(pTHX_ int aver, I32 floor, OP* version, OP* idop, OP* arg);
#define PERL_ARGS_ASSERT_UTILIZE \
assert(idop)
@@ -3966,9 +3972,13 @@ PERL_CALLCONV U8* Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV uv, const UV
PERL_CALLCONV U8* Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv);
#define PERL_ARGS_ASSERT_UVUNI_TO_UTF8 \
assert(d)
-PERL_CALLCONV U8* Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags);
+#ifndef NO_MATHOMS
+PERL_CALLCONV U8* Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+ __attribute__deprecated__;
#define PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS \
assert(d)
+#endif
+
#ifndef PERL_NO_INLINE_FUNCTIONS
PERL_STATIC_INLINE UV Perl_valid_utf8_to_uvchr(const U8 *s, STRLEN *retlen)
__attribute__warn_unused_result__;
diff --git a/utf8.c b/utf8.c
index 471ad0465f..9b1595c424 100644
--- a/utf8.c
+++ b/utf8.c
@@ -4437,106 +4437,6 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1,
return 1;
}
-/* XXX The next two functions should likely be moved to mathoms.c once all
- * occurrences of them are removed from the core; some cpan-upstream modules
- * still use them */
-
-U8 *
-Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv)
-{
- PERL_ARGS_ASSERT_UVUNI_TO_UTF8;
-
- return uvoffuni_to_utf8_flags(d, uv, 0);
-}
-
-/*
-=for apidoc utf8n_to_uvuni
-
-Instead use L<perlapi/utf8_to_uvchr_buf>, or rarely, L<perlapi/utf8n_to_uvchr>.
-
-This function was useful for code that wanted to handle both EBCDIC and
-ASCII platforms with Unicode properties, but starting in Perl v5.20, the
-distinctions between the platforms have mostly been made invisible to most
-code, so this function is quite unlikely to be what you want. If you do need
-this precise functionality, use instead
-C<L<NATIVE_TO_UNI(utf8_to_uvchr_buf(...))|perlapi/utf8_to_uvchr_buf>>
-or C<L<NATIVE_TO_UNI(utf8n_to_uvchr(...))|perlapi/utf8n_to_uvchr>>.
-
-=cut
-*/
-
-UV
-Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
-{
- PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
-
- return NATIVE_TO_UNI(utf8n_to_uvchr(s, curlen, retlen, flags));
-}
-
-/*
-=for apidoc uvuni_to_utf8_flags
-
-Instead you almost certainly want to use L<perlapi/uvchr_to_utf8> or
-L<perlapi/uvchr_to_utf8_flags>.
-
-This function is a deprecated synonym for L</uvoffuni_to_utf8_flags>,
-which itself, while not deprecated, should be used only in isolated
-circumstances. These functions were useful for code that wanted to handle
-both EBCDIC and ASCII platforms with Unicode properties, but starting in Perl
-v5.20, the distinctions between the platforms have mostly been made invisible
-to most code, so this function is quite unlikely to be what you want.
-
-=cut
-*/
-
-U8 *
-Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
-{
- PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
-
- return uvoffuni_to_utf8_flags(d, uv, flags);
-}
-
-/*
-=for apidoc utf8_to_uvchr
-
-Returns the native code point of the first character in the string C<s>
-which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
-length, in bytes, of that character.
-
-Some, but not all, UTF-8 malformations are detected, and in fact, some
-malformed input could cause reading beyond the end of the input buffer, which
-is why this function is deprecated. Use L</utf8_to_uvchr_buf> instead.
-
-If C<s> points to one of the detected malformations, and UTF8 warnings are
-enabled, zero is returned and C<*retlen> is set (if C<retlen> isn't
-C<NULL>) to -1. If those warnings are off, the computed value if well-defined (or
-the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
-is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
-next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvchr> for details on when the REPLACEMENT CHARACTER is returned.
-
-=cut
-*/
-
-UV
-Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen)
-{
- PERL_ARGS_ASSERT_UTF8_TO_UVCHR;
-
- /* This function is unsafe if malformed UTF-8 input is given it, which is
- * why the function is deprecated. If the first byte of the input
- * indicates that there are more bytes remaining in the sequence that forms
- * the character than there are in the input buffer, it can read past the
- * end. But we can make it safe if the input string happens to be
- * NUL-terminated, as many strings in Perl are, by refusing to read past a
- * NUL, which is what UTF8_CHK_SKIP() does. A NUL indicates the start of
- * the next character anyway. If the input isn't NUL-terminated, the
- * function remains unsafe, as it always has been. */
-
- return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen);
-}
-
/*
* ex: set ts=8 sts=4 sw=4 et:
*/