utf8.c: Stop using two functions

This is in preparation for deprecating these functions, to force any code that has been using them to change. Since the Unicode tables are now stored in native order, these functions should only rarely be needed. However, their functionality is still needed, and in fact, on ASCII platforms, the native functions are #defined to them. So this commit renames the functions to something else and creates wrappers with the old names, so that anyone using the old names will get the deprecation warning when it actually goes into effect; we are waiting for the CPAN files distributed with the core to change before doing the deprecation. According to cpan.grep.me, this should affect fewer than 10 additional CPAN distributions.
author: Karl Williamson <public@khwilliamson.com> 2013-02-26 12:08:50 -0700
committer: Karl Williamson <public@khwilliamson.com> 2013-08-29 09:55:58 -0600
commit: 378516de21aea9be747038c25876881aaf56e166 (patch)
tree: c4130c17547e61ec24210b629f60f18895d29396 /utf8.c
parent: a27992ccf5d1a0c50667fb21ba8ca973f50a7508 (diff)
download: perl-378516de21aea9be747038c25876881aaf56e166.tar.gz
1 files changed, 21 insertions, 18 deletions
diff --git a/utf8.c b/utf8.c
index 22f5331aa2..945e31c7bf 100644
--- a/utf8.c
+++ b/utf8.c
@@ -87,7 +87,7 @@ Perl_is_ascii_string(const U8 *s, STRLEN len)
 }
 
 /*
-=for apidoc uvuni_to_utf8_flags
+=for apidoc uvoffuni_to_utf8_flags
 
 THIS FUNCTION SHOULD BE USED IN ONLY VERY SPECIALIZED CIRCUMSTANCES.
 
@@ -96,11 +96,11 @@ of the string C<d>; C<d> should have at least C<UTF8_MAXBYTES+1> free
 bytes available. The return value is the pointer to the byte after the
 end of the new character. In other words,
 
-    d = uvuni_to_utf8_flags(d, uv, flags);
+    d = uvoffuni_to_utf8_flags(d, uv, flags);
 
 or, in most cases,
 
-    d = uvuni_to_utf8_flags(d, uv, 0);
+    d = uvoffuni_to_utf8_flags(d, uv, 0);
 
 This is the Unicode-aware way of saying
 
@@ -137,9 +137,9 @@ DISALLOW flags.
 */
 
 U8 *
-Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
+Perl_uvoffuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
 {
-    PERL_ARGS_ASSERT_UVUNI_TO_UTF8_FLAGS;
+    PERL_ARGS_ASSERT_UVOFFUNI_TO_UTF8_FLAGS;
 
     /* The first problematic code point is the first surrogate */
     if (uv >= UNICODE_SURROGATE_FIRST
@@ -475,10 +475,13 @@ Perl_is_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el)
 
 /*
 
-=for apidoc utf8n_to_uvuni
+=for apidoc utf8n_to_uvoffuni
+
+THIS FUNCTION SHOULD BE USED IN ONLY VERY SPECIALIZED CIRCUMSTANCES.
 
 Bottom level UTF-8 decode routine.
-Returns the code point value of the first character in the string C<s>,
+Returns the official Unicode (not native) code point value of the first
+character in the string C<s>,
 which is assumed to be in UTF-8 (or UTF-EBCDIC) encoding, and no longer than
 C<curlen> bytes; C<*retlen> (if C<retlen> isn't NULL) will be set to
 the length, in bytes, of that character.
@@ -553,7 +556,7 @@ Most code should use L</utf8_to_uvchr_buf>() rather than call this directly.
 */
 
 UV
-Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
+Perl_utf8n_to_uvoffuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
 {
     dVAR;
     const U8 * const s0 = s;
@@ -571,7 +574,7 @@ Perl_utf8n_to_uvuni(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen, U32 flags)
 
     const char* const malformed_text = "Malformed UTF-8 character";
 
-    PERL_ARGS_ASSERT_UTF8N_TO_UVUNI;
+    PERL_ARGS_ASSERT_UTF8N_TO_UVOFFUNI;
 
     /* The order of malformation tests here is important.  We should consume as
      * few bytes as possible in order to not skip any valid character.  This is
@@ -905,7 +908,7 @@ NULL) to -1.  If those warnings are off, the computed value, if well-defined
 (or the Unicode REPLACEMENT CHARACTER if not), is silently returned, and
 C<*retlen> is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is
 the next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is
 returned.
 
 =cut
@@ -978,7 +981,7 @@ NULL) to -1.  If those warnings are off, the computed value if well-defined (or
 the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
 is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
 next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
 
 =cut
 */
@@ -1008,7 +1011,7 @@ NULL) to -1.  If those warnings are off, the computed value if well-defined (or
 the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
 is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
 next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
 
 =cut
 */
@@ -1021,7 +1024,7 @@ Perl_utf8_to_uvuni_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen)
     assert(send > s);
 
     /* Call the low level routine asking for checks */
-    return Perl_utf8n_to_uvuni(aTHX_ s, send -s, retlen,
+    return Perl_utf8n_to_uvoffuni(aTHX_ s, send -s, retlen,
 			       ckWARN_d(WARN_UTF8) ? 0 : UTF8_ALLOW_ANY);
 }
 
@@ -1057,7 +1060,7 @@ NULL) to -1.  If those warnings are off, the computed value if well-defined (or
 the Unicode REPLACEMENT CHARACTER, if not) is silently returned, and C<*retlen>
 is set (if C<retlen> isn't NULL) so that (S<C<s> + C<*retlen>>) is the
 next possible position in C<s> that could begin a non-malformed character.
-See L</utf8n_to_uvuni> for details on when the REPLACEMENT CHARACTER is returned.
+See L</utf8n_to_uvoffuni> for details on when the REPLACEMENT CHARACTER is returned.
 
 =cut
 */
@@ -4191,7 +4194,7 @@ Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv)
 {
     PERL_ARGS_ASSERT_UVCHR_TO_UTF8;
 
-    return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
+    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0);
 }
 
 U8 *
@@ -4199,7 +4202,7 @@ Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
 {
     PERL_ARGS_ASSERT_UVCHR_TO_UTF8_FLAGS;
 
-    return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
+    return Perl_uvoffuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags);
 }
 
 /*
@@ -4210,7 +4213,7 @@ C<s>
 which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
 length, in bytes, of that character.
 
-C<length> and C<flags> are the same as L</utf8n_to_uvuni>().
+C<length> and C<flags> are the same as L</utf8n_to_uvoffuni>().
 
 =cut
 */
@@ -4221,7 +4224,7 @@ UV
 Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, STRLEN *retlen,
 U32 flags)
 {
-    const UV uv = Perl_utf8n_to_uvuni(aTHX_ s, curlen, retlen, flags);
+    const UV uv = Perl_utf8n_to_uvoffuni(aTHX_ s, curlen, retlen, flags);
 
     PERL_ARGS_ASSERT_UTF8N_TO_UVCHR;
author	Karl Williamson <public@khwilliamson.com>	2013-02-26 12:08:50 -0700
committer	Karl Williamson <public@khwilliamson.com>	2013-08-29 09:55:58 -0600
commit	378516de21aea9be747038c25876881aaf56e166 (patch)
tree	c4130c17547e61ec24210b629f60f18895d29396 /utf8.c
parent	a27992ccf5d1a0c50667fb21ba8ca973f50a7508 (diff)
download	perl-378516de21aea9be747038c25876881aaf56e166.tar.gz