Subject: PATCH 5.10 documentation

From: karl williamson <public@khwilliamson.com>
Date: Tue, 16 Dec 2008 16:00:34 -0700
Message-ID: <49483312.80804@khwilliamson.com>
author: Steve Peters <steve@fisharerojo.org> 2008-12-19 11:38:31 -0600
committer: Steve Peters <steve@fisharerojo.org> 2008-12-19 11:38:31 -0600
commit: 2bbc8d558d247c6ef91207a12a4650c0bc292dd6 (patch)
tree: f56c82008dc643d8e799b8e21fb9a3c36b64b3b4 /utf8.c
parent: 7df2e4bc09d8ad053532c5f9232b2d713856c938 (diff)
download: perl-2bbc8d558d247c6ef91207a12a4650c0bc292dd6.tar.gz
1 files changed, 13 insertions, 9 deletions
diff --git a/utf8.c b/utf8.c
index ecbc4ea61c..8243793959 100644
--- a/utf8.c
+++ b/utf8.c
@@ -247,9 +247,9 @@ S_is_utf8_char_slow(const U8 *s, const STRLEN len)
 =for apidoc is_utf8_char
 
 Tests if some arbitrary number of bytes begins in a valid UTF-8
-character.  Note that an INVARIANT (i.e. ASCII) character is a valid
-UTF-8 character.  The actual number of bytes in the UTF-8 character
-will be returned if it is valid, otherwise 0.
+character.  Note that an INVARIANT (i.e. ASCII on non-EBCDIC machines)
+character is a valid UTF-8 character.  The actual number of bytes in the UTF-8
+character will be returned if it is valid, otherwise 0.
 
 =cut */
 STRLEN
@@ -648,7 +648,7 @@ Returns the Unicode code point of the first character in the string C<s>
 which is assumed to be in UTF-8 encoding; C<retlen> will be set to the
 length, in bytes, of that character.
 
-This function should only be used when returned UV is considered
+This function should only be used when the returned UV is considered
 an index into the Unicode semantic tables (e.g. swashes).
 
 If C<s> does not point to a well-formed UTF-8 character, zero is
@@ -772,7 +772,7 @@ Perl_utf8_hop(pTHX_ const U8 *s, I32 off)
 /*
 =for apidoc utf8_to_bytes
 
-Converts a string C<s> of length C<len> from UTF-8 into byte encoding.
+Converts a string C<s> of length C<len> from UTF-8 into native byte encoding.
 Unlike C<bytes_to_utf8>, this over-writes the original string, and
 updates len to contain the new length.
 Returns zero on failure, setting C<len> to -1.
@@ -817,12 +817,13 @@ Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *len)
 /*
 =for apidoc bytes_from_utf8
 
-Converts a string C<s> of length C<len> from UTF-8 into byte encoding.
+Converts a string C<s> of length C<len> from UTF-8 into native byte encoding.
 Unlike C<utf8_to_bytes> but like C<bytes_to_utf8>, returns a pointer to
 the newly-created string, and updates C<len> to contain the new
 length.  Returns the original string if no conversion occurs, C<len>
 is unchanged. Do nothing if C<is_utf8> points to 0. Sets C<is_utf8> to
-0 if C<s> is converted or contains all 7bit characters.
+0 if C<s> is converted or consists entirely of characters that are invariant
+in UTF-8 (i.e., US-ASCII on non-EBCDIC machines).
 
 =cut
 */
@@ -874,11 +875,14 @@ Perl_bytes_from_utf8(pTHX_ const U8 *s, STRLEN *len, bool *is_utf8)
 /*
 =for apidoc bytes_to_utf8
 
-Converts a string C<s> of length C<len> from ASCII into UTF-8 encoding.
+Converts a string C<s> of length C<len> from the native encoding into UTF-8.
 Returns a pointer to the newly-created string, and sets C<len> to
 reflect the new length.
 
-If you want to convert to UTF-8 from other encodings than ASCII,
+A NUL character will be written after the end of the string.
+
+If you want to convert to UTF-8 from encodings other than
+the native (Latin1 or EBCDIC),
 see sv_recode_to_utf8().
 
 =cut
author	Steve Peters <steve@fisharerojo.org>	2008-12-19 11:38:31 -0600
committer	Steve Peters <steve@fisharerojo.org>	2008-12-19 11:38:31 -0600
commit	2bbc8d558d247c6ef91207a12a4650c0bc292dd6 (patch)
tree	f56c82008dc643d8e799b8e21fb9a3c36b64b3b4 /utf8.c
parent	7df2e4bc09d8ad053532c5f9232b2d713856c938 (diff)
download	perl-2bbc8d558d247c6ef91207a12a4650c0bc292dd6.tar.gz