summary | refs | log | tree | commit | diff
path: root/pod/perlapi.pod
diff options
context:
space:
mode:
author Simon Cozens <simon@netthink.co.uk> 2001-01-16 13:42:30 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-01-16 15:42:04 +0000
commit eebe148573e5a07582cda46391148ca89c563ade (patch)
tree d9e4d82d250996b9854a3b3c7c3a61cde15b9283 /pod/perlapi.pod
parent 77003bb1f5b79e478d4e39dbf22a7d33aacd2fd5 (diff)
download perl-eebe148573e5a07582cda46391148ca89c563ade.tar.gz
Re: API Cleanup
To: perl5-porters@perl.org Date: Tue, 16 Jan 2001 13:42:30 +0000 Message-ID: <20010116134230.A13420@pembro26.pmb.ox.ac.uk> Subject: [PATCH] utf8.c documentation Date: Tue, 16 Jan 2001 13:52:48 +0000 Message-ID: <20010116135248.A13496@pembro26.pmb.ox.ac.uk> Subject: Re: API Cleanup From: Simon Cozens <simon@cozens.net> Date: Tue, 16 Jan 2001 14:58:55 +0000 Message-ID: <20010116145855.A13794@pembro26.pmb.ox.ac.uk> UTF-8 doc patches. p4raw-id: //depot/perl@8452
Diffstat (limited to 'pod/perlapi.pod')
-rw-r--r-- pod/perlapi.pod 77
1 files changed, 56 insertions, 21 deletions
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index e676431210..25fe18a0b6 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -188,7 +188,10 @@ Converts a string C<s> of length C<len> from ASCII into UTF8 encoding.
Returns a pointer to the newly-created string, and sets C<len> to
reflect the new length.
- U8 * bytes_to_utf8(U8 *s, STRLEN *len)
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ U8* bytes_to_utf8(U8 *s, STRLEN *len)
=for hackers
Found in file utf8.c
@@ -1013,6 +1016,27 @@ character.
=for hackers
Found in file handy.h
+=item is_utf8_char
+
+Tests if some arbitrary number of bytes begins with a valid UTF-8 character.
+The actual number of bytes in the UTF-8 character will be returned if it
+is valid, otherwise 0.
+
+ STRLEN is_utf8_char(U8 *p)
+
+=for hackers
+Found in file utf8.c
+
+=item is_utf8_string
+
+Returns true if the first C<len> bytes of the given string form a valid UTF8
+string, false otherwise.
+
+ bool is_utf8_string(U8 *s, STRLEN len)
+
+=for hackers
+Found in file utf8.c
+
=item items
Variable which is setup by C<xsubpp> to indicate the number of
@@ -2396,19 +2420,19 @@ false, defined or undefined. Does not handle 'get' magic.
=for hackers
Found in file sv.h
-=item SvTYPE
-
-Returns the type of the SV. See C<svtype>.
+=item svtype
- svtype SvTYPE(SV* sv)
+An enum of flags for Perl types. These are found in the file B<sv.h>
+in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
=for hackers
Found in file sv.h
-=item svtype
+=item SvTYPE
-An enum of flags for Perl types. These are found in the file B<sv.h>
-in the C<svtype> enum. Test these flags with the C<SvTYPE> macro.
+Returns the type of the SV. See C<svtype>.
+
+ svtype SvTYPE(SV* sv)
=for hackers
Found in file sv.h
@@ -3247,16 +3271,6 @@ Converts the specified character to uppercase.
=for hackers
Found in file handy.h
-=item U8 *s
-
-Returns true if first C<len> bytes of the given string form valid a UTF8
-string, false otherwise.
-
- is_utf8_string U8 *s(STRLEN len)
-
-=for hackers
-Found in file utf8.c
-
=item utf8_distance
Returns the number of UTF8 characters between the UTF-8 pointers C<a>
@@ -3302,7 +3316,10 @@ Unlike C<bytes_to_utf8>, this over-writes the original string, and
updates len to contain the new length.
Returns zero on failure, setting C<len> to -1.
- U8 * utf8_to_bytes(U8 *s, STRLEN *len)
+NOTE: this function is experimental and may change or be
+removed without notice.
+
+ U8* utf8_to_bytes(U8 *s, STRLEN *len)
=for hackers
Found in file utf8.c
@@ -3324,7 +3341,7 @@ length of the UTF-8 character in bytes, and zero will be returned.
The C<flags> can also contain various flags to allow deviations from
the strict UTF-8 encoding (see F<utf8.h>).
- U8* s utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags)
+ UV utf8_to_uv(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags)
=for hackers
Found in file utf8.c
@@ -3338,7 +3355,25 @@ length, in bytes, of that character.
If C<s> does not point to a well-formed UTF8 character, zero is
returned and retlen is set, if possible, to -1.
- U8* s utf8_to_uv_simple(STRLEN *retlen)
+ UV utf8_to_uv_simple(U8 *s, STRLEN* retlen)
+
+=for hackers
+Found in file utf8.c
+
+=item uv_to_utf8
+
+Adds the UTF8 representation of the Unicode codepoint C<uv> to the end
+of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free
+bytes available. The return value is the pointer to the byte after the
+end of the new character. In other words,
+
+ d = uv_to_utf8(d, uv);
+
+is the recommended Unicode-aware way of saying
+
+ *(d++) = uv;
+
+ U8* uv_to_utf8(U8 *d, UV uv)
=for hackers
Found in file utf8.c