diff options
author | Simon Cozens <simon@netthink.co.uk> | 2001-01-16 13:42:30 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-01-16 15:42:04 +0000 |
commit | eebe148573e5a07582cda46391148ca89c563ade (patch) | |
tree | d9e4d82d250996b9854a3b3c7c3a61cde15b9283 /pod/perlapi.pod | |
parent | 77003bb1f5b79e478d4e39dbf22a7d33aacd2fd5 (diff) | |
download | perl-eebe148573e5a07582cda46391148ca89c563ade.tar.gz |
Re: API Cleanup
To: perl5-porters@perl.org
Date: Tue, 16 Jan 2001 13:42:30 +0000
Message-ID: <20010116134230.A13420@pembro26.pmb.ox.ac.uk>
Subject: [PATCH] utf8.c documentation
Date: Tue, 16 Jan 2001 13:52:48 +0000
Message-ID: <20010116135248.A13496@pembro26.pmb.ox.ac.uk>
Subject: Re: API Cleanup
From: Simon Cozens <simon@cozens.net>
Date: Tue, 16 Jan 2001 14:58:55 +0000
Message-ID: <20010116145855.A13794@pembro26.pmb.ox.ac.uk>
UTF-8 doc patches.
p4raw-id: //depot/perl@8452
Diffstat (limited to 'pod/perlapi.pod')
-rw-r--r-- | pod/perlapi.pod | 77 |
1 files changed, 56 insertions, 21 deletions
diff --git a/pod/perlapi.pod b/pod/perlapi.pod index e676431210..25fe18a0b6 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -188,7 +188,10 @@ Converts a string C<s> of length C<len> from ASCII into UTF8 encoding. Returns a pointer to the newly-created string, and sets C<len> to reflect the new length. - U8 * bytes_to_utf8(U8 *s, STRLEN *len) +NOTE: this function is experimental and may change or be +removed without notice. + + U8* bytes_to_utf8(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -1013,6 +1016,27 @@ character. =for hackers Found in file handy.h +=item is_utf8_char + +Tests if some arbitrary number of bytes begins in a valid UTF-8 character. +The actual number of bytes in the UTF-8 character will be returned if it +is valid, otherwise 0. + + STRLEN is_utf8_char(U8 *p) + +=for hackers +Found in file utf8.c + +=item is_utf8_string + +Returns true if first C<len> bytes of the given string form a valid UTF8 +string, false otherwise. + + bool is_utf8_string(U8 *s, STRLEN len) + +=for hackers +Found in file utf8.c + =item items Variable which is setup by C<xsubpp> to indicate the number of @@ -2396,19 +2420,19 @@ false, defined or undefined. Does not handle 'get' magic. =for hackers Found in file sv.h -=item SvTYPE - -Returns the type of the SV. See C<svtype>. +=item svtype - svtype SvTYPE(SV* sv) +An enum of flags for Perl types. These are found in the file B<sv.h> +in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. =for hackers Found in file sv.h -=item svtype +=item SvTYPE -An enum of flags for Perl types. These are found in the file B<sv.h> -in the C<svtype> enum. Test these flags with the C<SvTYPE> macro. +Returns the type of the SV. See C<svtype>. + + svtype SvTYPE(SV* sv) =for hackers Found in file sv.h @@ -3247,16 +3271,6 @@ Converts the specified character to uppercase. =for hackers Found in file handy.h -=item U8 *s - -Returns true if first C<len> bytes of the given string form valid a UTF8 -string, false otherwise. 
- - is_utf8_string U8 *s(STRLEN len) - -=for hackers -Found in file utf8.c - =item utf8_distance Returns the number of UTF8 characters between the UTF-8 pointers C<a> @@ -3302,7 +3316,10 @@ Unlike C<bytes_to_utf8>, this over-writes the original string, and updates len to contain the new length. Returns zero on failure, setting C<len> to -1. - U8 * utf8_to_bytes(U8 *s, STRLEN *len) +NOTE: this function is experimental and may change or be +removed without notice. + + U8* utf8_to_bytes(U8 *s, STRLEN *len) =for hackers Found in file utf8.c @@ -3324,7 +3341,7 @@ length of the UTF-8 character in bytes, and zero will be returned. The C<flags> can also contain various flags to allow deviations from the strict UTF-8 encoding (see F<utf8.h>). - U8* s utf8_to_uv(STRLEN curlen, STRLEN *retlen, U32 flags) + UV utf8_to_uv(U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags) =for hackers Found in file utf8.c @@ -3338,7 +3355,25 @@ length, in bytes, of that character. If C<s> does not point to a well-formed UTF8 character, zero is returned and retlen is set, if possible, to -1. - U8* s utf8_to_uv_simple(STRLEN *retlen) + UV utf8_to_uv_simple(U8 *s, STRLEN* retlen) + +=for hackers +Found in file utf8.c + +=item uv_to_utf8 + +Adds the UTF8 representation of the Unicode codepoint C<uv> to the end +of the string C<d>; C<d> should have at least C<UTF8_MAXLEN+1> free +bytes available. The return value is the pointer to the byte after the +end of the new character. In other words, + + d = uv_to_utf8(d, uv); + +is the recommended Unicode-aware way of saying + + *(d++) = uv; + + U8* uv_to_utf8(U8 *d, UV uv) =for hackers Found in file utf8.c |