diff options
| field | value | date |
|---|---|---|
| author | Gisle Aas <gisle@aas.no> | 2001-03-27 03:30:24 -0800 |
| committer | Jarkko Hietaniemi <jhi@iki.fi> | 2001-03-27 20:04:18 +0000 |
| commit | d5ce4a7cd9d16a6d235e1b45a0768034bbdac1f0 (patch) | |
| tree | bb93040ba9584bcd9f854c77a295ace06136d1c1 | |
| parent | fd713a85eb6c0ac3df6fa25ed6c5b990a5c3d174 (diff) | |
| download | perl-d5ce4a7cd9d16a6d235e1b45a0768034bbdac1f0.tar.gz | |
Re: perl@9359 breaks HTML::Parser
Message-ID: <lrr8zjxb5b.fsf@caliper.ActiveState.com>
Clarify the UTF-8 issues of the API docs.
(Slightly reworded and expanded.)
p4raw-id: //depot/perl@9386
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | pod/perlapi.pod | 26 |
| -rw-r--r-- | sv.c | 24 |
| -rw-r--r-- | sv.h | 8 |

3 files changed, 36 insertions, 22 deletions
```diff
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index aaa9e90abd..711db4d828 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -2179,6 +2179,7 @@ Found in file sv.h
 =item SvPOK_only
 
 Tells an SV that it is a string and disables all other OK bits.
+Will also turn off the UTF8 status.
 
 void SvPOK_only(SV* sv)
 
@@ -2187,8 +2188,8 @@ Found in file sv.h
 
 =item SvPOK_only_UTF8
 
-Tells an SV that it is a UTF8 string (do not use frivolously)
-and disables all other OK bits.
+Tells an SV that it is a string and disables all other OK bits,
+and leaves the UTF8 status as it was.
 
 void SvPOK_only_UTF8(SV* sv)
 
@@ -2494,7 +2495,8 @@ Found in file sv.h
 
 =item SvUTF8_on
 
-Tells an SV that it is a string and encoded in UTF8. Do not use frivolously.
+Turn on the UTF8 status of an SV (the data is not changed, just the flag).
+Do not use frivolously.
 
 void SvUTF8_on(SV *sv)
 
@@ -2544,7 +2546,8 @@ Found in file sv.c
 =item sv_catpv
 
 Concatenates the string onto the end of the string which is in the SV.
-Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
+If the SV has the UTF8 status set, then the bytes appended should be
+valid UTF8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
 
 void sv_catpv(SV* sv, const char* ptr)
 
@@ -2553,9 +2556,13 @@ Found in file sv.c
 
 =item sv_catpvf
 
-Processes its arguments like C<sprintf> and appends the formatted output
-to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must
-typically be called after calling this function to handle 'set' magic.
+Processes its arguments like C<sprintf> and appends the formatted
+output to an SV. If the appended data contains "wide" characters
+(including, but not limited to, SVs with a UTF-8 PV formatted with %s,
+and characters >255 formatted with %c), the original SV might get
+upgraded to UTF-8. Handles 'get' magic, but not 'set' magic.
+C<SvSETMAGIC()> must typically be called after calling this function
+to handle 'set' magic.
 
 void sv_catpvf(SV* sv, const char* pat, ...)
 
@@ -2574,8 +2581,9 @@ Found in file sv.c
 =item sv_catpvn
 
 Concatenates the string onto the end of the string which is in the SV. The
-C<len> indicates number of bytes to copy. Handles 'get' magic, but not
-'set' magic. See C<sv_catpvn_mg>.
+C<len> indicates number of bytes to copy. If the SV has the UTF8
+status set, then the bytes appended should be valid UTF8.
+Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>.
 
 void sv_catpvn(SV* sv, const char* ptr, STRLEN len)
 
diff --git a/sv.c b/sv.c
--- a/sv.c
+++ b/sv.c
@@ -3817,8 +3817,9 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) /* like set but assuming
 =for apidoc sv_catpvn
 
 Concatenates the string onto the end of the string which is in the SV. The
-C<len> indicates number of bytes to copy. Handles 'get' magic, but not
-'set' magic. See C<sv_catpvn_mg>.
+C<len> indicates number of bytes to copy. If the SV has the UTF8
+status set, then the bytes appended should be valid UTF8.
+Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>.
 
 =cut
 */
@@ -3916,10 +3917,10 @@ Perl_sv_catsv_mg(pTHX_ SV *dsv, register SV *ssv)
 =for apidoc sv_catpv
 
 Concatenates the string onto the end of the string which is in the SV.
-Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
+If the SV has the UTF8 status set, then the bytes appended should be
+valid UTF8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
 
-=cut
-*/
+=cut */
 
 void
 Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr)
@@ -6760,12 +6761,15 @@ Perl_sv_catpvf_mg_nocontext(SV *sv, const char* pat, ...)
 /*
 =for apidoc sv_catpvf
 
-Processes its arguments like C<sprintf> and appends the formatted output
-to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must
-typically be called after calling this function to handle 'set' magic.
+Processes its arguments like C<sprintf> and appends the formatted
+output to an SV. If the appended data contains "wide" characters
+(including, but not limited to, SVs with a UTF-8 PV formatted with %s,
+and characters >255 formatted with %c), the original SV might get
+upgraded to UTF-8. Handles 'get' magic, but not 'set' magic.
+C<SvSETMAGIC()> must typically be called after calling this function
+to handle 'set' magic.
 
-=cut
-*/
+=cut */
 
 void
 Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...)
diff --git a/sv.h b/sv.h
--- a/sv.h
+++ b/sv.h
@@ -478,6 +478,7 @@ Unsets the PV status of an SV.
 
 =for apidoc Am|void|SvPOK_only|SV* sv
 Tells an SV that it is a string and disables all other OK bits.
+Will also turn off the UTF8 status.
 
 =for apidoc Am|bool|SvOOK|SV* sv
 Returns a boolean indicating whether the SvIVX is a valid offset value for
@@ -584,14 +585,15 @@ Set the length of the string which is in the SV. See C<SvCUR>.
 Returns a boolean indicating whether the SV contains UTF-8 encoded data.
 
 =for apidoc Am|void|SvUTF8_on|SV *sv
-Tells an SV that it is a string and encoded in UTF8. Do not use frivolously.
+Turn on the UTF8 status of an SV (the data is not changed, just the flag).
+Do not use frivolously.
 
 =for apidoc Am|void|SvUTF8_off|SV *sv
 Unsets the UTF8 status of an SV.
 
 =for apidoc Am|void|SvPOK_only_UTF8|SV* sv
-Tells an SV that it is a UTF8 string (do not use frivolously)
-and disables all other OK bits.
+Tells an SV that it is a string and disables all other OK bits,
+and leaves the UTF8 status as it was.
 
 =cut
 */
```