summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Gisle Aas <gisle@aas.no> 2001-03-27 03:30:24 -0800
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-03-27 20:04:18 +0000
commit d5ce4a7cd9d16a6d235e1b45a0768034bbdac1f0 (patch)
tree bb93040ba9584bcd9f854c77a295ace06136d1c1
parent fd713a85eb6c0ac3df6fa25ed6c5b990a5c3d174 (diff)
download perl-d5ce4a7cd9d16a6d235e1b45a0768034bbdac1f0.tar.gz
Re: perl@9359 breaks HTML::Parser
Message-ID: <lrr8zjxb5b.fsf@caliper.ActiveState.com>

Clarify the UTF-8 issues of the API docs.
(Slightly reworded and expanded.)

p4raw-id: //depot/perl@9386
-rw-r--r-- pod/perlapi.pod 26
-rw-r--r-- sv.c 24
-rw-r--r-- sv.h 8
3 files changed, 36 insertions, 22 deletions
diff --git a/pod/perlapi.pod b/pod/perlapi.pod
index aaa9e90abd..711db4d828 100644
--- a/pod/perlapi.pod
+++ b/pod/perlapi.pod
@@ -2179,6 +2179,7 @@ Found in file sv.h
=item SvPOK_only
Tells an SV that it is a string and disables all other OK bits.
+Will also turn off the UTF8 status.
void SvPOK_only(SV* sv)
@@ -2187,8 +2188,8 @@ Found in file sv.h
=item SvPOK_only_UTF8
-Tells an SV that it is a UTF8 string (do not use frivolously)
-and disables all other OK bits.
+Tells an SV that it is a string and disables all other OK bits,
+and leaves the UTF8 status as it was.
void SvPOK_only_UTF8(SV* sv)
@@ -2494,7 +2495,8 @@ Found in file sv.h
=item SvUTF8_on
-Tells an SV that it is a string and encoded in UTF8. Do not use frivolously.
+Turn on the UTF8 status of an SV (the data is not changed, just the flag).
+Do not use frivolously.
void SvUTF8_on(SV *sv)
@@ -2544,7 +2546,8 @@ Found in file sv.c
=item sv_catpv
Concatenates the string onto the end of the string which is in the SV.
-Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
+If the SV has the UTF8 status set, then the bytes appended should be
+valid UTF8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
void sv_catpv(SV* sv, const char* ptr)
@@ -2553,9 +2556,13 @@ Found in file sv.c
=item sv_catpvf
-Processes its arguments like C<sprintf> and appends the formatted output
-to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must
-typically be called after calling this function to handle 'set' magic.
+Processes its arguments like C<sprintf> and appends the formatted
+output to an SV. If the appended data contains "wide" characters
+(including, but not limited to, SVs with a UTF-8 PV formatted with %s,
+and characters >255 formatted with %c), the original SV might get
+upgraded to UTF-8. Handles 'get' magic, but not 'set' magic.
+C<SvSETMAGIC()> must typically be called after calling this function
+to handle 'set' magic.
void sv_catpvf(SV* sv, const char* pat, ...)
@@ -2574,8 +2581,9 @@ Found in file sv.c
=item sv_catpvn
Concatenates the string onto the end of the string which is in the SV. The
-C<len> indicates number of bytes to copy. Handles 'get' magic, but not
-'set' magic. See C<sv_catpvn_mg>.
+C<len> indicates number of bytes to copy. If the SV has the UTF8
+status set, then the bytes appended should be valid UTF8.
+Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>.
void sv_catpvn(SV* sv, const char* ptr, STRLEN len)
diff --git a/sv.c b/sv.c
index 75b35a81ff..7e98be6aae 100644
--- a/sv.c
+++ b/sv.c
@@ -3817,8 +3817,9 @@ Perl_sv_chop(pTHX_ register SV *sv, register char *ptr) /* like set but assuming
=for apidoc sv_catpvn
Concatenates the string onto the end of the string which is in the SV. The
-C<len> indicates number of bytes to copy. Handles 'get' magic, but not
-'set' magic. See C<sv_catpvn_mg>.
+C<len> indicates number of bytes to copy. If the SV has the UTF8
+status set, then the bytes appended should be valid UTF8.
+Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>.
=cut
*/
@@ -3916,10 +3917,10 @@ Perl_sv_catsv_mg(pTHX_ SV *dsv, register SV *ssv)
=for apidoc sv_catpv
Concatenates the string onto the end of the string which is in the SV.
-Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
+If the SV has the UTF8 status set, then the bytes appended should be
+valid UTF8. Handles 'get' magic, but not 'set' magic. See C<sv_catpv_mg>.
-=cut
-*/
+=cut */
void
Perl_sv_catpv(pTHX_ register SV *sv, register const char *ptr)
@@ -6760,12 +6761,15 @@ Perl_sv_catpvf_mg_nocontext(SV *sv, const char* pat, ...)
/*
=for apidoc sv_catpvf
-Processes its arguments like C<sprintf> and appends the formatted output
-to an SV. Handles 'get' magic, but not 'set' magic. C<SvSETMAGIC()> must
-typically be called after calling this function to handle 'set' magic.
+Processes its arguments like C<sprintf> and appends the formatted
+output to an SV. If the appended data contains "wide" characters
+(including, but not limited to, SVs with a UTF-8 PV formatted with %s,
+and characters >255 formatted with %c), the original SV might get
+upgraded to UTF-8. Handles 'get' magic, but not 'set' magic.
+C<SvSETMAGIC()> must typically be called after calling this function
+to handle 'set' magic.
-=cut
-*/
+=cut */
void
Perl_sv_catpvf(pTHX_ SV *sv, const char* pat, ...)
diff --git a/sv.h b/sv.h
index 2785f14595..ab04b0529a 100644
--- a/sv.h
+++ b/sv.h
@@ -478,6 +478,7 @@ Unsets the PV status of an SV.
=for apidoc Am|void|SvPOK_only|SV* sv
Tells an SV that it is a string and disables all other OK bits.
+Will also turn off the UTF8 status.
=for apidoc Am|bool|SvOOK|SV* sv
Returns a boolean indicating whether the SvIVX is a valid offset value for
@@ -584,14 +585,15 @@ Set the length of the string which is in the SV. See C<SvCUR>.
Returns a boolean indicating whether the SV contains UTF-8 encoded data.
=for apidoc Am|void|SvUTF8_on|SV *sv
-Tells an SV that it is a string and encoded in UTF8. Do not use frivolously.
+Turn on the UTF8 status of an SV (the data is not changed, just the flag).
+Do not use frivolously.
=for apidoc Am|void|SvUTF8_off|SV *sv
Unsets the UTF8 status of an SV.
=for apidoc Am|void|SvPOK_only_UTF8|SV* sv
-Tells an SV that it is a UTF8 string (do not use frivolously)
-and disables all other OK bits.
+Tells an SV that it is a string and disables all other OK bits,
+and leaves the UTF8 status as it was.
=cut
*/