diff options
author | Father Chrysostomos <sprout@cpan.org> | 2011-09-29 08:48:38 -0700 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2011-10-06 13:01:10 -0700 |
commit | c682ebef862f40c7b7ed8a6175ecb457b9981787 (patch) | |
tree | 1fd18653eeb152b22027bdae16c29d35e89022d0 /mro.c | |
parent | 204e6232679d0d412347fddd9e5bd0e529da73d5 (diff) | |
download | perl-c682ebef862f40c7b7ed8a6175ecb457b9981787.tar.gz |
mro.c: Correct utf8 and bytes concatenation
The previous commit introduced some code that concatenates a pv on to
an sv and then does SvUTF8_on on the sv if the pv was utf8.
That can’t work if the sv was in Latin-1 (or single-byte) encoding
and contained extra-ASCII characters. Nor can it work if bytes are
appended to a utf8 sv. Both produce mangled utf8.
There is apparently no function apart from sv_catsv that handle
this. So I’ve modified sv_catpvn_flags to handle this if passed the
SV_CATUTF8 (concatenating a utf8 pv) or SV_CATBYTES (cancatenating a
byte pv) flag.
This avoids the overhead of creating a new sv (in fact, sv_catsv
even copies its rhs in some cases, so that would mean creating two
new svs). It might even be worthwhile to redefine sv_catsv in terms
of this....
Diffstat (limited to 'mro.c')
-rw-r--r-- | mro.c | 44 |
1 files changed, 26 insertions, 18 deletions
@@ -767,10 +767,11 @@ Perl_mro_package_moved(pTHX_ HV * const stash, HV * const oldstash, else sv_catpvs(namesv, "::"); } if (GvNAMELEN(gv) != 1) { - sv_catpvn(namesv, GvNAME(gv), GvNAMELEN(gv) - 2); + sv_catpvn_flags( + namesv, GvNAME(gv), GvNAMELEN(gv) - 2, /* skip trailing :: */ - if ( GvNAMEUTF8(gv) ) - SvUTF8_on(namesv); + GvNAMEUTF8(gv) ? SV_CATUTF8 : SV_CATBYTES + ); } } else { @@ -789,10 +790,11 @@ Perl_mro_package_moved(pTHX_ HV * const stash, HV * const oldstash, else sv_catpvs(aname, "::"); } if (GvNAMELEN(gv) != 1) { - sv_catpvn(aname, GvNAME(gv), GvNAMELEN(gv) - 2); + sv_catpvn_flags( + aname, GvNAME(gv), GvNAMELEN(gv) - 2, /* skip trailing :: */ - if ( GvNAMEUTF8(gv) ) - SvUTF8_on(aname); + GvNAMEUTF8(gv) ? SV_CATUTF8 : SV_CATBYTES + ); } av_push((AV *)namesv, aname); } @@ -1127,9 +1129,11 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, sv_catpvs(aname, ":"); else { sv_catpvs(aname, "::"); - sv_catpvn(aname, key, len-2); - if ( SvUTF8(keysv) ) - SvUTF8_on(aname); + sv_catpvn_flags( + aname, key, len-2, + SvUTF8(keysv) + ? SV_CATUTF8 : SV_CATBYTES + ); } av_push((AV *)subname, aname); } @@ -1139,9 +1143,10 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, if (len == 1) sv_catpvs(subname, ":"); else { sv_catpvs(subname, "::"); - sv_catpvn(subname, key, len-2); - if ( SvUTF8(keysv) ) - SvUTF8_on(subname); + sv_catpvn_flags( + subname, key, len-2, + SvUTF8(keysv) ? SV_CATUTF8 : SV_CATBYTES + ); } } mro_gather_and_rename( @@ -1209,9 +1214,11 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, sv_catpvs(aname, ":"); else { sv_catpvs(aname, "::"); - sv_catpvn(aname, key, len-2); - if ( SvUTF8(keysv) ) - SvUTF8_on(aname); + sv_catpvn_flags( + aname, key, len-2, + SvUTF8(keysv) + ? SV_CATUTF8 : SV_CATBYTES + ); } av_push((AV *)subname, aname); } @@ -1221,9 +1228,10 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, if (len == 1) sv_catpvs(subname, ":"); else { sv_catpvs(subname, "::"); - sv_catpvn(subname, key, len-2); - if ( SvUTF8(keysv) ) - SvUTF8_on(subname); + sv_catpvn_flags( + subname, key, len-2, + SvUTF8(keysv) ? SV_CATUTF8 : SV_CATBYTES + ); } } mro_gather_and_rename( |