diff options
author | Karl Williamson <khw@cpan.org> | 2014-11-18 22:02:21 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-11-20 21:45:18 -0700 |
commit | 5eef447b12283016e9c57a31362edb829884a7ed (patch) | |
tree | d1bd82d014afa163f7c5aaaac3ee9bd7bff779f8 /toke.c | |
parent | d5b4785c1592d741608f5cff697058cc284db4eb (diff) | |
download | perl-5eef447b12283016e9c57a31362edb829884a7ed.tar.gz |
toke.c: Consistently upgrade under encoding
The documentation says that intermixing above-Latin1 code points with
ones that would otherwise be encoded to something else, like Greek,
causes the encoding to be forgone. Until this commit, this only
happened when the above-Latin1 code point came first in the string
constant being scanned, meaning that string order was important. This
commit changes things to match the documentation.
Diffstat (limited to 'toke.c')
-rw-r--r-- | toke.c | 10 |
1 files changed, 7 insertions, 3 deletions
@@ -3189,9 +3189,13 @@ S_scan_const(pTHX_ char *start)
                     SvPOK_on(sv);
                     *d = '\0';
                     /* See Note on sizing above. */
-                    sv_utf8_upgrade_flags_grow(sv,
-                                        SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                        UNISKIP(uv) + (STRLEN)(send - s) + 1);
+                    sv_utf8_upgrade_flags_grow(
+                                         sv,
+                                         SV_GMAGIC|SV_FORCE_UTF8_UPGRADE
+                                                  /* Above-latin1 in string
+                                                   * implies no encoding */
+                                                  |SV_UTF8_NO_ENCODING,
+                                         UNISKIP(uv) + (STRLEN)(send - s) + 1);
                     d = SvPVX(sv) + SvCUR(sv);
                     has_utf8 = TRUE;
                 }