diff options
author | Yitzchak Scott-Thoennes <sthoenna@efn.org> | 2000-11-10 01:47:15 -0800 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-11-13 04:05:07 +0000 |
commit | a1ca4561f20dbf547f57d39a690790cbe33210da (patch) | |
tree | 83f9af82cb7a2699ca5ca248e9907ac52b3924a8 /pp.c | |
parent | d26ab924bbea5dd5379307deb59c11af3692350b (diff) | |
download | perl-a1ca4561f20dbf547f57d39a690790cbe33210da.tar.gz |
Tweak the definition of the bit complement (~) on UTF-8 data:
if none of the characters in the string is > 0xff,
the result is a complemented byte string, not a (UTF-8)
character string. Based on the summary in
Subject: Re: [ID 20000918.005] ~ on wide chars
Message-ID: <jSDD6gzkgi/T092yn@efn.org>
This should give us maximum backward compatibility (with the
pre-character-string behavior) as well as UTF-8 compatibility.
The alternative would be to always limit the bit complement to
bytes, taking the least significant byte of each character.
p4raw-id: //depot/perl@7665
Diffstat (limited to 'pp.c')
-rw-r--r-- | pp.c | 39 |
1 files changed, 29 insertions, 10 deletions
@@ -1476,31 +1476,50 @@ PP(pp_complement) tmps = (U8*)SvPV_force(TARG, len); anum = len; if (SvUTF8(TARG)) { - /* Calculate exact length, let's not estimate */ + /* Calculate exact length, let's not estimate. */ STRLEN targlen = 0; U8 *result; U8 *send; STRLEN l; + UV nchar = 0; + UV nwide = 0; send = tmps + len; while (tmps < send) { UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY); tmps += UTF8SKIP(tmps); targlen += UNISKIP(~c); + nchar++; + if (c > 0xff) + nwide++; } /* Now rewind strings and write them. */ tmps -= len; - Newz(0, result, targlen + 1, U8); - while (tmps < send) { - UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY); - tmps += UTF8SKIP(tmps); - result = uv_to_utf8(result,(UV)~c); + + if (nwide) { + Newz(0, result, targlen + 1, U8); + while (tmps < send) { + UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY); + tmps += UTF8SKIP(tmps); + result = uv_to_utf8(result, ~c); + } + *result = '\0'; + result -= targlen; + sv_setpvn(TARG, (char*)result, targlen); + SvUTF8_on(TARG); + } + else { + Newz(0, result, nchar + 1, U8); + while (tmps < send) { + U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY); + tmps += UTF8SKIP(tmps); + *result++ = ~c; + } + *result = '\0'; + result -= nchar; + sv_setpvn(TARG, (char*)result, nchar); } - *result = '\0'; - result -= targlen; - sv_setpvn(TARG, (char*)result, targlen); - SvUTF8_on(TARG); Safefree(result); SETs(TARG); RETURN; |