summaryrefslogtreecommitdiff
path: root/pp.c
diff options
context:
space:
mode:
authorYitzchak Scott-Thoennes <sthoenna@efn.org>2000-11-10 01:47:15 -0800
committerJarkko Hietaniemi <jhi@iki.fi>2000-11-13 04:05:07 +0000
commita1ca4561f20dbf547f57d39a690790cbe33210da (patch)
tree83f9af82cb7a2699ca5ca248e9907ac52b3924a8 /pp.c
parentd26ab924bbea5dd5379307deb59c11af3692350b (diff)
downloadperl-a1ca4561f20dbf547f57d39a690790cbe33210da.tar.gz
Tweak the definition of the bit complement on UTF-8 data:
if none of the characters in the string is > 0xff, the result is a complemented byte string, not a (UTF-8) character string. Based on the summary in the message with Subject: "Re: [ID 20000918.005] ~ on wide chars" and Message-ID: <jSDD6gzkgi/T092yn@efn.org>. This should give us maximum backward compatibility (with pre-character-string behaviour) as well as UTF-8 compatibility. The alternative would be to limit the bit complement to always operate on bytes only, taking the least significant byte of each character. p4raw-id: //depot/perl@7665
Diffstat (limited to 'pp.c')
-rw-r--r-- pp.c 39
1 files changed, 29 insertions, 10 deletions
diff --git a/pp.c b/pp.c
index cc3f7ebf06..2a414b85af 100644
--- a/pp.c
+++ b/pp.c
@@ -1476,31 +1476,50 @@ PP(pp_complement)
tmps = (U8*)SvPV_force(TARG, len);
anum = len;
if (SvUTF8(TARG)) {
- /* Calculate exact length, let's not estimate */
+ /* Calculate exact length, let's not estimate. */
STRLEN targlen = 0;
U8 *result;
U8 *send;
STRLEN l;
+ UV nchar = 0;
+ UV nwide = 0;
send = tmps + len;
while (tmps < send) {
UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
tmps += UTF8SKIP(tmps);
targlen += UNISKIP(~c);
+ nchar++;
+ if (c > 0xff)
+ nwide++;
}
/* Now rewind strings and write them. */
tmps -= len;
- Newz(0, result, targlen + 1, U8);
- while (tmps < send) {
- UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
- tmps += UTF8SKIP(tmps);
- result = uv_to_utf8(result,(UV)~c);
+
+ if (nwide) {
+ Newz(0, result, targlen + 1, U8);
+ while (tmps < send) {
+ UV c = utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ tmps += UTF8SKIP(tmps);
+ result = uv_to_utf8(result, ~c);
+ }
+ *result = '\0';
+ result -= targlen;
+ sv_setpvn(TARG, (char*)result, targlen);
+ SvUTF8_on(TARG);
+ }
+ else {
+ Newz(0, result, nchar + 1, U8);
+ while (tmps < send) {
+ U8 c = (U8)utf8_to_uv(tmps, 0, &l, UTF8_ALLOW_ANY);
+ tmps += UTF8SKIP(tmps);
+ *result++ = ~c;
+ }
+ *result = '\0';
+ result -= nchar;
+ sv_setpvn(TARG, (char*)result, nchar);
}
- *result = '\0';
- result -= targlen;
- sv_setpvn(TARG, (char*)result, targlen);
- SvUTF8_on(TARG);
Safefree(result);
SETs(TARG);
RETURN;