summary | refs | log | tree | commit | diff
path: root/doop.c
diff options
context:
space:
mode:
author: Simon Cozens <simon@netthink.co.uk> 2000-11-18 18:49:30 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2000-11-20 22:11:18 +0000
commit: 5e44153e64927839811ced002c44263afa6f952d (patch)
tree: 83630910da887a59ec55b434263e728762f64194 /doop.c
parent: a84e0cefe392652252f998db397cb3e944414915 (diff)
download: perl-5e44153e64927839811ced002c44263afa6f952d.tar.gz
[PATCH] doop.c - UTF8 tr///
Date: Sat, 18 Nov 2000 18:49:30 +0000
Message-ID: <20001118184930.A31687@pembro4.pmb.ox.ac.uk>

Subject: Re: [PATCH] doop.c - UTF8 tr///
From: Simon Cozens <simon@cozens.net>
Date: Mon, 20 Nov 2000 20:45:22 +0000
Message-ID: <20001120204522.A26042@pembro4.pmb.ox.ac.uk>

Make tr on UTF-8 better but still not correct.

p4raw-id: //depot/perl@7783
Diffstat (limited to 'doop.c')
-rw-r--r-- doop.c 52
1 files changed, 34 insertions, 18 deletions
diff --git a/doop.c b/doop.c
index a2990cee60..3c34425075 100644
--- a/doop.c
+++ b/doop.c
@@ -144,6 +144,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
U8 *s;
U8 *send;
U8 *d;
+ U8 *dstart;
I32 hasutf = SvUTF8(sv);
I32 matches = 0;
STRLEN len;
@@ -157,7 +158,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
s = (U8*)SvPV(sv, len);
send = s + len;
- d = s;
+ Newz(0, d, len*2+1, U8);
+ dstart = d;
+
if (PL_op->op_private & OPpTRANS_SQUASH) {
U8* p = send;
@@ -168,9 +171,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
if ((ch = tbl[*s]) >= 0) {
*d = ch;
matches++;
- if (p == d - 1 && *p == *d)
- matches--;
- else
+ if (p != d - 1 || *p != *d)
p = d++;
}
else if (ch == -1) /* -1 is unmapped character */
@@ -181,26 +182,41 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
}
else {
while (s < send) {
+ UV comp;
if (hasutf && *s & 0x80)
- s += UTF8SKIP(s);
- else {
- if ((ch = tbl[*s]) >= 0) {
- *d = ch;
- matches++;
- d++;
- }
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
- }
+ comp = utf8_to_uv_simple(s, NULL);
+ else
+ comp = *s;
+
+ ch = tbl[comp];
+
+ if (ch == -1) { /* -1 is unmapped character */
+ ch = comp;
+ matches--;
+ }
+
+ if (ch >= 0) {
+ if (hasutf)
+ d = uv_to_utf8(d, ch);
+ else
+ *d++ = ch;
+ }
+ matches++;
+
+ s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1;
+
}
}
- matches += send - d; /* account for disappeared chars */
+
*d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
+ sv_setpvn(sv, (const char*)dstart, d - dstart);
+ Safefree(dstart);
+ if (hasutf)
+ SvUTF8_on(sv);
+ SvSETMAGIC(sv);
return matches;
+
}
STATIC I32