diff options
author | Simon Cozens <simon@netthink.co.uk> | 2000-11-18 18:49:30 +0000 |
---|---|---|
committer | Jarkko Hietaniemi <jhi@iki.fi> | 2000-11-20 22:11:18 +0000 |
commit | 5e44153e64927839811ced002c44263afa6f952d (patch) | |
tree | 83630910da887a59ec55b434263e728762f64194 /doop.c | |
parent | a84e0cefe392652252f998db397cb3e944414915 (diff) | |
download | perl-5e44153e64927839811ced002c44263afa6f952d.tar.gz |
[PATCH] doop.c - UTF8 tr///
Date: Sat, 18 Nov 2000 18:49:30 +0000
Message-ID: <20001118184930.A31687@pembro4.pmb.ox.ac.uk>
Subject: Re: [PATCH] doop.c - UTF8 tr///
From: Simon Cozens <simon@cozens.net>
Date: Mon, 20 Nov 2000 20:45:22 +0000
Message-ID: <20001120204522.A26042@pembro4.pmb.ox.ac.uk>
Make tr on UTF-8 better but still not correct.
p4raw-id: //depot/perl@7783
Diffstat (limited to 'doop.c')
-rw-r--r-- | doop.c | 52 |
1 files changed, 34 insertions, 18 deletions
@@ -144,6 +144,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ U8 *s; U8 *send; U8 *d; + U8 *dstart; I32 hasutf = SvUTF8(sv); I32 matches = 0; STRLEN len; @@ -157,7 +158,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ s = (U8*)SvPV(sv, len); send = s + len; - d = s; + Newz(0, d, len*2+1, U8); + dstart = d; + if (PL_op->op_private & OPpTRANS_SQUASH) { U8* p = send; @@ -168,9 +171,7 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ if ((ch = tbl[*s]) >= 0) { *d = ch; matches++; - if (p == d - 1 && *p == *d) - matches--; - else + if (p != d - 1 || *p != *d) p = d++; } else if (ch == -1) /* -1 is unmapped character */ @@ -181,26 +182,41 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */ } else { while (s < send) { + UV comp; if (hasutf && *s & 0x80) - s += UTF8SKIP(s); - else { - if ((ch = tbl[*s]) >= 0) { - *d = ch; - matches++; - d++; - } - else if (ch == -1) /* -1 is unmapped character */ - *d++ = *s; /* -2 is delete character */ - s++; - } + comp = utf8_to_uv_simple(s, NULL); + else + comp = *s; + + ch = tbl[comp]; + + if (ch == -1) { /* -1 is unmapped character */ + ch = comp; + matches--; + } + + if (ch >= 0) { + if (hasutf) + d = uv_to_utf8(d, ch); + else + *d++ = ch; + } + matches++; + + s += hasutf && *s & 0x80 ? UNISKIP(*s) : 1; + } } - matches += send - d; /* account for disappeared chars */ + *d = '\0'; - SvCUR_set(sv, d - (U8*)SvPVX(sv)); - SvSETMAGIC(sv); + sv_setpvn(sv, (const char*)dstart, d - dstart); + Safefree(dstart); + if (hasutf) + SvUTF8_on(sv); + SvSETMAGIC(sv); return matches; + } STATIC I32 |