summary refs log tree commit diff
diff options
context:
space:
mode:
author Larry Wall <larry@wall.org> 1998-09-05 23:48:24 +0000
committer Larry Wall <larry@wall.org> 1998-09-05 23:48:24 +0000
commit 4757a2438b123364ad98fc0cb4698e56331f713b (patch)
tree d1e491103e421493e8c170df368431ccea81971a
parent 342930fbe48873e4ee7fdf12f4ea00e063c13bb8 (diff)
download perl-4757a2438b123364ad98fc0cb4698e56331f713b.tar.gz
tr/// logic was hosed under utf8
p4raw-id: //depot/perl@1781
-rw-r--r-- doop.c 859
-rw-r--r-- op.c 15
-rw-r--r-- op.h 16
-rw-r--r-- pp.c 2
-rw-r--r-- proto.h 2
5 files changed, 598 insertions, 296 deletions
diff --git a/doop.c b/doop.c
index 8ebbd8335d..c6270e44f4 100644
--- a/doop.c
+++ b/doop.c
@@ -18,322 +18,613 @@
#include <signal.h>
#endif
-I32
-do_trans(SV *sv, OP *arg)
+static I32
+do_trans_CC_simple(SV *sv)
{
dTHR;
- register U8 *s;
- register U8 *send;
- register U8 *d;
- register I32 matches = 0;
- register I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
+ U8 *s;
+ U8 *send;
+ I32 matches = 0;
STRLEN len;
+ short *tbl;
+ I32 ch;
- if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_COUNTONLY))
- croak(no_modify);
+ tbl = (short*)cPVOP->op_pv;
+ if (!tbl)
+ croak("panic: do_trans");
- if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) {
- SV* rv = (SV*)cSVOP->op_sv;
- HV* hv = (HV*)SvRV(rv);
- SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
- UV none = svp ? SvUV(*svp) : 0x7fffffff;
- UV extra = none + 1;
- I32 del = PL_op->op_private & OPpTRANS_DELETE;
- UV final;
- register UV uv;
- UV puv;
- register I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF;
- register I32 to_utf = PL_op->op_private & OPpTRANS_TO_UTF;
-
- s = (U8*)SvPV(sv, len);
- if (!len)
- return 0;
- if (!SvPOKp(sv))
- s = (U8*)SvPV_force(sv, len);
- (void)SvPOK_only(sv);
- send = s + len;
- DEBUG_t( deb("2.TBL\n"));
- if (PL_op->op_private == (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)) { /* no other flags */
- svp = hv_fetch(hv, "FINAL", 5, FALSE);
- if (svp)
- final = SvUV(*svp);
-
- d = s;
- while (s < send) {
- if ((uv = swash_fetch(rv, s)) < none) {
- s += UTF8SKIP(s);
- matches++;
- d = uv_to_utf8(d, uv);
- }
- else if (uv == none) {
- int i;
- for (i = UTF8SKIP(s); i; i--)
- *d++ = *s++;
- }
- else if (uv == extra) {
- s += UTF8SKIP(s);
- matches++;
- d = uv_to_utf8(d, final);
- }
- else
- s += UTF8SKIP(s);
- }
- *d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
- }
- else if (PL_op->op_private == OPpTRANS_FROM_UTF) { /* no other flags */
- svp = hv_fetch(hv, "FINAL", 5, FALSE);
- if (svp)
- final = SvUV(*svp);
-
- d = s;
- while (s < send) {
- if ((uv = swash_fetch(rv, s)) < none) {
- s += UTF8SKIP(s);
- matches++;
- *d++ = (U8)uv;
- }
- else if (uv == none) {
- I32 ulen;
- uv = utf8_to_uv(s, &ulen);
- s += ulen;
- *d++ = (U8)uv;
- }
- else if (uv == extra) {
- s += UTF8SKIP(s);
- matches++;
- *d++ = (U8)final;
- }
- else
- s += UTF8SKIP(s);
- }
- *d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
- }
- else if (PL_op->op_private == OPpTRANS_TO_UTF) { /* no other flags */
- svp = hv_fetch(hv, "FINAL", 5, FALSE);
- if (svp)
- final = SvUV(*svp);
-
- d = s;
- while (s < send) {
- U8 tmpbuf[10];
- uv_to_utf8(tmpbuf, *s); /* XXX suboptimal */
- if ((uv = swash_fetch(rv, tmpbuf)) < none) {
- s += UTF8SKIP(s);
- matches++;
- d = uv_to_utf8(d, uv);
- }
- else if (uv == none) {
- I32 ulen;
- uv = utf8_to_uv(s, &ulen);
- s += ulen;
- d = uv_to_utf8(d, uv);
- }
- else if (uv == extra) {
- s += UTF8SKIP(s);
- matches++;
- d = uv_to_utf8(d, final);
- }
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ while (s < send) {
+ if ((ch = tbl[*s]) >= 0) {
+ matches++;
+ *s = ch;
+ }
+ s++;
+ }
+ SvSETMAGIC(sv);
+
+ return matches;
+}
+
+static I32
+do_trans_CC_count(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ I32 matches = 0;
+ STRLEN len;
+ short *tbl;
+
+ tbl = (short*)cPVOP->op_pv;
+ if (!tbl)
+ croak("panic: do_trans");
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ while (s < send) {
+ if (tbl[*s] >= 0)
+ matches++;
+ s++;
+ }
+
+ return matches;
+}
+
+static I32
+do_trans_CC_complex(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ U8 *d;
+ I32 matches = 0;
+ STRLEN len;
+ short *tbl;
+ I32 ch;
+
+ tbl = (short*)cPVOP->op_pv;
+ if (!tbl)
+ croak("panic: do_trans");
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ d = s;
+ if (PL_op->op_private & OPpTRANS_SQUASH) {
+ U8* p = send;
+
+ while (s < send) {
+ if ((ch = tbl[*s]) >= 0) {
+ *d = ch;
+ matches++;
+ if (p == d - 1 && *p == *d)
+ matches--;
else
- s += UTF8SKIP(s);
+ p = d++;
}
- *d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s; /* -2 is delete character */
+ s++;
}
- else if (PL_op->op_private & OPpTRANS_COUNTONLY) {
- if (from_utf) {
- while (s < send) {
- if (swash_fetch(rv, s) < none)
- matches++;
- s += UTF8SKIP(s);
- }
- }
- else {
- while (s < send) {
- U8 tmpbuf[10];
- uv_to_utf8(tmpbuf, *s); /* XXX suboptimal */
- if (swash_fetch(rv, tmpbuf) < none)
- matches++;
- s += UTF8SKIP(s);
- }
+ }
+ else {
+ while (s < send) {
+ if ((ch = tbl[*s]) >= 0) {
+ *d = ch;
+ matches++;
+ d++;
}
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s; /* -2 is delete character */
+ s++;
+ }
+ }
+ matches += send - d; /* account for disappeared chars */
+ *d = '\0';
+ SvCUR_set(sv, d - (U8*)SvPVX(sv));
+ SvSETMAGIC(sv);
+
+ return matches;
+}
+
+static I32
+do_trans_UU_simple(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ U8 *d;
+ I32 matches = 0;
+ STRLEN len;
+
+ SV* rv = (SV*)cSVOP->op_sv;
+ HV* hv = (HV*)SvRV(rv);
+ SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+ UV none = svp ? SvUV(*svp) : 0x7fffffff;
+ UV extra = none + 1;
+ UV final;
+ UV uv;
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ svp = hv_fetch(hv, "FINAL", 5, FALSE);
+ if (svp)
+ final = SvUV(*svp);
+
+ d = s;
+ while (s < send) {
+ if ((uv = swash_fetch(rv, s)) < none) {
+ s += UTF8SKIP(s);
+ matches++;
+ d = uv_to_utf8(d, uv);
+ }
+ else if (uv == none) {
+ int i;
+ for (i = UTF8SKIP(s); i; i--)
+ *d++ = *s++;
+ }
+ else if (uv == extra) {
+ s += UTF8SKIP(s);
+ matches++;
+ d = uv_to_utf8(d, final);
+ }
+ else
+ s += UTF8SKIP(s);
+ }
+ *d = '\0';
+ SvCUR_set(sv, d - (U8*)SvPVX(sv));
+ SvSETMAGIC(sv);
+
+ return matches;
+}
+
+static I32
+do_trans_UU_count(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ I32 matches = 0;
+ STRLEN len;
+
+ SV* rv = (SV*)cSVOP->op_sv;
+ HV* hv = (HV*)SvRV(rv);
+ SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+ UV none = svp ? SvUV(*svp) : 0x7fffffff;
+ UV uv;
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ while (s < send) {
+ if ((uv = swash_fetch(rv, s)) < none) {
+ s += UTF8SKIP(s);
+ matches++;
+ }
+ }
+
+ return matches;
+}
+
+static I32
+do_trans_UC_simple(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ U8 *d;
+ I32 matches = 0;
+ STRLEN len;
+
+ SV* rv = (SV*)cSVOP->op_sv;
+ HV* hv = (HV*)SvRV(rv);
+ SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+ UV none = svp ? SvUV(*svp) : 0x7fffffff;
+ UV extra = none + 1;
+ UV final;
+ UV uv;
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ svp = hv_fetch(hv, "FINAL", 5, FALSE);
+ if (svp)
+ final = SvUV(*svp);
+
+ d = s;
+ while (s < send) {
+ if ((uv = swash_fetch(rv, s)) < none) {
+ s += UTF8SKIP(s);
+ matches++;
+ *d++ = (U8)uv;
+ }
+ else if (uv == none) {
+ I32 ulen;
+ uv = utf8_to_uv(s, &ulen);
+ s += ulen;
+ *d++ = (U8)uv;
}
+ else if (uv == extra) {
+ s += UTF8SKIP(s);
+ matches++;
+ *d++ = (U8)final;
+ }
+ else
+ s += UTF8SKIP(s);
+ }
+ *d = '\0';
+ SvCUR_set(sv, d - (U8*)SvPVX(sv));
+ SvSETMAGIC(sv);
+
+ return matches;
+}
+
+static I32
+do_trans_CU_simple(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ U8 *d;
+ U8 *dst;
+ I32 matches = 0;
+ STRLEN len;
+
+ SV* rv = (SV*)cSVOP->op_sv;
+ HV* hv = (HV*)SvRV(rv);
+ SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+ UV none = svp ? SvUV(*svp) : 0x7fffffff;
+ UV extra = none + 1;
+ UV final;
+ UV uv;
+ U8 tmpbuf[10];
+ I32 bits = 16;
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ svp = hv_fetch(hv, "BITS", 4, FALSE);
+ if (svp)
+ bits = (I32)SvIV(*svp);
+
+ svp = hv_fetch(hv, "FINAL", 5, FALSE);
+ if (svp)
+ final = SvUV(*svp);
+
+ Newz(801, d, len * (bits >> 3) + 1, U8);
+ dst = d;
+
+ while (s < send) {
+ uv = *s++;
+ if (uv < 0x80)
+ tmpbuf[0] = uv;
else {
- I32 bits = 16;
- U8 *dst;
+ tmpbuf[0] = (( uv >> 6) | 0xc0);
+ tmpbuf[1] = (( uv & 0x3f) | 0x80);
+ }
- svp = hv_fetch(hv, "BITS", 4, FALSE);
- if (svp)
- bits = (I32)SvIV(*svp);
+ if ((uv = swash_fetch(rv, tmpbuf)) < none) {
+ matches++;
+ d = uv_to_utf8(d, uv);
+ }
+ else if (uv == none)
+ d = uv_to_utf8(d, s[-1]);
+ else if (uv == extra) {
+ matches++;
+ d = uv_to_utf8(d, final);
+ }
+ }
+ *d = '\0';
+ sv_usepvn_mg(sv, (char*)dst, d - dst);
- svp = hv_fetch(hv, "FINAL", 5, FALSE);
- if (svp)
- final = SvUV(*svp);
+ return matches;
+}
- Newz(801, d, len * (bits >> 3) + 1, U8);
- dst = d;
+/* utf-8 to latin-1 */
- puv = 0xfeedface;
- if (squash) {
- while (s < send) {
- if (from_utf)
- uv = swash_fetch(rv, s);
- else {
- U8 tmpbuf[10];
- uv_to_utf8(tmpbuf, *s); /* XXX suboptimal */
- uv = swash_fetch(rv, tmpbuf);
- }
- if (uv < none) {
- matches++;
- if (uv != puv) {
- if (to_utf)
- d = uv_to_utf8(d, uv);
- else
- *d++ = (U8)uv;
- }
- puv = uv;
- s += UTF8SKIP(s);
- continue;
- }
- else if (uv == none) { /* "none" is unmapped character */
- int i;
- if (to_utf) {
- for (i = UTF8SKIP(s); i; --i)
- *d++ = *s++;
- }
- else {
- I32 ulen;
- *d++ = (U8)utf8_to_uv(s, &ulen);
- s += ulen;
- }
- puv = 0xfeedface;
- continue;
- }
- else if (uv == extra && !del) {
- matches++;
- if (to_utf)
- d = uv_to_utf8(d, final);
- else
- *d++ = (U8)final;
- s += UTF8SKIP(s);
- puv = 0xfeedface;
- continue;
- }
- matches++; /* "none+1" is delete character */
- s += UTF8SKIP(s);
- }
+static I32
+do_trans_UC_trivial(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ U8 *d;
+ STRLEN len;
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ d = s;
+ while (s < send) {
+ if (*s < 0x80)
+ *d++ = *s++;
+ else {
+ I32 ulen;
+ UV uv = utf8_to_uv(s, &ulen);
+ s += ulen;
+ *d++ = (U8)uv;
+ }
+ }
+ *d = '\0';
+ SvCUR_set(sv, d - (U8*)SvPVX(sv));
+ SvSETMAGIC(sv);
+
+ return SvCUR(sv);
+}
+
+/* latin-1 to utf-8 */
+
+static I32
+do_trans_CU_trivial(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ U8 *d;
+ U8 *dst;
+ I32 matches;
+ STRLEN len;
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ Newz(801, d, len * 2 + 1, U8);
+ dst = d;
+
+ matches = send - s;
+
+ while (s < send) {
+ if (*s < 0x80)
+ *d++ = *s++;
+ else {
+ UV uv = *s++;
+ *d++ = (( uv >> 6) | 0xc0);
+ *d++ = (( uv & 0x3f) | 0x80);
+ }
+ }
+ *d = '\0';
+ sv_usepvn_mg(sv, (char*)dst, d - dst);
+
+ return matches;
+}
+
+static I32
+do_trans_UU_complex(SV *sv)
+{
+ dTHR;
+ U8 *s;
+ U8 *send;
+ U8 *d;
+ I32 matches = 0;
+ I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
+ I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF;
+ I32 to_utf = PL_op->op_private & OPpTRANS_TO_UTF;
+ I32 del = PL_op->op_private & OPpTRANS_DELETE;
+ SV* rv = (SV*)cSVOP->op_sv;
+ HV* hv = (HV*)SvRV(rv);
+ SV** svp = hv_fetch(hv, "NONE", 4, FALSE);
+ UV none = svp ? SvUV(*svp) : 0x7fffffff;
+ UV extra = none + 1;
+ UV final;
+ UV uv;
+ STRLEN len;
+ U8 *dst;
+
+ s = (U8*)SvPV(sv, len);
+ send = s + len;
+
+ svp = hv_fetch(hv, "FINAL", 5, FALSE);
+ if (svp)
+ final = SvUV(*svp);
+
+ if (PL_op->op_private & OPpTRANS_GROWS) {
+ I32 bits = 16;
+
+ svp = hv_fetch(hv, "BITS", 4, FALSE);
+ if (svp)
+ bits = (I32)SvIV(*svp);
+
+ Newz(801, d, len * (bits >> 3) + 1, U8);
+ dst = d;
+ }
+ else {
+ d = s;
+ dst = 0;
+ }
+
+ if (squash) {
+ UV puv = 0xfeedface;
+ while (s < send) {
+ if (from_utf) {
+ uv = swash_fetch(rv, s);
}
else {
- while (s < send) {
- if (from_utf)
- uv = swash_fetch(rv, s);
- else {
- U8 tmpbuf[10];
- uv_to_utf8(tmpbuf, *s); /* XXX suboptimal */
- uv = swash_fetch(rv, tmpbuf);
- }
- if (uv < none) {
- if (to_utf)
- d = uv_to_utf8(d, uv);
- else
- *d++ = (U8)uv;
- matches++;
- s += UTF8SKIP(s);
- continue;
- }
- else if (uv == none) { /* "none" is unmapped character */
+ U8 tmpbuf[2];
+ uv = *s++;
+ if (uv < 0x80)
+ tmpbuf[0] = uv;
+ else {
+ tmpbuf[0] = (( uv >> 6) | 0xc0);
+ tmpbuf[1] = (( uv & 0x3f) | 0x80);
+ }
+ uv = swash_fetch(rv, tmpbuf);
+ }
+ if (uv < none) {
+ matches++;
+ if (uv != puv) {
+ if (uv >= 0x80 && to_utf)
+ d = uv_to_utf8(d, uv);
+ else
+ *d++ = (U8)uv;
+ puv = uv;
+ }
+ if (from_utf)
+ s += UTF8SKIP(s);
+ continue;
+ }
+ else if (uv == none) { /* "none" is unmapped character */
+ if (from_utf) {
+ if (*s < 0x80)
+ *d++ = *s++;
+ else if (to_utf) {
int i;
- if (to_utf) {
- for (i = UTF8SKIP(s); i; --i)
- *d++ = *s++;
- }
- else {
- I32 ulen;
- *d++ = (U8)utf8_to_uv(s, &ulen);
- s += ulen;
- }
- continue;
+ for (i = UTF8SKIP(s); i; --i)
+ *d++ = *s++;
}
- else if (uv == extra && !del) {
- matches++;
- if (to_utf)
- d = uv_to_utf8(d, final);
- else
- *d++ = (U8)final;
- s += UTF8SKIP(s);
- continue;
+ else {
+ I32 ulen;
+ *d++ = (U8)utf8_to_uv(s, &ulen);
+ s += ulen;
}
- matches++; /* "none+1" is delete character */
- s += UTF8SKIP(s);
}
+ else { /* must be to_utf only */
+ d = uv_to_utf8(d, s[-1]);
+ }
+ puv = 0xfeedface;
+ continue;
}
- sv_usepvn_mg(sv, (char*)dst, d - dst);
+ else if (uv == extra && !del) {
+ matches++;
+ if (uv != puv) {
+ if (final >= 0x80 && to_utf)
+ d = uv_to_utf8(d, final);
+ else
+ *d++ = (U8)final;
+ puv = final;
+ }
+ if (from_utf)
+ s += UTF8SKIP(s);
+ continue;
+ }
+ matches++; /* "none+1" is delete character */
+ if (from_utf)
+ s += UTF8SKIP(s);
}
- return matches;
}
else {
- register short *tbl;
- register I32 ch;
- register U8 *p;
-
- tbl = (short*)cPVOP->op_pv;
- s = (U8*)SvPV(sv, len);
- if (!len)
- return 0;
- if (!SvPOKp(sv))
- s = (U8*)SvPV_force(sv, len);
- (void)SvPOK_only(sv);
- send = s + len;
- if (!tbl || !s)
- croak("panic: do_trans");
- DEBUG_t( deb("2.TBL\n"));
- if (!PL_op->op_private) {
- while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- matches++;
- *s = ch;
+ while (s < send) {
+ if (from_utf) {
+ uv = swash_fetch(rv, s);
+ }
+ else {
+ U8 tmpbuf[2];
+ uv = *s++;
+ if (uv < 0x80)
+ tmpbuf[0] = uv;
+ else {
+ tmpbuf[0] = (( uv >> 6) | 0xc0);
+ tmpbuf[1] = (( uv & 0x3f) | 0x80);
}
- s++;
+ uv = swash_fetch(rv, tmpbuf);
}
- SvSETMAGIC(sv);
- }
- else if (PL_op->op_private & OPpTRANS_COUNTONLY) {
- while (s < send) {
- if (tbl[*s] >= 0)
- matches++;
- s++;
+ if (uv < none) {
+ matches++;
+ if (uv >= 0x80 && to_utf)
+ d = uv_to_utf8(d, uv);
+ else
+ *d++ = (U8)uv;
+ if (from_utf)
+ s += UTF8SKIP(s);
+ continue;
}
- }
- else {
- d = s;
- p = send;
- while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- *d = ch;
- matches++;
- if (squash) {
- if (p == d - 1 && *p == *d)
- matches--;
- else
- p = d++;
+ else if (uv == none) { /* "none" is unmapped character */
+ if (from_utf) {
+ if (*s < 0x80)
+ *d++ = *s++;
+ else if (to_utf) {
+ int i;
+ for (i = UTF8SKIP(s); i; --i)
+ *d++ = *s++;
+ }
+ else {
+ I32 ulen;
+ *d++ = (U8)utf8_to_uv(s, &ulen);
+ s += ulen;
}
- else
- d++;
}
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
+ else { /* must be to_utf only */
+ d = uv_to_utf8(d, s[-1]);
+ }
+ continue;
}
- matches += send - d; /* account for disappeared chars */
- *d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
- SvSETMAGIC(sv);
+ else if (uv == extra && !del) {
+ matches++;
+ if (final >= 0x80 && to_utf)
+ d = uv_to_utf8(d, final);
+ else
+ *d++ = (U8)final;
+ if (from_utf)
+ s += UTF8SKIP(s);
+ continue;
+ }
+ matches++; /* "none+1" is delete character */
+ if (from_utf)
+ s += UTF8SKIP(s);
}
- return matches;
+ }
+ if (dst)
+ sv_usepvn(sv, (char*)dst, d - dst);
+ else {
+ *d = '\0';
+ SvCUR_set(sv, d - (U8*)SvPVX(sv));
+ }
+ SvSETMAGIC(sv);
+
+ return matches;
+}
+
+I32
+do_trans(SV *sv)
+{
+ STRLEN len;
+
+ if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
+ croak(no_modify);
+
+ (void)SvPV(sv, len);
+ if (!len)
+ return 0;
+ if (!SvPOKp(sv))
+ (void)SvPV_force(sv, len);
+ (void)SvPOK_only(sv);
+
+ DEBUG_t( deb("2.TBL\n"));
+
+ switch (PL_op->op_private & 63) {
+ case 0:
+ return do_trans_CC_simple(sv);
+
+ case OPpTRANS_FROM_UTF:
+ return do_trans_UC_simple(sv);
+
+ case OPpTRANS_TO_UTF:
+ return do_trans_CU_simple(sv);
+
+ case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF:
+ return do_trans_UU_simple(sv);
+
+ case OPpTRANS_IDENTICAL:
+ return do_trans_CC_count(sv);
+
+ case OPpTRANS_FROM_UTF|OPpTRANS_IDENTICAL:
+ return do_trans_UC_trivial(sv);
+
+ case OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
+ return do_trans_CU_trivial(sv);
+
+ case OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL:
+ return do_trans_UU_count(sv);
+
+ default:
+ if (PL_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF))
+ return do_trans_UU_complex(sv); /* could be UC or CU too */
+ else
+ return do_trans_CC_complex(sv);
}
}
diff --git a/op.c b/op.c
index 53fb8c1ce3..ca89229a0d 100644
--- a/op.c
+++ b/op.c
@@ -2156,8 +2156,17 @@ pmtrans(OP *o, OP *expr, OP *repl)
}
else if (!rlen && !del) {
r = t; rlen = tlen; rend = tend;
- if (!squash && to_utf && from_utf)
- o->op_private |= OPpTRANS_COUNTONLY;
+ }
+ if (!squash) {
+ if (to_utf && from_utf) { /* only counting characters */
+ if (t == r || (tlen == rlen && memEQ(t, r, tlen)))
+ o->op_private |= OPpTRANS_IDENTICAL;
+ }
+ else { /* straight latin-1 translation */
+ if (tlen == 4 && memEQ(t, "\0\377\303\277", 4) &&
+ rlen == 4 && memEQ(r, "\0\377\303\277", 4))
+ o->op_private |= OPpTRANS_IDENTICAL;
+ }
}
while (t < tend || tfirst <= tlast) {
@@ -2286,7 +2295,7 @@ pmtrans(OP *o, OP *expr, OP *repl)
if (!rlen && !del) {
r = t; rlen = tlen;
if (!squash)
- o->op_private |= OPpTRANS_COUNTONLY;
+ o->op_private |= OPpTRANS_IDENTICAL;
}
for (i = 0; i < 256; i++)
tbl[i] = -1;
diff --git a/op.h b/op.h
index cbb2ac3d8a..0b186a89db 100644
--- a/op.h
+++ b/op.h
@@ -103,13 +103,15 @@ typedef U32 PADOFFSET;
#define OPpRUNTIME 64 /* Pattern coming in on the stack */
/* Private for OP_TRANS */
-#define OPpTRANS_GROWS 1
-#define OPpTRANS_FROM_UTF 2
-#define OPpTRANS_TO_UTF 4
-#define OPpTRANS_COUNTONLY 8
-#define OPpTRANS_SQUASH 16
-#define OPpTRANS_DELETE 32
-#define OPpTRANS_COMPLEMENT 64
+#define OPpTRANS_FROM_UTF 1
+#define OPpTRANS_TO_UTF 2
+#define OPpTRANS_IDENTICAL 4
+ /* When CU or UC, means straight latin-1 to utf-8 or vice versa */
+ /* Otherwise, IDENTICAL means the right side is the same as the left */
+#define OPpTRANS_SQUASH 8
+#define OPpTRANS_DELETE 16
+#define OPpTRANS_COMPLEMENT 32
+#define OPpTRANS_GROWS 64
/* Private for OP_REPEAT */
#define OPpREPEAT_DOLIST 64 /* List replication. */
diff --git a/pp.c b/pp.c
index 9c08e2edca..a4f7828734 100644
--- a/pp.c
+++ b/pp.c
@@ -669,7 +669,7 @@ PP(pp_trans)
EXTEND(SP,1);
}
TARG = sv_newmortal();
- PUSHi(do_trans(sv, PL_op));
+ PUSHi(do_trans(sv));
RETURN;
}
diff --git a/proto.h b/proto.h
index 5b71f63428..96bb15cb6a 100644
--- a/proto.h
+++ b/proto.h
@@ -126,7 +126,7 @@ I32 do_shmio _((I32 optype, SV** mark, SV** sp));
VIRTUAL void do_sprintf _((SV* sv, I32 len, SV** sarg));
VIRTUAL long do_sysseek _((GV* gv, long pos, int whence));
VIRTUAL long do_tell _((GV* gv));
-VIRTUAL I32 do_trans _((SV* sv, OP* arg));
+VIRTUAL I32 do_trans _((SV* sv));
VIRTUAL void do_vecset _((SV* sv));
VIRTUAL void do_vop _((I32 optype, SV* sv, SV* left, SV* right));
VIRTUAL I32 dowantarray _((void));