summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doop.c52
-rw-r--r--op.c9
-rwxr-xr-xt/op/tr.t25
-rw-r--r--toke.c21
4 files changed, 90 insertions, 17 deletions
diff --git a/doop.c b/doop.c
index 9bc6d5628a..7e2b52fd2e 100644
--- a/doop.c
+++ b/doop.c
@@ -99,6 +99,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
I32 matches = 0;
STRLEN len;
short *tbl;
+ I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
tbl = (short*)cPVOP->op_pv;
if (!tbl)
@@ -117,7 +118,10 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
UV c;
STRLEN ulen;
c = utf8_to_uv(s, send - s, &ulen, 0);
- if (c < 0x100 && tbl[c] >= 0)
+ if (c < 0x100) {
+ if (tbl[c] >= 0)
+ matches++;
+ } else if (complement)
matches++;
s += ulen;
}
@@ -135,7 +139,9 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
I32 isutf8;
I32 matches = 0;
I32 grows = PL_op->op_private & OPpTRANS_GROWS;
- STRLEN len;
+ I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT;
+ I32 del = PL_op->op_private & OPpTRANS_DELETE;
+ STRLEN len, rlen;
short *tbl;
I32 ch;
@@ -186,6 +192,8 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
else
d = s;
dstart = d;
+ if (complement && !del)
+ rlen = tbl[0x100];
#ifdef MACOS_TRADITIONAL
#define comp CoMP /* "comp" is a keyword in some compilers ... */
@@ -197,9 +205,24 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
STRLEN len;
UV comp = utf8_to_uv_simple(s, &len);
- if (comp > 0xff) { /* always unmapped */
- Copy(s, d, len, U8);
- d += len;
+ if (comp > 0xff) {
+ if (!complement) {
+ Copy(s, d, len, U8);
+ d += len;
+ }
+ else {
+ matches++;
+ if (!del) {
+ ch = (comp - 0x100 < rlen) ?
+ tbl[comp+1] : tbl[0x100+rlen];
+ if (ch != pch) {
+ d = uv_to_utf8(d, ch);
+ pch = ch;
+ }
+ s += len;
+ continue;
+ }
+ }
}
else if ((ch = tbl[comp]) >= 0) {
matches++;
@@ -224,9 +247,20 @@ S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
while (s < send) {
STRLEN len;
UV comp = utf8_to_uv_simple(s, &len);
- if (comp > 0xff) { /* always unmapped */
- Copy(s, d, len, U8);
- d += len;
+ if (comp > 0xff) {
+ if (!complement) {
+ Copy(s, d, len, U8);
+ d += len;
+ }
+ else {
+ matches++;
+ if (!del) {
+ if (comp - 0x100 < rlen)
+ d = uv_to_utf8(d, tbl[comp+1]);
+ else
+ d = uv_to_utf8(d, tbl[0x100+rlen]);
+ }
+ }
}
else if ((ch = tbl[comp]) >= 0) {
d = uv_to_utf8(d, ch);
@@ -499,8 +533,8 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
}
if (uv < none) {
matches++;
- d = uv_to_utf8(d, uv);
s += UTF8SKIP(s);
+ d = uv_to_utf8(d, uv);
continue;
}
else if (uv == none) { /* "none" is unmapped character */
diff --git a/op.c b/op.c
index eb60121edf..4c5dd1302b 100644
--- a/op.c
+++ b/op.c
@@ -2871,6 +2871,15 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
}
}
}
+ if (!del) {
+ if (j >= rlen)
+ j = rlen - 1;
+ else
+ cPVOPo->op_pv = (char*)Renew(tbl, 0x101+rlen-j, short);
+ tbl[0x100] = rlen - j;
+ for (i=0; i < rlen - j; i++)
+ tbl[0x101+i] = r[j+i];
+ }
}
else {
if (!rlen && !del) {
diff --git a/t/op/tr.t b/t/op/tr.t
index 75887ab31c..b10f4f2495 100755
--- a/t/op/tr.t
+++ b/t/op/tr.t
@@ -5,7 +5,7 @@ BEGIN {
@INC = '../lib';
}
-print "1..51\n";
+print "1..55\n";
$_ = "abcdefghijklmnopqrstuvwxyz";
@@ -84,7 +84,7 @@ if (ord("\t") == 9) { # ASCII
use utf8;
}
# 11 - changing UTF8 characters in a UTF8 string, same length.
-$l = chr(300); $r = chr(400);
+my $l = chr(300); my $r = chr(400);
$x = 200.300.400;
$x =~ tr/\x{12c}/\x{190}/;
printf "not (%vd) ", $x if $x ne 200.400.400 or length $x != 3;
@@ -287,7 +287,7 @@ print "ok 48\n";
print "not " unless sprintf("%vd", $a) eq '196.172.200';
print "ok 49\n";
-# UTF8 range
+# UTF8 range tests from Inaba Hiroto
($a = v300.196.172.302.197.172) =~ tr/\x{12c}-\x{130}/\xc0-\xc4/;
print "not " unless $a eq v192.196.172.194.197.172;
@@ -296,3 +296,22 @@ print "ok 50\n";
($a = v300.196.172.302.197.172) =~ tr/\xc4-\xc8/\x{12c}-\x{130}/;
print "not " unless $a eq v300.300.172.302.301.172;
print "ok 51\n";
+
+# UTF8 range tests from Karsten Sperling (patch #9008 required)
+
+($a = "\x{0100}") =~ tr/\x00-\x{100}/X/;
+print "not " unless $a eq "X";
+print "ok 52\n";
+
+($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}/X/c;
+print "not " unless $a eq "X";
+print "ok 53\n";
+
+($a = "\x{0100}") =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
+print "not " unless $a eq "X";
+print "ok 54\n";
+
+($a = v256) =~ tr/\x{0000}-\x{00ff}\x{0101}/X/c;
+print "not " unless $a eq "X";
+print "ok 55\n";
+
diff --git a/toke.c b/toke.c
index 33915ed42a..daa0d52be6 100644
--- a/toke.c
+++ b/toke.c
@@ -1240,6 +1240,17 @@ S_scan_const(pTHX_ char *start)
I32 min; /* first character in range */
I32 max; /* last character in range */
+ if (utf) {
+ char *c = (char*)utf8_hop((U8*)d, -1);
+ char *e = d++;
+ while (e-- > c)
+ *(e + 1) = *e;
+ *c = 0xff;
+ /* mark the range as done, and continue */
+ dorange = FALSE;
+ didrange = TRUE;
+ continue;
+ }
i = d - SvPVX(sv); /* remember current offset */
SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
d = SvPVX(sv) + i; /* refresh d after realloc */
@@ -1466,7 +1477,7 @@ S_scan_const(pTHX_ char *start)
char *src, *dst;
d = SvGROW(sv,
- SvCUR(sv) + hicount + 1) +
+ SvLEN(sv) + hicount + 1) +
(d - old_pvx);
src = d - 1;
@@ -1539,7 +1550,7 @@ S_scan_const(pTHX_ char *start)
if (len > e - s + 4) {
char *odest = SvPVX(sv);
- SvGROW(sv, (SvCUR(sv) + len - (e - s + 4)));
+ SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
d = SvPVX(sv) + (d - odest);
}
Copy(str, d, len, char);
@@ -6303,9 +6314,6 @@ S_scan_trans(pTHX_ char *start)
Perl_croak(aTHX_ "Transliteration replacement not terminated");
}
- New(803,tbl,256,short);
- o = newPVOP(OP_TRANS, 0, (char*)tbl);
-
complement = del = squash = 0;
while (strchr("cds", *s)) {
if (*s == 'c')
@@ -6316,6 +6324,9 @@ S_scan_trans(pTHX_ char *start)
squash = OPpTRANS_SQUASH;
s++;
}
+
+ New(803, tbl, complement&&!del?258:256, short);
+ o = newPVOP(OP_TRANS, 0, (char*)tbl);
o->op_private = del|squash|complement|
(DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
(DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);