summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2018-01-12 12:00:30 +0000
committerDavid Mitchell <davem@iabyn.com>2018-01-19 13:45:20 +0000
commitc1048fcffff9a2b16ece33136c56febc7e5e4396 (patch)
tree14410bc416f113a61d6621f51978f53c363482b9
parent5f63644886c09045a5d86d169abdfea9e804bce7 (diff)
downloadperl-c1048fcffff9a2b16ece33136c56febc7e5e4396.tar.gz
add two structs for OP_TRANS
Originally, the op_pv of an OP_TRANS op pointed to a 256-slot array of shorts, which contained the translations. However, in the presence of tr///c, extra information needs to be stored to handle utf8 strings. The 256-slot array was extended, with slot 0x100 holding a length (the count of excess replacement chars), slot 0x101 holding the repeated replacement char, and slots 0x102 onwards holding the excess replacement chars themselves. This has made things a bit messy, so this commit adds two structs, one being an array of 256 shorts, and the other being the same but with some extra fields. So for example tbl[0x100] has been replaced with extbl->excess_len, tbl[0x101] with extbl->repeat_char, and tbl[0x102+i] with extbl->map_ex[i]. This commit should make no functional difference, but will allow us shortly to fix a bug by, for example, changing the type of the excess_len field from short to something bigger.
-rw-r--r--doop.c51
-rw-r--r--op.c45
-rw-r--r--op.h17
3 files changed, 67 insertions, 46 deletions
diff --git a/doop.c b/doop.c
index 58a49b0705..fa908cf4fa 100644
--- a/doop.c
+++ b/doop.c
@@ -42,7 +42,7 @@ S_do_trans_simple(pTHX_ SV * const sv)
STRLEN len;
U8 *s = (U8*)SvPV_nomg(sv,len);
U8 * const send = s+len;
- const short * const tbl = (short*)cPVOP->op_pv;
+ const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv;
PERL_ARGS_ASSERT_DO_TRANS_SIMPLE;
@@ -52,7 +52,7 @@ S_do_trans_simple(pTHX_ SV * const sv)
/* First, take care of non-UTF-8 input strings, because they're easy */
if (!SvUTF8(sv)) {
while (s < send) {
- const I32 ch = tbl[*s];
+ const I32 ch = tbl->map[*s];
if (ch >= 0) {
matches++;
*s = (U8)ch;
@@ -78,7 +78,7 @@ S_do_trans_simple(pTHX_ SV * const sv)
/* Need to check this, otherwise 128..255 won't match */
const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
- if (c < 0x100 && (ch = tbl[c]) >= 0) {
+ if (c < 0x100 && (ch = tbl->map[c]) >= 0) {
matches++;
d = uvchr_to_utf8(d, ch);
s += ulen;
@@ -121,7 +121,7 @@ S_do_trans_count(pTHX_ SV * const sv)
const U8 *s = (const U8*)SvPV_nomg_const(sv, len);
const U8 * const send = s + len;
I32 matches = 0;
- const short * const tbl = (short*)cPVOP->op_pv;
+ const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv;
PERL_ARGS_ASSERT_DO_TRANS_COUNT;
@@ -130,7 +130,7 @@ S_do_trans_count(pTHX_ SV * const sv)
if (!SvUTF8(sv)) {
while (s < send) {
- if (tbl[*s++] >= 0)
+ if (tbl->map[*s++] >= 0)
matches++;
}
}
@@ -140,7 +140,7 @@ S_do_trans_count(pTHX_ SV * const sv)
STRLEN ulen;
const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT);
if (c < 0x100) {
- if (tbl[c] >= 0)
+ if (tbl->map[c] >= 0)
matches++;
} else if (complement)
matches++;
@@ -166,11 +166,11 @@ S_do_trans_complex(pTHX_ SV * const sv)
U8 *s = (U8*)SvPV_nomg(sv, len);
U8 * const send = s+len;
I32 matches = 0;
- const short * const tbl = (short*)cPVOP->op_pv;
+ const OPtrans_map_ex * const extbl = (OPtrans_map_ex*)cPVOP->op_pv;
PERL_ARGS_ASSERT_DO_TRANS_COMPLEX;
- if (!tbl)
+ if (!extbl)
Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__);
if (!SvUTF8(sv)) {
@@ -180,7 +180,7 @@ S_do_trans_complex(pTHX_ SV * const sv)
if (PL_op->op_private & OPpTRANS_SQUASH) {
const U8* p = send;
while (s < send) {
- const I32 ch = tbl[*s];
+ const I32 ch = extbl->map[*s];
if (ch >= 0) {
*d = (U8)ch;
matches++;
@@ -196,7 +196,7 @@ S_do_trans_complex(pTHX_ SV * const sv)
}
else {
while (s < send) {
- const I32 ch = tbl[*s];
+ const I32 ch = extbl->map[*s];
if (ch >= 0) {
matches++;
*d++ = (U8)ch;
@@ -227,7 +227,7 @@ S_do_trans_complex(pTHX_ SV * const sv)
if (complement)
/* number of replacement chars in excess of any 0x00..0xff
* search characters */
- excess = (SSize_t)tbl[0x100];
+ excess = (SSize_t)extbl->excess_len;
if (PL_op->op_private & OPpTRANS_SQUASH) {
UV pch = 0xfeedface;
@@ -245,16 +245,19 @@ S_do_trans_complex(pTHX_ SV * const sv)
}
else {
/* use the implicit 0x100..0x7fffffff search range */
+ UV comp100 = comp - 0x100;
matches++;
ch = del
/* setting ch to pch forces char to be deleted */
- ? ((excess >= (IV)comp - 0xff) ? (UV)tbl[comp+2]
- : pch )
+ ? ((excess > (IV)comp100)
+ ? (UV)extbl->map_ex[comp100]
+ : pch )
- : ( (excess == -1) ? comp :
+ : ( (excess == -1) ? comp :
(UV)(( excess == 0
- || excess < (IV)comp - 0xff) ? tbl[0x101]
- : tbl[comp+2]
+ || excess <= (IV)comp100)
+ ? extbl->repeat_char
+ : extbl->map_ex[comp100]
)
);
if (ch != pch) {
@@ -265,7 +268,7 @@ S_do_trans_complex(pTHX_ SV * const sv)
continue;
}
}
- else if ((sch = tbl[comp]) >= 0) {
+ else if ((sch = extbl->map[comp]) >= 0) {
ch = (UV)sch;
matches++;
if (ch != pch) {
@@ -299,10 +302,11 @@ S_do_trans_complex(pTHX_ SV * const sv)
}
else {
/* use the implicit 0x100..0x7fffffff search range */
+ UV comp100 = comp - 0x100;
matches++;
if (del) {
- if (excess >= (IV)comp - 0xff) {
- ch = (UV)tbl[comp+2];
+ if (excess > (IV)comp100) {
+ ch = (UV)extbl->map_ex[comp100];
d = uvchr_to_utf8(d, ch);
}
}
@@ -310,14 +314,15 @@ S_do_trans_complex(pTHX_ SV * const sv)
/* tr/...//c should call S_do_trans_count
* instead */
assert(excess != -1);
- ch = (UV)( excess == 0
- || excess < (IV)comp-0xff) ? tbl[0x101]
- : tbl[comp+2];
+ ch = ( excess == 0
+ || excess <= (IV)comp100)
+ ? (UV)extbl->repeat_char
+ : (UV)extbl->map_ex[comp100];
d = uvchr_to_utf8(d, ch);
}
}
}
- else if ((sch = tbl[comp]) >= 0) {
+ else if ((sch = extbl->map[comp]) >= 0) {
d = uvchr_to_utf8(d, (UV)sch);
matches++;
}
diff --git a/op.c b/op.c
index b3c3336030..4fb46e1612 100644
--- a/op.c
+++ b/op.c
@@ -6343,7 +6343,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
I32 i;
I32 j;
I32 grows = 0;
- short *tbl;
+ OPtrans_map *tbl;
const I32 complement = o->op_private & OPpTRANS_COMPLEMENT;
const I32 squash = o->op_private & OPpTRANS_SQUASH;
@@ -6629,11 +6629,9 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
* The toker will have already expanded char ranges in t and r.
*/
- tbl = (short*)PerlMemShared_calloc(
- /* one slot for 'extra len' count and one slot
- * for storing of last replacement char */
- complement ? 258 : 256,
- sizeof(short));
+ tbl = (OPtrans_map*)PerlMemShared_calloc(
+ complement ? sizeof(OPtrans_map_ex) : sizeof(OPtrans_map),
+ sizeof(char));
cPVOPo->op_pv = (char*)tbl;
if (complement) {
@@ -6641,21 +6639,21 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
* with a search char) replacement chars (so j <= rlen always)
*/
for (i = 0; i < (I32)tlen; i++)
- tbl[t[i]] = -1;
+ tbl->map[t[i]] = -1;
for (i = 0, j = 0; i < 256; i++) {
- if (!tbl[i]) {
+ if (!tbl->map[i]) {
if (j == (I32)rlen) {
if (del)
- tbl[i] = -2;
+ tbl->map[i] = -2;
else if (rlen)
- tbl[i] = r[j-1];
+ tbl->map[i] = r[j-1];
else
- tbl[i] = (short)i;
+ tbl->map[i] = (short)i;
}
else {
if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j]))
grows = 1;
- tbl[i] = r[j++];
+ tbl->map[i] = r[j++];
}
}
}
@@ -6676,17 +6674,18 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
*/
short repeat_char;
SSize_t excess = rlen - (SSize_t)j;
+ OPtrans_map_ex *extbl = (OPtrans_map_ex*)tbl;
if (excess) {
/* More replacement chars than search chars:
* store excess replacement chars at end of main table.
*/
- tbl = (short *) PerlMemShared_realloc(tbl,
- (0x102+excess) * sizeof(short));
- cPVOPo->op_pv = (char*)tbl;
+ extbl = (OPtrans_map_ex *) PerlMemShared_realloc(extbl,
+ sizeof(OPtrans_map_ex) + excess * sizeof(short));
+ cPVOPo->op_pv = (char*)extbl;
for (i = 0; i < (I32)excess; i++)
- tbl[0x102+i] = r[j+i];
+ extbl->map_ex[i] = r[j+i];
repeat_char = r[rlen-1];
}
else {
@@ -6703,8 +6702,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
o->op_private |= OPpTRANS_IDENTICAL;
}
}
- tbl[0x100] = (short)excess; /* excess char count */
- tbl[0x101] = (short)repeat_char; /* repeated replace char */
+ extbl->excess_len = (short)excess; /* excess char count */
+ extbl->repeat_char = (short)repeat_char; /* repeated replace char */
}
}
else {
@@ -6717,21 +6716,21 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
o->op_private |= OPpTRANS_IDENTICAL;
}
for (i = 0; i < 256; i++)
- tbl[i] = -1;
+ tbl->map[i] = -1;
for (i = 0, j = 0; i < (I32)tlen; i++,j++) {
if (j >= (I32)rlen) {
if (del) {
- if (tbl[t[i]] == -1)
- tbl[t[i]] = -2;
+ if (tbl->map[t[i]] == -1)
+ tbl->map[t[i]] = -2;
continue;
}
--j;
}
- if (tbl[t[i]] == -1) {
+ if (tbl->map[t[i]] == -1) {
if ( UVCHR_IS_INVARIANT(t[i])
&& ! UVCHR_IS_INVARIANT(r[j]))
grows = 1;
- tbl[t[i]] = r[j];
+ tbl->map[t[i]] = r[j];
}
}
}
diff --git a/op.h b/op.h
index eb62c946fc..aeee339c14 100644
--- a/op.h
+++ b/op.h
@@ -627,6 +627,23 @@ typedef enum {
#define ref(o, type) doref(o, type, TRUE)
#endif
+
+/* basic and extended translation tables attached to OP_TRANS/OP_TRANSR ops */
+
+typedef struct {
+ short map[256];
+} OPtrans_map;
+
+/* used in the presence of tr///c to record any replacement chars that
+ * are paired with the implicit 0x100..0x7fffffff search chars */
+typedef struct {
+ short map[256];
+ short excess_len; /* number of entries in map_ex[] */
+ short repeat_char;
+ short map_ex[1]; /* Unwarranted chumminess */
+} OPtrans_map_ex;
+
+
/*
=head1 Optree Manipulation Functions