diff options
author | David Mitchell <davem@iabyn.com> | 2018-01-12 12:00:30 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2018-01-19 13:45:20 +0000 |
commit | c1048fcffff9a2b16ece33136c56febc7e5e4396 (patch) | |
tree | 14410bc416f113a61d6621f51978f53c363482b9 | |
parent | 5f63644886c09045a5d86d169abdfea9e804bce7 (diff) | |
download | perl-c1048fcffff9a2b16ece33136c56febc7e5e4396.tar.gz |
add two structs for OP_TRANS
Originally, the op_pv of an OP_TRANS op pointed to a 256-slot array of
shorts, which contained the translations. However, in the presence of
tr///c, extra information needs to be stored to handle utf8 strings.
The 256-slot array was extended, with slot 0x100 holding a length,
and the slots from 0x101 onwards holding some extra chars.
This has made things a bit messy, so this commit adds two structs,
one being an array of 256 shorts, and the other being the same but with
some extra fields. So, for example, tbl[0x100] has been replaced with
tbl->excess_len.
This commit should make no functional difference, but will shortly allow
us to fix a bug by, for example, changing the type of the excess_len
field from short to something bigger.
-rw-r--r-- | doop.c | 51 | ||||
-rw-r--r-- | op.c | 45 | ||||
-rw-r--r-- | op.h | 17 |
3 files changed, 67 insertions, 46 deletions
@@ -42,7 +42,7 @@ S_do_trans_simple(pTHX_ SV * const sv) STRLEN len; U8 *s = (U8*)SvPV_nomg(sv,len); U8 * const send = s+len; - const short * const tbl = (short*)cPVOP->op_pv; + const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv; PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; @@ -52,7 +52,7 @@ S_do_trans_simple(pTHX_ SV * const sv) /* First, take care of non-UTF-8 input strings, because they're easy */ if (!SvUTF8(sv)) { while (s < send) { - const I32 ch = tbl[*s]; + const I32 ch = tbl->map[*s]; if (ch >= 0) { matches++; *s = (U8)ch; @@ -78,7 +78,7 @@ S_do_trans_simple(pTHX_ SV * const sv) /* Need to check this, otherwise 128..255 won't match */ const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); - if (c < 0x100 && (ch = tbl[c]) >= 0) { + if (c < 0x100 && (ch = tbl->map[c]) >= 0) { matches++; d = uvchr_to_utf8(d, ch); s += ulen; @@ -121,7 +121,7 @@ S_do_trans_count(pTHX_ SV * const sv) const U8 *s = (const U8*)SvPV_nomg_const(sv, len); const U8 * const send = s + len; I32 matches = 0; - const short * const tbl = (short*)cPVOP->op_pv; + const OPtrans_map * const tbl = (OPtrans_map*)cPVOP->op_pv; PERL_ARGS_ASSERT_DO_TRANS_COUNT; @@ -130,7 +130,7 @@ S_do_trans_count(pTHX_ SV * const sv) if (!SvUTF8(sv)) { while (s < send) { - if (tbl[*s++] >= 0) + if (tbl->map[*s++] >= 0) matches++; } } @@ -140,7 +140,7 @@ S_do_trans_count(pTHX_ SV * const sv) STRLEN ulen; const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); if (c < 0x100) { - if (tbl[c] >= 0) + if (tbl->map[c] >= 0) matches++; } else if (complement) matches++; @@ -166,11 +166,11 @@ S_do_trans_complex(pTHX_ SV * const sv) U8 *s = (U8*)SvPV_nomg(sv, len); U8 * const send = s+len; I32 matches = 0; - const short * const tbl = (short*)cPVOP->op_pv; + const OPtrans_map_ex * const extbl = (OPtrans_map_ex*)cPVOP->op_pv; PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; - if (!tbl) + if (!extbl) Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); if (!SvUTF8(sv)) { @@ -180,7 +180,7 @@ 
S_do_trans_complex(pTHX_ SV * const sv) if (PL_op->op_private & OPpTRANS_SQUASH) { const U8* p = send; while (s < send) { - const I32 ch = tbl[*s]; + const I32 ch = extbl->map[*s]; if (ch >= 0) { *d = (U8)ch; matches++; @@ -196,7 +196,7 @@ S_do_trans_complex(pTHX_ SV * const sv) } else { while (s < send) { - const I32 ch = tbl[*s]; + const I32 ch = extbl->map[*s]; if (ch >= 0) { matches++; *d++ = (U8)ch; @@ -227,7 +227,7 @@ S_do_trans_complex(pTHX_ SV * const sv) if (complement) /* number of replacement chars in excess of any 0x00..0xff * search characters */ - excess = (SSize_t)tbl[0x100]; + excess = (SSize_t)extbl->excess_len; if (PL_op->op_private & OPpTRANS_SQUASH) { UV pch = 0xfeedface; @@ -245,16 +245,19 @@ S_do_trans_complex(pTHX_ SV * const sv) } else { /* use the implicit 0x100..0x7fffffff search range */ + UV comp100 = comp - 0x100; matches++; ch = del /* setting ch to pch forces char to be deleted */ - ? ((excess >= (IV)comp - 0xff) ? (UV)tbl[comp+2] - : pch ) + ? ((excess > (IV)comp100) + ? (UV)extbl->map_ex[comp100] + : pch ) - : ( (excess == -1) ? comp : + : ( (excess == -1) ? comp : (UV)(( excess == 0 - || excess < (IV)comp - 0xff) ? tbl[0x101] - : tbl[comp+2] + || excess <= (IV)comp100) + ? 
extbl->repeat_char + : extbl->map_ex[comp100] ) ); if (ch != pch) { @@ -265,7 +268,7 @@ S_do_trans_complex(pTHX_ SV * const sv) continue; } } - else if ((sch = tbl[comp]) >= 0) { + else if ((sch = extbl->map[comp]) >= 0) { ch = (UV)sch; matches++; if (ch != pch) { @@ -299,10 +302,11 @@ S_do_trans_complex(pTHX_ SV * const sv) } else { /* use the implicit 0x100..0x7fffffff search range */ + UV comp100 = comp - 0x100; matches++; if (del) { - if (excess >= (IV)comp - 0xff) { - ch = (UV)tbl[comp+2]; + if (excess > (IV)comp100) { + ch = (UV)extbl->map_ex[comp100]; d = uvchr_to_utf8(d, ch); } } @@ -310,14 +314,15 @@ S_do_trans_complex(pTHX_ SV * const sv) /* tr/...//c should call S_do_trans_count * instead */ assert(excess != -1); - ch = (UV)( excess == 0 - || excess < (IV)comp-0xff) ? tbl[0x101] - : tbl[comp+2]; + ch = ( excess == 0 + || excess <= (IV)comp100) + ? (UV)extbl->repeat_char + : (UV)extbl->map_ex[comp100]; d = uvchr_to_utf8(d, ch); } } } - else if ((sch = tbl[comp]) >= 0) { + else if ((sch = extbl->map[comp]) >= 0) { d = uvchr_to_utf8(d, (UV)sch); matches++; } @@ -6343,7 +6343,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) I32 i; I32 j; I32 grows = 0; - short *tbl; + OPtrans_map *tbl; const I32 complement = o->op_private & OPpTRANS_COMPLEMENT; const I32 squash = o->op_private & OPpTRANS_SQUASH; @@ -6629,11 +6629,9 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) * The toker will have already expanded char ranges in t and r. */ - tbl = (short*)PerlMemShared_calloc( - /* one slot for 'extra len' count and one slot - * for storing of last replacement char */ - complement ? 258 : 256, - sizeof(short)); + tbl = (OPtrans_map*)PerlMemShared_calloc( + complement ? 
sizeof(OPtrans_map_ex) : sizeof(OPtrans_map), + sizeof(char)); cPVOPo->op_pv = (char*)tbl; if (complement) { @@ -6641,21 +6639,21 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) * with a search char) replacement chars (so j <= rlen always) */ for (i = 0; i < (I32)tlen; i++) - tbl[t[i]] = -1; + tbl->map[t[i]] = -1; for (i = 0, j = 0; i < 256; i++) { - if (!tbl[i]) { + if (!tbl->map[i]) { if (j == (I32)rlen) { if (del) - tbl[i] = -2; + tbl->map[i] = -2; else if (rlen) - tbl[i] = r[j-1]; + tbl->map[i] = r[j-1]; else - tbl[i] = (short)i; + tbl->map[i] = (short)i; } else { if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j])) grows = 1; - tbl[i] = r[j++]; + tbl->map[i] = r[j++]; } } } @@ -6676,17 +6674,18 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) */ short repeat_char; SSize_t excess = rlen - (SSize_t)j; + OPtrans_map_ex *extbl = (OPtrans_map_ex*)tbl; if (excess) { /* More replacement chars than search chars: * store excess replacement chars at end of main table. */ - tbl = (short *) PerlMemShared_realloc(tbl, - (0x102+excess) * sizeof(short)); - cPVOPo->op_pv = (char*)tbl; + extbl = (OPtrans_map_ex *) PerlMemShared_realloc(extbl, + sizeof(OPtrans_map_ex) + excess * sizeof(short)); + cPVOPo->op_pv = (char*)extbl; for (i = 0; i < (I32)excess; i++) - tbl[0x102+i] = r[j+i]; + extbl->map_ex[i] = r[j+i]; repeat_char = r[rlen-1]; } else { @@ -6703,8 +6702,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) o->op_private |= OPpTRANS_IDENTICAL; } } - tbl[0x100] = (short)excess; /* excess char count */ - tbl[0x101] = (short)repeat_char; /* repeated replace char */ + extbl->excess_len = (short)excess; /* excess char count */ + extbl->repeat_char = (short)repeat_char; /* repeated replace char */ } } else { @@ -6717,21 +6716,21 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) o->op_private |= OPpTRANS_IDENTICAL; } for (i = 0; i < 256; i++) - tbl[i] = -1; + tbl->map[i] = -1; for (i = 0, j = 0; i < (I32)tlen; i++,j++) { if (j >= (I32)rlen) { if (del) { - if (tbl[t[i]] == -1) - tbl[t[i]] 
= -2; + if (tbl->map[t[i]] == -1) + tbl->map[t[i]] = -2; continue; } --j; } - if (tbl[t[i]] == -1) { + if (tbl->map[t[i]] == -1) { if ( UVCHR_IS_INVARIANT(t[i]) && ! UVCHR_IS_INVARIANT(r[j])) grows = 1; - tbl[t[i]] = r[j]; + tbl->map[t[i]] = r[j]; } } } @@ -627,6 +627,23 @@ typedef enum { #define ref(o, type) doref(o, type, TRUE) #endif + +/* basic and extended translation tables attached to OP_TRANS/OP_TRANSR ops */ + +typedef struct { + short map[256]; +} OPtrans_map; + +/* used in the presence of tr///c to record any replacement chars that + * are paired with the implicit 0x100..0x7fffffff search chars */ +typedef struct { + short map[256]; + short excess_len; /* number of entries in map_ex[] */ + short repeat_char; + short map_ex[1]; /* Unwarranted chumminess */ +} OPtrans_map_ex; + + /* =head1 Optree Manipulation Functions |