diff options
author | Gurusamy Sarathy <gsar@cpan.org> | 2000-07-11 18:43:26 +0000 |
---|---|---|
committer | Gurusamy Sarathy <gsar@cpan.org> | 2000-07-11 18:43:26 +0000 |
commit | 036b4402dc24284de44ae733b52896d6fd4fbb77 (patch) | |
tree | d5e9e3da35cb3d3e73042376fa1e177e1605d2ae /doop.c | |
parent | 5e8d048a952c2fb1d246a3acf32bd1f15a6358fa (diff) | |
download | perl-036b4402dc24284de44ae733b52896d6fd4fbb77.tar.gz |
integrate cfgperl changes#6252..6260 into mainline
p4raw-link: @6260 on //depot/cfgperl: fc865a0069737312ca5ef9762fe8a9be7aa37747
p4raw-link: @6252 on //depot/cfgperl: 0e4dedf1581344244dfa297db1d00c01c5f821aa
p4raw-id: //depot/perl@6362
p4raw-integrated: from //depot/cfgperl@6361 'copy in'
t/pragma/constant.t (@5717..) t/op/pack.t t/pragma/warn/op
(@5996..) pp_proto.h (@6243..) t/op/my_stash.t (@6250..)
lib/IPC/Open3.pm (@6253..) 'ignore' t/pragma/warn/regcomp
(@6241..) lib/Exporter.pm (@6251..)
p4raw-integrated: from //depot/cfgperl@6260 'copy in' pp.c (@6217..)
pod/perlfunc.pod (@6248..)
p4raw-integrated: from //depot/cfgperl@6259 'copy in' MANIFEST
(@6250..)
p4raw-integrated: from //depot/cfgperl@6257 'copy in' op.c (@6228..)
'merge in' sv.c (@6244..)
p4raw-integrated: from //depot/cfgperl@6256 'copy in' doop.c (@6254..)
p4raw-integrated: from //depot/cfgperl@6254 'copy in' t/op/tr.t
(@6192..) 'ignore' embedvar.h objXSUB.h (@6243..) 'merge in'
embed.h (@6243..) embed.pl proto.h (@6250..)
Diffstat (limited to 'doop.c')
-rw-r--r-- | doop.c | 159 |
1 files changed, 112 insertions, 47 deletions
@@ -22,12 +22,13 @@ #endif STATIC I32 -S_do_trans_CC_simple(pTHX_ SV *sv) +S_do_trans_simple(pTHX_ SV *sv) /* SPC - OK */ { dTHR; U8 *s; U8 *send; I32 matches = 0; + I32 hasutf = SvUTF8(sv); STRLEN len; short *tbl; I32 ch; @@ -40,11 +41,15 @@ S_do_trans_CC_simple(pTHX_ SV *sv) send = s + len; while (s < send) { - if ((ch = tbl[*s]) >= 0) { - matches++; - *s = ch; - } + if (hasutf && *s & 0x80) + s+=UTF8SKIP(s); /* Given that we're here because tbl is !UTF8...*/ + else { + if ((ch = tbl[*s]) >= 0) { + matches++; + *s = ch; + } s++; + } } SvSETMAGIC(sv); @@ -52,12 +57,13 @@ S_do_trans_CC_simple(pTHX_ SV *sv) } STATIC I32 -S_do_trans_CC_count(pTHX_ SV *sv) +S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */ { dTHR; U8 *s; U8 *send; I32 matches = 0; + I32 hasutf = SvUTF8(sv); STRLEN len; short *tbl; @@ -69,21 +75,26 @@ S_do_trans_CC_count(pTHX_ SV *sv) send = s + len; while (s < send) { - if (tbl[*s] >= 0) - matches++; - s++; + if (hasutf && *s & 0x80) + s+=UTF8SKIP(s); + else { + if (tbl[*s] >= 0) + matches++; + s++; + } } return matches; } STATIC I32 -S_do_trans_CC_complex(pTHX_ SV *sv) +S_do_trans_complex(pTHX_ SV *sv)/* SPC - OK */ { dTHR; U8 *s; U8 *send; U8 *d; + I32 hasutf = SvUTF8(sv); I32 matches = 0; STRLEN len; short *tbl; @@ -101,29 +112,37 @@ S_do_trans_CC_complex(pTHX_ SV *sv) U8* p = send; while (s < send) { - if ((ch = tbl[*s]) >= 0) { - *d = ch; - matches++; - if (p == d - 1 && *p == *d) - matches--; - else - p = d++; - } - else if (ch == -1) /* -1 is unmapped character */ - *d++ = *s; /* -2 is delete character */ - s++; + if (hasutf && *s & 0x80) + s+=UTF8SKIP(s); + else { + if ((ch = tbl[*s]) >= 0) { + *d = ch; + matches++; + if (p == d - 1 && *p == *d) + matches--; + else + p = d++; + } + else if (ch == -1) /* -1 is unmapped character */ + *d++ = *s; /* -2 is delete character */ + s++; + } } } else { while (s < send) { - if ((ch = tbl[*s]) >= 0) { - *d = ch; - matches++; - d++; - } - else if (ch == -1) /* -1 is unmapped character */ - *d++ = *s; /* -2 is delete character */ - s++; + if (hasutf && *s & 0x80) + s+=UTF8SKIP(s); + else { + if ((ch = tbl[*s]) >= 0) { + *d = ch; + matches++; + d++; + } + else if (ch == -1) /* -1 is unmapped character */ + *d++ = *s; /* -2 is delete character */ + s++; + } } } matches += send - d; /* account for disappeared chars */ @@ -135,12 +154,14 @@ S_do_trans_CC_complex(pTHX_ SV *sv) } STATIC I32 -S_do_trans_UU_simple(pTHX_ SV *sv) +S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */ { dTHR; U8 *s; U8 *send; U8 *d; + U8 *start; + U8 *dstart; I32 matches = 0; STRLEN len; @@ -151,43 +172,83 @@ S_do_trans_UU_simple(pTHX_ SV *sv) UV extra = none + 1; UV final; UV uv; + I32 isutf; + I32 howmany; + isutf = SvUTF8(sv); s = (U8*)SvPV(sv, len); send = s + len; + start = s; svp = hv_fetch(hv, "FINAL", 5, FALSE); if (svp) final = SvUV(*svp); - d = s; + /* d needs to be bigger than s, in case e.g. upgrading is required */ + Newz(0, d, len*2+1, U8); + dstart = d; while (s < send) { if ((uv = swash_fetch(rv, s)) < none) { s += UTF8SKIP(s); matches++; + if (uv & 0x80 && !isutf) { + /* Sneaky-upgrade dstart...d */ + U8* new; + STRLEN len; + len = dstart - d; + new = bytes_to_utf8(dstart, &len); + Copy(new,dstart,len,U8*); + d = dstart + len; + isutf++; + } d = uv_to_utf8(d, uv); } else if (uv == none) { int i; - for (i = UTF8SKIP(s); i; i--) - *d++ = *s++; + i = UTF8SKIP(s); + if (i > 1 && !isutf) { + U8* new; + STRLEN len; + len = dstart - d; + new = bytes_to_utf8(dstart, &len); + Copy(new,dstart,len,U8*); + d = dstart + len; + isutf++; + } + while(i--) + *d++ = *s++; } else if (uv == extra) { - s += UTF8SKIP(s); + int i; + i = UTF8SKIP(s); + s += i; matches++; + if (i > 1 && !isutf) { + U8* new; + STRLEN len; + len = dstart - d; + new = bytes_to_utf8(dstart, &len); + Copy(new,dstart,len,U8*); + d = dstart + len; + isutf++; + } d = uv_to_utf8(d, final); } else s += UTF8SKIP(s); } *d = '\0'; - SvCUR_set(sv, d - (U8*)SvPVX(sv)); + SvPV_set(sv, dstart); + SvCUR_set(sv, d - dstart); SvSETMAGIC(sv); + if (isutf) + SvUTF8_on(sv); return matches; } STATIC I32 -S_do_trans_UU_count(pTHX_ SV *sv) +S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */ { dTHR; U8 *s; @@ -202,6 +263,8 @@ S_do_trans_UU_count(pTHX_ SV *sv) UV uv; s = (U8*)SvPV(sv, len); + if (!SvUTF8(sv)) + s = bytes_to_utf8(s, &len); send = s + len; while (s < send) { @@ -214,7 +277,7 @@ S_do_trans_UU_count(pTHX_ SV *sv) } STATIC I32 -S_do_trans_UU_complex(pTHX_ SV *sv) +S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */ { dTHR; U8 *s; @@ -403,6 +466,8 @@ Perl_do_trans(pTHX_ SV *sv) { dTHR; STRLEN len; + I32 hasutf = (PL_op->op_private & + (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) Perl_croak(aTHX_ PL_no_modify); @@ -417,24 +482,24 @@ Perl_do_trans(pTHX_ SV *sv) DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); - switch (PL_op->op_private & 63) { + switch (PL_op->op_private & ~hasutf & 63) { case 0: - if (SvUTF8(sv)) - return do_trans_UU_simple(sv); + if (hasutf) + return do_trans_simple_utf8(sv); else - return do_trans_CC_simple(sv); + return do_trans_simple(sv); case OPpTRANS_IDENTICAL: - if (SvUTF8(sv)) - return do_trans_UU_count(sv); + if (hasutf) + return do_trans_count_utf8(sv); else - return do_trans_CC_count(sv); + return do_trans_count(sv); default: - if (SvUTF8(sv)) - return do_trans_UU_complex(sv); /* could be UC or CU too */ + if (hasutf) + return do_trans_complex_utf8(sv); else - return do_trans_CC_complex(sv); + return do_trans_complex(sv); } } |