summaryrefslogtreecommitdiff
path: root/doop.c
diff options
context:
space:
mode:
authorGurusamy Sarathy <gsar@cpan.org>2000-07-11 18:43:26 +0000
committerGurusamy Sarathy <gsar@cpan.org>2000-07-11 18:43:26 +0000
commit036b4402dc24284de44ae733b52896d6fd4fbb77 (patch)
treed5e9e3da35cb3d3e73042376fa1e177e1605d2ae /doop.c
parent5e8d048a952c2fb1d246a3acf32bd1f15a6358fa (diff)
downloadperl-036b4402dc24284de44ae733b52896d6fd4fbb77.tar.gz
integrate cfgperl changes#6252..6260 into mainline
p4raw-link: @6260 on //depot/cfgperl: fc865a0069737312ca5ef9762fe8a9be7aa37747 p4raw-link: @6252 on //depot/cfgperl: 0e4dedf1581344244dfa297db1d00c01c5f821aa p4raw-id: //depot/perl@6362 p4raw-integrated: from //depot/cfgperl@6361 'copy in' t/pragma/constant.t (@5717..) t/op/pack.t t/pragma/warn/op (@5996..) pp_proto.h (@6243..) t/op/my_stash.t (@6250..) lib/IPC/Open3.pm (@6253..) 'ignore' t/pragma/warn/regcomp (@6241..) lib/Exporter.pm (@6251..) p4raw-integrated: from //depot/cfgperl@6260 'copy in' pp.c (@6217..) pod/perlfunc.pod (@6248..) p4raw-integrated: from //depot/cfgperl@6259 'copy in' MANIFEST (@6250..) p4raw-integrated: from //depot/cfgperl@6257 'copy in' op.c (@6228..) 'merge in' sv.c (@6244..) p4raw-integrated: from //depot/cfgperl@6256 'copy in' doop.c (@6254..) p4raw-integrated: from //depot/cfgperl@6254 'copy in' t/op/tr.t (@6192..) 'ignore' embedvar.h objXSUB.h (@6243..) 'merge in' embed.h (@6243..) embed.pl proto.h (@6250..)
Diffstat (limited to 'doop.c')
-rw-r--r--doop.c159
1 files changed, 112 insertions, 47 deletions
diff --git a/doop.c b/doop.c
index fe2df464f5..7dc5a2b4e8 100644
--- a/doop.c
+++ b/doop.c
@@ -22,12 +22,13 @@
#endif
STATIC I32
-S_do_trans_CC_simple(pTHX_ SV *sv)
+S_do_trans_simple(pTHX_ SV *sv) /* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
I32 matches = 0;
+ I32 hasutf = SvUTF8(sv);
STRLEN len;
short *tbl;
I32 ch;
@@ -40,11 +41,15 @@ S_do_trans_CC_simple(pTHX_ SV *sv)
send = s + len;
while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- matches++;
- *s = ch;
- }
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s); /* Given that we're here because tbl is !UTF8...*/
+ else {
+ if ((ch = tbl[*s]) >= 0) {
+ matches++;
+ *s = ch;
+ }
s++;
+ }
}
SvSETMAGIC(sv);
@@ -52,12 +57,13 @@ S_do_trans_CC_simple(pTHX_ SV *sv)
}
STATIC I32
-S_do_trans_CC_count(pTHX_ SV *sv)
+S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
I32 matches = 0;
+ I32 hasutf = SvUTF8(sv);
STRLEN len;
short *tbl;
@@ -69,21 +75,26 @@ S_do_trans_CC_count(pTHX_ SV *sv)
send = s + len;
while (s < send) {
- if (tbl[*s] >= 0)
- matches++;
- s++;
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s);
+ else {
+ if (tbl[*s] >= 0)
+ matches++;
+ s++;
+ }
}
return matches;
}
STATIC I32
-S_do_trans_CC_complex(pTHX_ SV *sv)
+S_do_trans_complex(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
U8 *d;
+ I32 hasutf = SvUTF8(sv);
I32 matches = 0;
STRLEN len;
short *tbl;
@@ -101,29 +112,37 @@ S_do_trans_CC_complex(pTHX_ SV *sv)
U8* p = send;
while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- *d = ch;
- matches++;
- if (p == d - 1 && *p == *d)
- matches--;
- else
- p = d++;
- }
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s);
+ else {
+ if ((ch = tbl[*s]) >= 0) {
+ *d = ch;
+ matches++;
+ if (p == d - 1 && *p == *d)
+ matches--;
+ else
+ p = d++;
+ }
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s; /* -2 is delete character */
+ s++;
+ }
}
}
else {
while (s < send) {
- if ((ch = tbl[*s]) >= 0) {
- *d = ch;
- matches++;
- d++;
- }
- else if (ch == -1) /* -1 is unmapped character */
- *d++ = *s; /* -2 is delete character */
- s++;
+ if (hasutf && *s & 0x80)
+ s+=UTF8SKIP(s);
+ else {
+ if ((ch = tbl[*s]) >= 0) {
+ *d = ch;
+ matches++;
+ d++;
+ }
+ else if (ch == -1) /* -1 is unmapped character */
+ *d++ = *s; /* -2 is delete character */
+ s++;
+ }
}
}
matches += send - d; /* account for disappeared chars */
@@ -135,12 +154,14 @@ S_do_trans_CC_complex(pTHX_ SV *sv)
}
STATIC I32
-S_do_trans_UU_simple(pTHX_ SV *sv)
+S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
U8 *send;
U8 *d;
+ U8 *start;
+ U8 *dstart;
I32 matches = 0;
STRLEN len;
@@ -151,43 +172,83 @@ S_do_trans_UU_simple(pTHX_ SV *sv)
UV extra = none + 1;
UV final;
UV uv;
+ I32 isutf;
+ I32 howmany;
+ isutf = SvUTF8(sv);
s = (U8*)SvPV(sv, len);
send = s + len;
+ start = s;
svp = hv_fetch(hv, "FINAL", 5, FALSE);
if (svp)
final = SvUV(*svp);
- d = s;
+ /* d needs to be bigger than s, in case e.g. upgrading is required */
+ Newz(0, d, len*2+1, U8);
+ dstart = d;
while (s < send) {
if ((uv = swash_fetch(rv, s)) < none) {
s += UTF8SKIP(s);
matches++;
+ if (uv & 0x80 && !isutf) {
+ /* Sneaky-upgrade dstart...d */
+ U8* new;
+ STRLEN len;
+ len = dstart - d;
+ new = bytes_to_utf8(dstart, &len);
+ Copy(new,dstart,len,U8*);
+ d = dstart + len;
+ isutf++;
+ }
d = uv_to_utf8(d, uv);
}
else if (uv == none) {
int i;
- for (i = UTF8SKIP(s); i; i--)
- *d++ = *s++;
+ i = UTF8SKIP(s);
+ if (i > 1 && !isutf) {
+ U8* new;
+ STRLEN len;
+ len = dstart - d;
+ new = bytes_to_utf8(dstart, &len);
+ Copy(new,dstart,len,U8*);
+ d = dstart + len;
+ isutf++;
+ }
+ while(i--)
+ *d++ = *s++;
}
else if (uv == extra) {
- s += UTF8SKIP(s);
+ int i;
+ i = UTF8SKIP(s);
+ s += i;
matches++;
+ if (i > 1 && !isutf) {
+ U8* new;
+ STRLEN len;
+ len = dstart - d;
+ new = bytes_to_utf8(dstart, &len);
+ Copy(new,dstart,len,U8*);
+ d = dstart + len;
+ isutf++;
+ }
d = uv_to_utf8(d, final);
}
else
s += UTF8SKIP(s);
}
*d = '\0';
- SvCUR_set(sv, d - (U8*)SvPVX(sv));
+ SvPV_set(sv, dstart);
+ SvCUR_set(sv, d - dstart);
SvSETMAGIC(sv);
+ if (isutf)
+ SvUTF8_on(sv);
return matches;
}
STATIC I32
-S_do_trans_UU_count(pTHX_ SV *sv)
+S_do_trans_count_utf8(pTHX_ SV *sv)/* SPC - OK */
{
dTHR;
U8 *s;
@@ -202,6 +263,8 @@ S_do_trans_UU_count(pTHX_ SV *sv)
UV uv;
s = (U8*)SvPV(sv, len);
+ if (!SvUTF8(sv))
+ s = bytes_to_utf8(s, &len);
send = s + len;
while (s < send) {
@@ -214,7 +277,7 @@ S_do_trans_UU_count(pTHX_ SV *sv)
}
STATIC I32
-S_do_trans_UU_complex(pTHX_ SV *sv)
+S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
{
dTHR;
U8 *s;
@@ -403,6 +466,8 @@ Perl_do_trans(pTHX_ SV *sv)
{
dTHR;
STRLEN len;
+ I32 hasutf = (PL_op->op_private &
+ (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF));
if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL))
Perl_croak(aTHX_ PL_no_modify);
@@ -417,24 +482,24 @@ Perl_do_trans(pTHX_ SV *sv)
DEBUG_t( Perl_deb(aTHX_ "2.TBL\n"));
- switch (PL_op->op_private & 63) {
+ switch (PL_op->op_private & ~hasutf & 63) {
case 0:
- if (SvUTF8(sv))
- return do_trans_UU_simple(sv);
+ if (hasutf)
+ return do_trans_simple_utf8(sv);
else
- return do_trans_CC_simple(sv);
+ return do_trans_simple(sv);
case OPpTRANS_IDENTICAL:
- if (SvUTF8(sv))
- return do_trans_UU_count(sv);
+ if (hasutf)
+ return do_trans_count_utf8(sv);
else
- return do_trans_CC_count(sv);
+ return do_trans_count(sv);
default:
- if (SvUTF8(sv))
- return do_trans_UU_complex(sv); /* could be UC or CU too */
+ if (hasutf)
+ return do_trans_complex_utf8(sv);
else
- return do_trans_CC_complex(sv);
+ return do_trans_complex(sv);
}
}