summary refs log tree commit diff
path: root/doop.c
diff options
context:
space:
mode:
author Gurusamy Sarathy <gsar@cpan.org> 2000-07-11 18:49:43 +0000
committer Gurusamy Sarathy <gsar@cpan.org> 2000-07-11 18:49:43 +0000
commit b250498faaf6fbd04315d2b632649596e2498c42 (patch)
tree 25f6de9511b199debdbf56b7ff77e5c37b5288ef /doop.c
parent 036b4402dc24284de44ae733b52896d6fd4fbb77 (diff)
download perl-b250498faaf6fbd04315d2b632649596e2498c42.tar.gz
integrate cfgperl changes#6261..6266 into mainline
p4raw-link: @6266 on //depot/cfgperl: a009ce76c9b4ddbde44a58eab3fe27d331cf27fe p4raw-link: @6261 on //depot/cfgperl: 27d76ecff97d0a9449f569d789504cc8b69a6d01 p4raw-id: //depot/perl@6363 p4raw-integrated: from //depot/cfgperl@6362 'copy in' README.epoc epoc/createpkg.pl epoc/epocish.c (@5586..) epoc/epocish.h t/comp/require.t (@5639..) cygwin/Makefile.SHs (@6096..) ext/POSIX/POSIX.pm (@6140..) hints/bsdos.sh (@6156..) epoc/config.sh (@6168..) ext/POSIX/POSIX.xs (@6198..) p4raw-integrated: from //depot/cfgperl@6265 'copy in' ext/POSIX/POSIX.pod (@5586..) p4raw-integrated: from //depot/cfgperl@6263 'copy in' doop.c (@6256..) p4raw-integrated: from //depot/cfgperl@6261 'merge in' pod/perldiag.pod (@6206..) toke.c (@6250..)
Diffstat (limited to 'doop.c')
-rw-r--r-- doop.c 178
1 files changed, 72 insertions, 106 deletions
diff --git a/doop.c b/doop.c
index 7dc5a2b4e8..4a7430989c 100644
--- a/doop.c
+++ b/doop.c
@@ -21,14 +21,27 @@
#endif
#endif
+
+#define HALF_UPGRADE(start,end) { /* converts the bytes in [start,end) via bytes_to_utf8(), copies the result back to start, and moves end */ \
+ U8* new; /* receives the buffer returned by bytes_to_utf8() */ \
+ STRLEN len; \
+ len = end-start; /* byte length of the region before conversion */ \
+ new = bytes_to_utf8(start, &len); /* len is passed by address; presumably updated to the converted length -- confirm */ \
+ Copy(new,start,len,U8*); /* NOTE(review): element type given is U8*, not U8 -- verify the byte count Copy() derives from it; also assumes start has room for the converted data */ \
+ end = start + len; /* end now points just past the copied-back data */ \
+ } /* NOTE(review): bare block (not statement-safe before an else), arguments are unparenthesised, and 'new' is not released here -- confirm ownership */
+
+
STATIC I32
-S_do_trans_simple(pTHX_ SV *sv) /* SPC - OK */
+S_do_trans_simple(pTHX_ SV *sv)
{
dTHR;
U8 *s;
+ U8 *d;
U8 *send;
+ U8 *dstart;
I32 matches = 0;
- I32 hasutf = SvUTF8(sv);
+ I32 sutf = SvUTF8(sv);
STRLEN len;
short *tbl;
I32 ch;
@@ -40,19 +53,46 @@ S_do_trans_simple(pTHX_ SV *sv) /* SPC - OK */
s = (U8*)SvPV(sv, len);
send = s + len;
+ /* First, take care of non-UTF8 input strings, because they're easy */
+ if (!sutf) {
while (s < send) {
- if (hasutf && *s & 0x80)
- s+=UTF8SKIP(s); /* Given that we're here because tbl is !UTF8...*/
- else {
if ((ch = tbl[*s]) >= 0) {
matches++;
- *s = ch;
- }
+ *s++ = ch;
+ } else
s++;
}
- }
SvSETMAGIC(sv);
+ return matches;
+ }
+ /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */
+ Newz(0, d, len*2+1, U8);
+ dstart = d;
+ while (s < send) {
+ I32 ulen;
+ short c;
+
+ ulen = 1;
+ /* Need to check this, otherwise 128..255 won't match */
+ c = utf8_to_uv(s, &ulen);
+ if (c < 0x100 && (ch = tbl[(short)c]) >= 0) {
+ matches++;
+ if (ch < 0x80)
+ *d++ = ch;
+ else
+ d = uv_to_utf8(d,ch);
+ s += ulen;
+ } else { /* No match -> copy */
+ while (ulen--)
+ *d++ = *s++;
+ }
+ }
+ *d='\0';
+ sv_setpvn(sv, dstart, d - dstart);
+ SvUTF8_on(sv);
+ SvLEN_set(sv, 2*len+1);
+ SvSETMAGIC(sv);
return matches;
}
@@ -78,9 +118,16 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
if (hasutf && *s & 0x80)
s+=UTF8SKIP(s);
else {
- if (tbl[*s] >= 0)
+ UV c;
+ I32 ulen;
+ ulen = 1;
+ if (hasutf)
+ c = utf8_to_uv(s,&ulen);
+ else
+ c = *s;
+ if (c < 0x100 && tbl[c] >= 0)
matches++;
- s++;
+ s+=ulen;
}
}
@@ -88,7 +135,7 @@ S_do_trans_count(pTHX_ SV *sv)/* SPC - OK */
}
STATIC I32
-S_do_trans_complex(pTHX_ SV *sv)/* SPC - OK */
+S_do_trans_complex(pTHX_ SV *sv)/* SPC - NOT OK */
{
dTHR;
U8 *s;
@@ -191,30 +238,15 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
if ((uv = swash_fetch(rv, s)) < none) {
s += UTF8SKIP(s);
matches++;
- if (uv & 0x80 && !isutf) {
- /* Sneaky-upgrade dstart...d */
- U8* new;
- STRLEN len;
- len = dstart - d;
- new = bytes_to_utf8(dstart, &len);
- Copy(new,dstart,len,U8*);
- d = dstart + len;
- isutf++;
- }
+ if (uv & 0x80 && !isutf++)
+ HALF_UPGRADE(dstart,d);
d = uv_to_utf8(d, uv);
}
else if (uv == none) {
int i;
i = UTF8SKIP(s);
- if (i > 1 && !isutf) {
- U8* new;
- STRLEN len;
- len = dstart - d;
- new = bytes_to_utf8(dstart, &len);
- Copy(new,dstart,len,U8*);
- d = dstart + len;
- isutf++;
- }
+ if (i > 1 && !isutf++)
+ HALF_UPGRADE(dstart,d);
while(i--)
*d++ = *s++;
}
@@ -223,23 +255,15 @@ S_do_trans_simple_utf8(pTHX_ SV *sv)/* SPC - OK */
i = UTF8SKIP(s);
s += i;
matches++;
- if (i > 1 && !isutf) {
- U8* new;
- STRLEN len;
- len = dstart - d;
- new = bytes_to_utf8(dstart, &len);
- Copy(new,dstart,len,U8*);
- d = dstart + len;
- isutf++;
- }
+ if (i > 1 && !isutf++)
+ HALF_UPGRADE(dstart,d);
d = uv_to_utf8(d, final);
}
else
s += UTF8SKIP(s);
}
*d = '\0';
- SvPV_set(sv, dstart);
- SvCUR_set(sv, d - dstart);
+ sv_setpvn(sv, dstart, d - dstart);
SvSETMAGIC(sv);
if (isutf)
SvUTF8_on(sv);
@@ -285,8 +309,6 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
U8 *d;
I32 matches = 0;
I32 squash = PL_op->op_private & OPpTRANS_SQUASH;
- I32 from_utf = PL_op->op_private & OPpTRANS_FROM_UTF;
- I32 to_utf = PL_op->op_private & OPpTRANS_TO_UTF;
I32 del = PL_op->op_private & OPpTRANS_DELETE;
SV* rv = (SV*)cSVOP->op_sv;
HV* hv = (HV*)SvRV(rv);
@@ -297,6 +319,7 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
UV uv;
STRLEN len;
U8 *dst;
+ I32 isutf = SvUTF8(sv);
s = (U8*)SvPV(sv, len);
send = s + len;
@@ -305,27 +328,14 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
if (svp)
final = SvUV(*svp);
- if (PL_op->op_private & OPpTRANS_GROWS) {
- I32 bits = 16;
-
- svp = hv_fetch(hv, "BITS", 4, FALSE);
- if (svp)
- bits = (I32)SvIV(*svp);
-
- Newz(801, d, len * (bits >> 3) + 1, U8);
+ Newz(0, d, len*2+1, U8);
dst = d;
- }
- else {
- d = s;
- dst = 0;
- }
if (squash) {
UV puv = 0xfeedface;
while (s < send) {
- if (from_utf) {
+ if (SvUTF8(sv))
uv = swash_fetch(rv, s);
- }
else {
U8 tmpbuf[2];
uv = *s++;
@@ -337,63 +347,42 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
}
uv = swash_fetch(rv, tmpbuf);
}
+
if (uv < none) {
matches++;
if (uv != puv) {
- if (uv >= 0x80 && to_utf)
+ if (uv & 0x80 && !isutf++)
+ HALF_UPGRADE(dst,d);
d = uv_to_utf8(d, uv);
- else
- *d++ = (U8)uv;
puv = uv;
}
- if (from_utf)
s += UTF8SKIP(s);
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- if (from_utf) {
- if (*s < 0x80)
- *d++ = *s++;
- else if (to_utf) {
- int i;
- for (i = UTF8SKIP(s); i; --i)
- *d++ = *s++;
- }
- else {
I32 ulen;
*d++ = (U8)utf8_to_uv(s, &ulen);
s += ulen;
- }
- }
- else { /* must be to_utf only */
- d = uv_to_utf8(d, s[-1]);
- }
puv = 0xfeedface;
continue;
}
else if (uv == extra && !del) {
matches++;
if (uv != puv) {
- if (final >= 0x80 && to_utf)
d = uv_to_utf8(d, final);
- else
- *d++ = (U8)final;
puv = final;
}
- if (from_utf)
s += UTF8SKIP(s);
continue;
}
matches++; /* "none+1" is delete character */
- if (from_utf)
s += UTF8SKIP(s);
}
}
else {
while (s < send) {
- if (from_utf) {
+ if (SvUTF8(sv))
uv = swash_fetch(rv, s);
- }
else {
U8 tmpbuf[2];
uv = *s++;
@@ -407,46 +396,23 @@ S_do_trans_complex_utf8(pTHX_ SV *sv) /* SPC - NOT OK */
}
if (uv < none) {
matches++;
- if (uv >= 0x80 && to_utf)
d = uv_to_utf8(d, uv);
- else
- *d++ = (U8)uv;
- if (from_utf)
s += UTF8SKIP(s);
continue;
}
else if (uv == none) { /* "none" is unmapped character */
- if (from_utf) {
- if (*s < 0x80)
- *d++ = *s++;
- else if (to_utf) {
- int i;
- for (i = UTF8SKIP(s); i; --i)
- *d++ = *s++;
- }
- else {
I32 ulen;
*d++ = (U8)utf8_to_uv(s, &ulen);
s += ulen;
- }
- }
- else { /* must be to_utf only */
- d = uv_to_utf8(d, s[-1]);
- }
continue;
}
else if (uv == extra && !del) {
matches++;
- if (final >= 0x80 && to_utf)
d = uv_to_utf8(d, final);
- else
- *d++ = (U8)final;
- if (from_utf)
s += UTF8SKIP(s);
continue;
}
matches++; /* "none+1" is delete character */
- if (from_utf)
s += UTF8SKIP(s);
}
}