summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJarkko Hietaniemi <jhi@iki.fi>2006-07-22 21:51:48 +0300
committerRafael Garcia-Suarez <rgarciasuarez@gmail.com>2006-07-23 13:04:10 +0000
commite294cc5d7355a434d6b698c777674e1b7d4d4583 (patch)
tree7149b707cac3dbe5e8fb910a0b8ae2f73b451bfe
parent8c06321bcee4c3ffba180fe51c1785ca0a328fdd (diff)
downloadperl-e294cc5d7355a434d6b698c777674e1b7d4d4583.tar.gz
z/OS: pp_sys.c, reg*.c, toke.c, utf8.c
Message-ID: <44C24994.6020008@iki.fi> p4raw-id: //depot/perl@28607
-rw-r--r--pp_sys.c11
-rw-r--r--regcomp.c25
-rw-r--r--regexec.c14
-rw-r--r--toke.c108
-rw-r--r--utf8.c4
5 files changed, 149 insertions, 13 deletions
diff --git a/pp_sys.c b/pp_sys.c
index d0b3b10da5..4246e3cbd7 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -2557,6 +2557,17 @@ PP(pp_accept)
nstio = GvIOn(ngv);
fd = PerlSock_accept(PerlIO_fileno(IoIFP(gstio)), (struct sockaddr *) namebuf, &len);
+#if defined(OEMVS)
+ if (len == 0) {
+ /* Some platforms indicate zero length when an AF_UNIX client is
+ * not bound. Simulate a non-zero-length sockaddr structure in
+ * this case. */
+ namebuf[0] = 0; /* sun_len */
+ namebuf[1] = AF_UNIX; /* sun_family */
+ len = 2;
+ }
+#endif
+
if (fd < 0)
goto badexit;
if (IoIFP(nstio))
diff --git a/regcomp.c b/regcomp.c
index f4821c0481..8928c419f4 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2005,14 +2005,23 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan, I32 *min, U32 flags
char * const s0 = STRING(scan), *s, *t;
char * const s1 = s0 + STR_LEN(scan) - 1;
char * const s2 = s1 - 4;
+#ifdef EBCDIC /* RD tunifold greek 0390 and 03B0 */
+ const char t0[] = "\xaf\x49\xaf\x42";
+#else
const char t0[] = "\xcc\x88\xcc\x81";
+#endif
const char * const t1 = t0 + 3;
for (s = s0 + 2;
s < s2 && (t = ninstr(s, s1, t0, t1));
s = t + 4) {
+#ifdef EBCDIC
+ if (((U8)t[-1] == 0x68 && (U8)t[-2] == 0xB4) ||
+ ((U8)t[-1] == 0x46 && (U8)t[-2] == 0xB5))
+#else
if (((U8)t[-1] == 0xB9 && (U8)t[-2] == 0xCE) ||
((U8)t[-1] == 0x85 && (U8)t[-2] == 0xCF))
+#endif
*min -= 4;
}
}
@@ -5881,11 +5890,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, U32 depth)
STRLEN foldlen;
const UV f = to_uni_fold(natvalue, foldbuf, &foldlen);
+#ifdef EBCDIC /* RD t/uni/fold ff and 6b */
+ if (RExC_precomp[0] == ':' &&
+ RExC_precomp[1] == '[' &&
+ (f == 0xDF || f == 0x92)) {
+ f = NATIVE_TO_UNI(f);
+ }
+#endif
/* If folding and foldable and a single
* character, insert also the folded version
* to the charclass. */
if (f != value) {
+#ifdef EBCDIC /* RD tunifold ligatures s,t fb05, fb06 */
+ if ((RExC_precomp[0] == ':' &&
+ RExC_precomp[1] == '[' &&
+ (f == 0xA2 &&
+ (value == 0xFB05 || value == 0xFB06))) ?
+ foldlen == ((STRLEN)UNISKIP(f) - 1) :
+ foldlen == (STRLEN)UNISKIP(f) )
+#else
if (foldlen == (STRLEN)UNISKIP(f))
+#endif
Perl_sv_catpvf(aTHX_ listsv,
"%04"UVxf"\n", f);
else {
diff --git a/regexec.c b/regexec.c
index 3eee31e361..59d5624a67 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3941,11 +3941,19 @@ S_regmatch(pTHX_ const regmatch_info *reginfo, regnode *prog)
to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
-
+#ifdef EBCDIC
+ ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
+ ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
+ ckWARN(WARN_UTF8) ?
+ 0 : UTF8_ALLOW_ANY);
+#else
ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
- uniflags);
+ uniflags);
ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
- uniflags);
+ uniflags);
+#endif
}
else {
ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
diff --git a/toke.c b/toke.c
index 30a4548c92..2c6d8be40b 100644
--- a/toke.c
+++ b/toke.c
@@ -1787,6 +1787,7 @@ S_scan_const(pTHX_ char *start)
UV uv;
#ifdef EBCDIC
UV literal_endpoint = 0;
+ bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
#endif
const char * const leaveit = /* set of acceptably-backslashed characters */
@@ -1810,7 +1811,15 @@ S_scan_const(pTHX_ char *start)
I32 min; /* first character in range */
I32 max; /* last character in range */
- if (has_utf8) {
+#ifdef EBCDIC
+ UV uvmax = 0;
+#endif
+
+ if (has_utf8
+#ifdef EBCDIC
+ && !native_range
+#endif
+ ) {
char * const c = (char*)utf8_hop((U8*)d, -1);
char *e = d++;
while (e-- > c)
@@ -1823,12 +1832,43 @@ S_scan_const(pTHX_ char *start)
}
i = d - SvPVX_const(sv); /* remember current offset */
+#ifdef EBCDIC
+ SvGROW(sv,
+ SvLEN(sv) + (has_utf8 ?
+ (512 - UTF_CONTINUATION_MARK +
+ UNISKIP(0x100))
+ : 256));
+ /* How many two-byte within 0..255: 128 in UTF-8,
+ * 96 in UTF-8-mod. */
+#else
SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
+#endif
d = SvPVX(sv) + i; /* refresh d after realloc */
- d -= 2; /* eat the first char and the - */
-
- min = (U8)*d; /* first char in range */
- max = (U8)d[1]; /* last char in range */
+#ifdef EBCDIC
+ if (has_utf8) {
+ int j;
+ for (j = 0; j <= 1; j++) {
+ char * const c = (char*)utf8_hop((U8*)d, -1);
+ const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
+ if (j)
+ min = (U8)uv;
+ else if (uv < 256)
+ max = (U8)uv;
+ else {
+ max = (U8)0xff; /* only to \xff */
+ uvmax = uv; /* \x{100} to uvmax */
+ }
+ d = c; /* eat endpoint chars */
+ }
+ }
+ else {
+#endif
+ d -= 2; /* eat the first char and the - */
+ min = (U8)*d; /* first char in range */
+ max = (U8)d[1]; /* last char in range */
+#ifdef EBCDIC
+ }
+#endif
if (min > max) {
Perl_croak(aTHX_
@@ -1853,7 +1893,29 @@ S_scan_const(pTHX_ char *start)
else
#endif
for (i = min; i <= max; i++)
- *d++ = (char)i;
+#ifdef EBCDIC
+ if (has_utf8) {
+ const U8 ch = (U8)NATIVE_TO_UTF(i);
+ if (UNI_IS_INVARIANT(ch))
+ *d++ = (U8)i;
+ else {
+ *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
+ *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
+ }
+ }
+ else
+#endif
+ *d++ = (char)i;
+
+#ifdef EBCDIC
+ if (uvmax) {
+ d = (char*)uvchr_to_utf8((U8*)d, 0x100);
+ if (uvmax > 0x101)
+ *d++ = (char)UTF_TO_NATIVE(0xff);
+ if (uvmax > 0x100)
+ d = (char*)uvchr_to_utf8((U8*)d, uvmax);
+ }
+#endif
/* mark the range as done, and continue */
dorange = FALSE;
@@ -1869,7 +1931,11 @@ S_scan_const(pTHX_ char *start)
if (didrange) {
Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
}
- if (has_utf8) {
+ if (has_utf8
+#ifdef EBCDIC
+ && !native_range
+#endif
+ ) {
*d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
s++;
continue;
@@ -1881,6 +1947,7 @@ S_scan_const(pTHX_ char *start)
didrange = FALSE;
#ifdef EBCDIC
literal_endpoint = 0;
+ native_range = TRUE;
#endif
}
}
@@ -1985,8 +2052,8 @@ S_scan_const(pTHX_ char *start)
if ((isALPHA(*s) || isDIGIT(*s)) &&
ckWARN(WARN_MISC))
Perl_warner(aTHX_ packWARN(WARN_MISC),
- "Unrecognized escape \\%c passed through",
- *s);
+ "Unrecognized escape \\%c passed through",
+ *s);
/* default action is to copy the quoted character */
goto default_action;
}
@@ -2084,6 +2151,10 @@ S_scan_const(pTHX_ char *start)
(PL_lex_repl ? OPpTRANS_FROM_UTF
: OPpTRANS_TO_UTF);
}
+#ifdef EBCDIC
+ if (uv > 255 && !dorange)
+ native_range = FALSE;
+#endif
}
else {
*d++ = (char)uv;
@@ -2161,6 +2232,10 @@ S_scan_const(pTHX_ char *start)
SvGROW(sv, (SvLEN(sv) + len - (e - s + 4)));
d = SvPVX(sv) + (d - odest);
}
+#ifdef EBCDIC
+ if (!dorange)
+ native_range = FALSE; /* \N{} is guessed to be Unicode */
+#endif
Copy(str, d, len, char);
d += len;
SvREFCNT_dec(res);
@@ -2234,6 +2309,10 @@ S_scan_const(pTHX_ char *start)
}
d = (char*)uvchr_to_utf8((U8*)d, nextuv);
has_utf8 = TRUE;
+#ifdef EBCDIC
+ if (uv > 255 && !dorange)
+ native_range = FALSE;
+#endif
}
else {
*d++ = NATIVE_TO_NEED(has_utf8,*s++);
@@ -12343,7 +12422,7 @@ S_swallow_bom(pTHX_ U8 *s)
filter_add(utf16rev_textfilter, NULL);
Newx(news, (PL_bufend - (char*)s) * 3 / 2 + 1, U8);
utf16_to_utf8_reversed(s, news,
- PL_bufend - (char*)s - 1,
+ PL_bufend - (char*)s,
&newlen);
sv_setpvn(PL_linestr, (const char*)news, newlen);
#ifdef PERL_MAD
@@ -12413,6 +12492,15 @@ S_swallow_bom(pTHX_ U8 *s)
goto utf16be;
}
}
+#ifdef EBCDIC
+ case 0xDD:
+ if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
+ if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
+ s += 4; /* UTF-8 */
+ }
+ break;
+#endif
+
default:
if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
/* Leading bytes
diff --git a/utf8.c b/utf8.c
index b62e552aac..3b2297a993 100644
--- a/utf8.c
+++ b/utf8.c
@@ -896,7 +896,11 @@ Perl_utf16_to_utf8(pTHX_ U8* p, U8* d, I32 bytelen, I32 *newlen)
UV uv = (p[0] << 8) + p[1]; /* UTF-16BE */
p += 2;
if (uv < 0x80) {
+#ifdef EBCDIC
+ *d++ = UNI_TO_NATIVE(uv);
+#else
*d++ = (U8)uv;
+#endif
continue;
}
if (uv < 0x800) {