From 9df9a5cd21960315c39a8675579b46e68b5402c2 Mon Sep 17 00:00:00 2001 From: Nick Ing-Simmons Date: Sun, 11 Mar 2001 17:44:20 +0000 Subject: USE_SFIO issues as observed by Chris Nandor - exclude layer syms in makedef.pl for sfio - also inhibit Encode from trying to build ":encode()" layer. p4raw-id: //depot/perlio@9102 --- ext/Encode/Encode.xs | 4 +-- makedef.pl | 77 +++++++++++++++++++++++++++------------------------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/ext/Encode/Encode.xs b/ext/Encode/Encode.xs index 1332adab7c..b559120077 100644 --- a/ext/Encode/Encode.xs +++ b/ext/Encode/Encode.xs @@ -14,7 +14,7 @@ UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) -#ifdef USE_PERLIO +#if defined(USE_PERLIO) && !defined(USE_SFIO) /* Define an encoding "layer" in the perliol.h sense. The layer defined here "inherits" in an object-oriented sense from the "perlio" layer with its PerlIOBuf_* "methods". @@ -324,7 +324,7 @@ PerlIO_funcs PerlIO_encode = { PerlIOBuf_get_cnt, PerlIOBuf_set_ptrcnt, }; -#endif +#endif /* encode layer */ void Encode_Define(pTHX_ encode_t *enc) diff --git a/makedef.pl b/makedef.pl index 88c2d88ff3..f165a90b86 100644 --- a/makedef.pl +++ b/makedef.pl @@ -57,7 +57,7 @@ my %PLATFORM; @PLATFORM{@PLATFORM} = (); defined $PLATFORM || die "PLATFORM undefined, must be one of: @PLATFORM\n"; -exists $PLATFORM{$PLATFORM} || die "PLATFORM must be one of: @PLATFORM\n"; +exists $PLATFORM{$PLATFORM} || die "PLATFORM must be one of: @PLATFORM\n"; my $config_sh = "config.sh"; my $config_h = "config.h"; @@ -69,7 +69,7 @@ my $pp_sym = "pp.sym"; my $globvar_sym = "globvar.sym"; my $perlio_sym = "perlio.sym"; -if ($PLATFORM eq 'aix') { +if ($PLATFORM eq 'aix') { # Nothing for now. } elsif ($PLATFORM eq 'win32') { @@ -125,7 +125,7 @@ $define{PERL_IMPLICIT_CONTEXT} ||= if ($define{PERL_CAPI}) { delete $define{PERL_OBJECT}; - $define{MULTIPLICITY} = 1; + $define{MULTIPLICITY} = 1; $define{PERL_IMPLICIT_CONTEXT} = 1; $define{PERL_IMPLICIT_SYS} = 1; } @@ -520,8 +520,8 @@ sub readvar { # All symbols have a Perl_ prefix because that's what embed.h # sticks in front of them. push(@syms, &$proc($1,$2,$3)) if (/\bPERLVAR(A?I?C?)\(([IGT])(\w+)/); - } - close(VARS); + } + close(VARS); return \@syms; } @@ -541,9 +541,38 @@ if ($define{'PERL_GLOBAL_STRUCT'}) { my @syms = ($global_sym, $globvar_sym); # $pp_sym is not part of the API +my @layer_syms = qw( + PerlIOBase_clearerr + PerlIOBase_close + PerlIOBase_eof + PerlIOBase_error + PerlIOBase_fileno + PerlIOBuf_bufsiz + PerlIOBuf_fdopen + PerlIOBuf_fill + PerlIOBuf_flush + PerlIOBuf_get_cnt + PerlIOBuf_get_ptr + PerlIOBuf_open + PerlIOBuf_pushed + PerlIOBuf_read + PerlIOBuf_reopen + PerlIOBuf_seek + PerlIOBuf_set_ptrcnt + PerlIOBuf_setlinebuf + PerlIOBuf_tell + PerlIOBuf_unread + PerlIOBuf_write + PerlIO_define_layer + PerlIO_pending + PerlIO_push + PerlIO_unread +); + if ($define{'USE_PERLIO'}) { push @syms, $perlio_sym; if ($define{'USE_SFIO'}) { + skip_symbols \@layer_syms; # SFIO defines most of the PerlIO routines as macros skip_symbols [qw( PerlIO_canset_cnt @@ -591,34 +620,8 @@ if ($define{'USE_PERLIO'}) { } } else { # Skip the PerlIO New Generation symbols. - skip_symbols [qw( - PerlIOBase_clearerr - PerlIOBase_close - PerlIOBase_eof - PerlIOBase_error - PerlIOBase_fileno - PerlIOBuf_bufsiz - PerlIOBuf_fdopen - PerlIOBuf_fill - PerlIOBuf_flush - PerlIOBuf_get_cnt - PerlIOBuf_get_ptr - PerlIOBuf_open - PerlIOBuf_pushed - PerlIOBuf_read - PerlIOBuf_reopen - PerlIOBuf_seek - PerlIOBuf_set_ptrcnt - PerlIOBuf_setlinebuf - PerlIOBuf_tell - PerlIOBuf_unread - PerlIOBuf_write - PerlIO_define_layer - PerlIO_pending - PerlIO_push - PerlIO_unread - )]; -} + skip_symbols \@layer_syms; +} for my $syms (@syms) { open (GLOBAL, "<$syms") || die "failed to open $syms: $!\n"; @@ -651,15 +654,15 @@ else { unless ($define{'PERL_GLOBAL_STRUCT'}) { my $glob = readvar($perlvars_h); emit_symbols $glob; - } + } unless ($define{'MULTIPLICITY'}) { my $glob = readvar($intrpvar_h); emit_symbols $glob; - } + } unless ($define{'MULTIPLICITY'} || $define{'USE_5005THREADS'}) { my $glob = readvar($thrdvar_h); emit_symbols $glob; - } + } } sub try_symbol { @@ -863,7 +866,7 @@ foreach my $symbol (sort keys %export) { sub emit_symbol { my $symbol = shift; - chomp($symbol); + chomp($symbol); $export{$symbol} = 1; } -- cgit v1.2.1 From ffebcc3e6acf9636047196271385e91b0490ae34 Mon Sep 17 00:00:00 2001 From: Nick Ing-Simmons Date: Sun, 11 Mar 2001 18:15:44 +0000 Subject: Put utf8_downgrade in CRIPPLED_CC versions of SvPVbyte* before I forget. p4raw-id: //depot/perlio@9104 --- sv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sv.c b/sv.c index fa3b29edfb..0a2c2c07b9 100644 --- a/sv.c +++ b/sv.c @@ -3032,9 +3032,8 @@ Perl_sv_utf8_downgrade(pTHX_ register SV* sv, bool fail_ok) } SvCUR(sv) = len; } - SvUTF8_off(sv); } - + SvUTF8_off(sv); return TRUE; } @@ -6153,18 +6152,21 @@ Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) char * Perl_sv_pvbyte(pTHX_ SV *sv) { + sv_utf8_downgrade(sv,0); return sv_pv(sv); } char * Perl_sv_pvbyten(pTHX_ SV *sv, STRLEN *lp) { + sv_utf8_downgrade(sv,0); return sv_pvn(sv,lp); } char * Perl_sv_pvbyten_force(pTHX_ SV *sv, STRLEN *lp) { + sv_utf8_downgrade(sv,0); return sv_pvn_force(sv,lp); } -- cgit v1.2.1 From c7f1f0165ac822994a67426c2d8003c3974e49ef Mon Sep 17 00:00:00 2001 From: Nick Ing-Simmons Date: Sun, 11 Mar 2001 19:39:34 +0000 Subject: Audit #ifdef EBCDIC and #ifndef ASCIIish, replace latter with former. Use ASCII_TO_NATIVE and NATIVE_TO_ASCII to avoid some #ifs. p4raw-id: //depot/perlio@9105 --- regcomp.c | 27 +++++++-------------------- toke.c | 53 ++++++++++++++++++++--------------------------------- utf8.c | 4 ---- 3 files changed, 27 insertions(+), 57 deletions(-) diff --git a/regcomp.c b/regcomp.c index 227737cfb5..05a48d9747 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2837,19 +2837,11 @@ tryagain: p++; break; case 'e': -#ifdef ASCIIish - ender = '\033'; -#else - ender = '\047'; -#endif + ender = ASCII_TO_NATIVE('\033'); p++; break; case 'a': -#ifdef ASCIIish - ender = '\007'; -#else - ender = '\057'; -#endif + ender = ASCII_TO_NATIVE('\007'); p++; break; case 'x': @@ -3267,13 +3259,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) case 't': value = '\t'; break; case 'f': value = '\f'; break; case 'b': value = '\b'; break; -#ifdef ASCIIish - case 'e': value = '\033'; break; - case 'a': value = '\007'; break; -#else - case 'e': value = '\047'; break; - case 'a': value = '\057'; break; -#endif + case 'e': value = ASCII_TO_NATIVE('\033');break; + case 'a': value = ASCII_TO_NATIVE('\007');break; case 'x': if (*RExC_parse == '{') { e = strchr(RExC_parse++, '}'); @@ -3417,7 +3404,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (LOC) ANYOF_CLASS_SET(ret, ANYOF_ASCII); else { -#ifdef ASCIIish +#ifndef EBCDIC for (value = 0; value < 128; value++) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ @@ -3433,7 +3420,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (LOC) ANYOF_CLASS_SET(ret, ANYOF_NASCII); else { -#ifdef ASCIIish +#ifndef EBCDIC for (value = 128; value < 256; value++) ANYOF_BITMAP_SET(ret, value); #else /* EBCDIC */ @@ -3733,7 +3720,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) /* now is the next time */ if (!SIZE_ONLY) { if (lastvalue < 256 && value < 256) { -#ifndef ASCIIish /* EBCDIC, for example. */ +#ifdef EBCDIC /* EBCDIC, for example. */ if ((isLOWER(lastvalue) && isLOWER(value)) || (isUPPER(lastvalue) && isUPPER(value))) { diff --git a/toke.c b/toke.c index 0d4fc1d018..0bc4a53bb1 100644 --- a/toke.c +++ b/toke.c @@ -1265,7 +1265,7 @@ S_scan_const(pTHX_ char *start) (char)min, (char)max); } -#ifndef ASCIIish +#ifdef EBCDIC if ((isLOWER(min) && isLOWER(max)) || (isUPPER(min) && isUPPER(max))) { if (isLOWER(min)) { @@ -1450,13 +1450,11 @@ S_scan_const(pTHX_ char *start) * There will always enough room in sv since such * escapes will be longer than any UT-F8 sequence * they can end up as. */ - - /* This spot is wrong for EBCDIC. Characters like - * the lowercase letters and digits are >127 in EBCDIC, - * so here they would need to be mapped to the Unicode - * repertoire. --jhi */ - if (uv > 127) { + /* We need to map to chars to ASCII before doing the tests + to cover EBCDIC + */ + if (NATIVE_TO_ASCII(uv) > 127) { if (!has_utf8 && uv > 255) { /* Might need to recode whatever we have * accumulated so far if it contains any @@ -1465,11 +1463,11 @@ S_scan_const(pTHX_ char *start) * (Can't we keep track of that and avoid * this rescan? --jhi) */ - int hicount = 0; + int hicount = 0; char *c; for (c = SvPVX(sv); c < d; c++) { - if (UTF8_IS_CONTINUED(*c)) + if (UTF8_IS_CONTINUED(NATIVE_TO_ASCII(*c))) hicount++; } if (hicount) { @@ -1485,13 +1483,15 @@ S_scan_const(pTHX_ char *start) dst = d - 1; while (src < dst) { - if (UTF8_IS_CONTINUED(*src)) { - *dst-- = UTF8_EIGHT_BIT_LO(*src); - *dst-- = UTF8_EIGHT_BIT_HI(*src--); + U8 ch = NATIVE_TO_ASCII(*src); + if (UTF8_IS_CONTINUED(ch)) { + *dst-- = UTF8_EIGHT_BIT_LO(ch); + *dst-- = UTF8_EIGHT_BIT_HI(ch); } else { - *dst-- = *src--; + *dst-- = ch; } + src--; } } } @@ -1566,18 +1566,14 @@ S_scan_const(pTHX_ char *start) /* \c is a control character */ case 'c': s++; -#ifdef EBCDIC - *d = *s++; - if (isLOWER(*d)) - *d = toUPPER(*d); - *d = toCTRL(*d); - d++; -#else { U8 c = *s++; +#ifdef EBCDIC + if (isLOWER(c)) + c = toUPPER(c); +#endif *d++ = toCTRL(c); } -#endif continue; /* printf-style backslashes, formfeeds, newlines, etc */ @@ -1596,21 +1592,12 @@ S_scan_const(pTHX_ char *start) case 't': *d++ = '\t'; break; -#ifdef EBCDIC case 'e': - *d++ = '\047'; /* CP 1047 */ + *d++ = ASCII_TO_NATIVE('\033'); break; case 'a': - *d++ = '\057'; /* CP 1047 */ - break; -#else - case 'e': - *d++ = '\033'; + *d++ = ASCII_TO_NATIVE('\007'); break; - case 'a': - *d++ = '\007'; - break; -#endif } /* end switch */ s++; @@ -1618,7 +1605,7 @@ S_scan_const(pTHX_ char *start) } /* end if (backslash) */ default_action: - if (UTF8_IS_CONTINUED(*s) && (this_utf8 || has_utf8)) { + if (UTF8_IS_CONTINUED(NATIVE_TO_ASCII(*s)) && (this_utf8 || has_utf8)) { STRLEN len = (STRLEN) -1; UV uv; if (this_utf8) { diff --git a/utf8.c b/utf8.c index 450138af56..7302bb77e2 100644 --- a/utf8.c +++ b/utf8.c @@ -253,11 +253,7 @@ Perl_utf8n_to_uvuni(pTHX_ U8* s, STRLEN curlen, STRLEN* retlen, U32 flags) { UV uv = *s, ouv; STRLEN len = 1; -#ifdef EBCDIC - bool dowarn = 0; -#else bool dowarn = ckWARN_d(WARN_UTF8); -#endif STRLEN expectlen = 0; U32 warning = 0; -- cgit v1.2.1 From 5e12f4fbf2391bc262df1da61c1745c66dc76d71 Mon Sep 17 00:00:00 2001 From: Nick Ing-Simmons Date: Sun, 11 Mar 2001 20:10:12 +0000 Subject: regcomp.c is working in native space, not Unicode space (if different) as it is doing compare against 'W' in \W etc. p4raw-id: //depot/perlio@9106 --- regcomp.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/regcomp.c b/regcomp.c index 05a48d9747..4638d77047 100644 --- a/regcomp.c +++ b/regcomp.c @@ -2902,7 +2902,7 @@ tryagain: default: normal_default: if (UTF8_IS_START(*p) && UTF) { - ender = utf8n_to_uvuni((U8*)p, RExC_end - p, + ender = utf8n_to_uvchr((U8*)p, RExC_end - p, &numlen, 0); p += numlen; } @@ -2914,7 +2914,7 @@ tryagain: p = regwhite(p, RExC_end); if (UTF && FOLD) { if (LOC) - ender = toLOWER_LC_uvchr(UNI_TO_NATIVE(ender)); + ender = toLOWER_LC_uvchr(ender); else ender = toLOWER_uni(ender); } @@ -2923,7 +2923,7 @@ tryagain: p = oldp; /* ender is a Unicode value so it can be > 0xff -- * in other words, do not use UTF8_IS_CONTINUED(). */ - else if (ender >= 0x80 && UTF) { + else if (NATIVE_TO_ASCII(ender) >= 0x80 && UTF) { reguni(pRExC_state, ender, s, &numlen); s += numlen; len += numlen; @@ -2936,7 +2936,7 @@ tryagain: } /* ender is a Unicode value so it can be > 0xff -- * in other words, do not use UTF8_IS_CONTINUED(). */ - if (ender >= 0x80 && UTF) { + if (NATIVE_TO_ASCII(ender) >= 0x80 && UTF) { reguni(pRExC_state, ender, s, &numlen); s += numlen; len += numlen - 1; @@ -3201,7 +3201,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) if (!range) rangebegin = RExC_parse; if (UTF) { - value = utf8n_to_uvuni((U8*)RExC_parse, + value = utf8n_to_uvchr((U8*)RExC_parse, RExC_end - RExC_parse, &numlen, 0); RExC_parse += numlen; @@ -3212,7 +3212,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) namedclass = regpposixcc(pRExC_state, value); else if (value == '\\') { if (UTF) { - value = utf8n_to_uvuni((U8*)RExC_parse, + value = utf8n_to_uvchr((U8*)RExC_parse, RExC_end - RExC_parse, &numlen, 0); RExC_parse += numlen; @@ -3320,8 +3320,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) else { ANYOF_FLAGS(ret) |= ANYOF_UNICODE; Perl_sv_catpvf(aTHX_ listsv, - /* 0x002D is Unicode for '-' */ - "%04"UVxf"\n002D\n", (UV)lastvalue); + "%04"UVxf"\n%04"UVxf"\n", (UV)lastvalue, (UV) '-'); } } @@ -3883,7 +3882,7 @@ S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg) STATIC void S_reguni(pTHX_ RExC_state_t *pRExC_state, UV uv, char* s, STRLEN* lenp) { - *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvuni_to_utf8((U8*)s, uv) - (U8*)s); + *lenp = SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s); } /* -- cgit v1.2.1