diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-12-22 10:44:11 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-12-22 10:44:11 +0000 |
commit | 1a97d9951436306dd0b0c6c78146e7c8dfef7130 (patch) | |
tree | 3cf8628ef9638ae5c8d7cda5587a193f53b60a8e | |
parent | 5833650dbdc313b4d5a31e4d3a0c7cbd0afff7f2 (diff) | |
parent | 55da934421fcbc3e0aef697419eb0bae333786b1 (diff) | |
download | perl-1a97d9951436306dd0b0c6c78146e7c8dfef7130.tar.gz |
Integrate mainline - some fails:
Failed Test Stat Wstat Total Fail Failed List of Failed
-------------------------------------------------------------------------------
../ext/POSIX/t/posix.t 255 65280 38 9 23.68% 30-38
../lib/encoding.t 19 2 10.53% 17-18
op/pat.t 770 3 0.39% 754-755 757
p4raw-id: //depot/perlio@13845
-rw-r--r-- | Changes | 134 | ||||
-rwxr-xr-x | Configure | 35 | ||||
-rw-r--r-- | MANIFEST | 2 | ||||
-rw-r--r-- | Makefile.SH | 2 | ||||
-rw-r--r-- | embed.h | 4 | ||||
-rwxr-xr-x | embed.pl | 4 | ||||
-rw-r--r-- | ext/B/B/Assembler.pm | 2 | ||||
-rw-r--r-- | ext/B/B/Deparse.pm | 15 | ||||
-rw-r--r-- | ext/B/B/Disassembler.pm | 2 | ||||
-rw-r--r-- | ext/B/B/Xref.pm | 19 | ||||
-rw-r--r-- | ext/B/t/assembler.t | 2 | ||||
-rw-r--r-- | ext/re/re.t | 65 | ||||
-rw-r--r-- | global.sym | 6 | ||||
-rw-r--r-- | hints/vos.sh | 5 | ||||
-rw-r--r-- | hv.c | 25 | ||||
-rw-r--r-- | lib/Carp.t | 7 | ||||
-rw-r--r-- | lib/Carp/Heavy.pm | 10 | ||||
-rw-r--r-- | lib/ExtUtils/MM_Unix.pm | 7 | ||||
-rw-r--r-- | lib/ExtUtils/t/MM_OS2.t | 270 | ||||
-rw-r--r-- | lib/File/Basename.pm | 14 | ||||
-rw-r--r-- | lib/File/Spec/OS2.pm | 227 | ||||
-rw-r--r-- | lib/perl5db.pl | 90 | ||||
-rw-r--r-- | op.c | 7 | ||||
-rw-r--r-- | patchlevel.h | 2 | ||||
-rw-r--r-- | pod/perlapi.pod | 16 | ||||
-rw-r--r-- | pod/perlfunc.pod | 4 | ||||
-rw-r--r-- | pod/perliol.pod | 256 | ||||
-rw-r--r-- | pod/perlrun.pod | 9 | ||||
-rw-r--r-- | pod/perltie.pod | 2 | ||||
-rw-r--r-- | pp.c | 5 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | regcomp.c | 22 | ||||
-rw-r--r-- | regexec.c | 13 | ||||
-rwxr-xr-x | t/op/each.t | 2 | ||||
-rwxr-xr-x | t/op/pat.t | 56 | ||||
-rw-r--r-- | t/op/qq.t | 2 | ||||
-rw-r--r-- | utf8.c | 75 | ||||
-rw-r--r-- | utf8.h | 11 |
38 files changed, 1233 insertions, 198 deletions
@@ -31,6 +31,140 @@ or any other branch. Version v5.7.2 Development release working toward v5.8 -------------- ____________________________________________________________________________ +[ 13832] By: jhi on 2001/12/21 14:46:24 + Log: Also the search for cat needs to be _exe-aware. + Branch: perl + ! Configure +____________________________________________________________________________ +[ 13831] By: jhi on 2001/12/21 13:43:53 + Log: Subject: [PATCH] ext/B/B/Xref.pm adding "our" recognition + From: Wolfgang Laun <Wolfgang.Laun@alcatel.at> + Date: Fri, 21 Dec 2001 14:02:01 +0100 + Message-ID: <3C2332C9.7CFED5F2@alcatel.at> + Branch: perl + ! ext/B/B/Xref.pm +____________________________________________________________________________ +[ 13830] By: jhi on 2001/12/21 13:42:31 + Log: packing I32 with L is not nice, need l; from Wolfgang Laun. + Branch: perl + ! ext/B/B/Assembler.pm ext/B/B/Disassembler.pm + ! ext/B/t/assembler.t +____________________________________________________________________________ +[ 13829] By: jhi on 2001/12/21 13:39:06 + Log: Subject: [ PATCH ] Smoke 13820 /pro/3gl/CPAN/perl-current + From: "H.Merijn Brand" <h.m.brand@hccnet.nl> + Date: Fri, 21 Dec 2001 11:15:38 +0100 + Message-Id: <20011221104035.4B4F.H.M.BRAND@hccnet.nl> + Branch: perl + ! hv.c +____________________________________________________________________________ +[ 13828] By: jhi on 2001/12/21 01:59:10 + Log: A plan is good. + Branch: perl + ! ext/re/re.t +____________________________________________________________________________ +[ 13827] By: jhi on 2001/12/21 01:57:48 + Log: Subject: [REPATCH MANIFEST, ext/re/re.t] Tests for re pragma + From: chromatic <chromatic@rmci.net> + Date: Thu, 20 Dec 2001 16:16:48 -0700 + Message-ID: <20011220231726.23878.qmail@onion.perl.org> + Branch: perl + + ext/re/re.t + ! MANIFEST +____________________________________________________________________________ +[ 13826] By: jhi on 2001/12/21 01:34:37 + Log: Synchronize the skippage message. + Branch: perl + ! lib/ExtUtils/t/MM_OS2.t +____________________________________________________________________________ +[ 13825] By: jhi on 2001/12/21 01:32:42 + Log: Subject: [PATCH MANIFEST, lib/ExtUtils/t/MM_OS2.t] Tests for ExtUtils::MM_OS2 + From: chromatic <chromatic@rmci.net> + Date: Thu, 20 Dec 2001 16:09:10 -0700 + Message-ID: <20011220230948.18010.qmail@onion.perl.org> + Branch: perl + + lib/ExtUtils/t/MM_OS2.t + ! MANIFEST +____________________________________________________________________________ +[ 13824] By: jhi on 2001/12/21 01:14:12 + Log: Subject: perlfunc.pod patch re flock via fcntl + From: "Tye McQueen" <tye@metronet.com> + Date: Thu, 20 Dec 2001 14:52:19 -0600 (CST) + Message-Id: <200112202052.fBKKqKH16263@metronet.com> + Branch: perl + ! pod/perlfunc.pod +____________________________________________________________________________ +[ 13823] By: jhi on 2001/12/21 00:54:49 + Log: Make using U+FDD0..U+FDEF (noncharacters since Unicode 3.1), + U+...FFFE, U+...FFFF, and characters beyond U+10FFFF + (the Unicode maximum code point) warnable offenses. + Branch: perl + ! embed.h embed.pl global.sym op.c pod/perlapi.pod pp.c proto.h + ! t/op/each.t t/op/pat.t t/op/qq.t utf8.c utf8.h +____________________________________________________________________________ +[ 13822] By: jhi on 2001/12/20 20:59:55 + Log: Sterner admonishment on -t. + Branch: perl + ! pod/perlrun.pod +____________________________________________________________________________ +[ 13821] By: jhi on 2001/12/20 20:53:42 + Log: Subject: [PATCH B::Dpparse] minor qr// fix + From: Rafael Garcia-Suarez <rgarciasuarez@free.fr> + Date: Thu, 20 Dec 2001 22:53:34 +0100 + Message-ID: <20011220225334.A735@rafael> + Branch: perl + ! ext/B/B/Deparse.pm +____________________________________________________________________________ +[ 13820] By: jhi on 2001/12/20 15:18:25 + Log: Subject: [PATCH} typo fix in Carp/Heavy.pm + From: Robert Spier <rspier@pobox.com> + Date: Thu, 20 Dec 2001 08:10:00 -0800 + Message-ID: <15394.3416.693353.107334@rls.cx> + Branch: perl + ! lib/Carp/Heavy.pm +____________________________________________________________________________ +[ 13819] By: jhi on 2001/12/20 14:56:30 + Log: Integrate perlio (pTHX_ fixes for :win32 layer) + Branch: perl + !> win32/win32io.c +____________________________________________________________________________ +[ 13818] By: jhi on 2001/12/20 14:53:05 + Log: Subject: Re: [PATCH pod/perliol.pod] resend + From: Stas Bekman <stas@stason.org> + Date: Thu, 20 Dec 2001 17:15:09 +0800 (SGT) + Message-ID: <Pine.LNX.4.40.0112201714210.23498-100000@hope.stason.org> + Branch: perl + ! pod/perliol.pod +____________________________________________________________________________ +[ 13817] By: jhi on 2001/12/20 14:51:24 + Log: Fix up the APIs noone hopefully uses. + Branch: perl + ! utf8.c +____________________________________________________________________________ +[ 13816] By: jhi on 2001/12/20 14:18:56 + Log: Subject: [PATCH] Carp.pm caller_info returns wrong info for eval + Date: Wed, 19 Dec 2001 14:13:38 -0800 + From: Robert Spier <rspier@pobox.com> + Message-ID: <15393.4370.605214.548582@rls.cx> + Branch: perl + ! lib/Carp.t lib/Carp/Heavy.pm +____________________________________________________________________________ +[ 13813] By: jhi on 2001/12/19 21:29:49 + Log: Some bincompat clawbacks. + Branch: perl + ! embed.h embed.pl embedvar.h perlapi.h proto.h thrdvar.h utf8.c + ! wince/perldll.def +____________________________________________________________________________ +[ 13812] By: jhi on 2001/12/19 17:56:53 + Log: gcc pacifying (RH 7.1/ia64). + Branch: perl + ! pp.c +____________________________________________________________________________ +[ 13811] By: jhi on 2001/12/19 16:55:09 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ [ 13810] By: jhi on 2001/12/19 16:50:12 Log: Subject: Re: [BUG] bleadperl regexp (was ok in 5.6.0) From: Wolfgang Laun <Wolfgang.Laun@alcatel.at> @@ -20,7 +20,7 @@ # $Id: Head.U,v 3.0.1.9 1997/02/28 15:02:09 ram Exp $ # -# Generated on Wed Dec 19 03:25:46 EET 2001 [metaconfig 3.0 PL70] +# Generated on Fri Dec 21 20:17:57 EET 2001 [metaconfig 3.0 PL70] # (with additional metaconfig patches by perlbug@perl.org) cat >c1$$ <<EOF @@ -165,6 +165,12 @@ esac test -d UU || mkdir UU cd UU && rm -f ./* +if test -f "/system/gnu_library/bin/ar.pm"; then + _exe=".pm" +else + _exe="" +fi + ccname='' ccversion='' ccsymbols='' @@ -1044,7 +1050,6 @@ useposix=true : set useopcode=false in your hint file to disable the Opcode extension. useopcode=true : Trailing extension. Override this in a hint file, if needed. -_exe='' : Extra object files, if any, needed on this platform. archobjs='' archname='' @@ -1153,11 +1158,11 @@ if `$sh -c '#' >/dev/null 2>&1`; then shsharp=true spitshell=cat xcat=/bin/cat - test -f $xcat || xcat=/usr/bin/cat - if test ! -f $xcat; then + test -f $xcat$_exe || xcat=/usr/bin/cat + if test ! -f $xcat$_exe; then for p in $paths; do - if test -f $p/cat; then - xcat=$p/cat + if test -f $p/cat$_exe; then + xcat=$p/cat$_exe break fi done @@ -2075,6 +2080,7 @@ cpp csh date egrep +gmake gzip less ln @@ -2152,19 +2158,28 @@ for file in $trylist; do esac done case "$egrep" in -egrep$_exe) +egrep) echo "Substituting grep for egrep." egrep=$grep ;; esac case "$ln" in -ln$_exe) +ln) echo "Substituting cp for ln." ln=$cp ;; esac case "$make$gmake" in -*make$_exe) +*/gmake|?:[\\/]gmake) + # We can't have osname yet. + if test -f "/system/gnu_library/bin/ar.pm"; then # Stratus VOS + # Assume that gmake, if found, is definitely GNU make + # and prefer it over the system make. + echo "Substituting gmake for make." + make=$gmake + fi + ;; +*/make|?:[\\/]make) ;; *) echo "I can't find make or gmake, and my life depends on it." >&4 @@ -4141,7 +4156,7 @@ so="$ans" : or the new name. case "$_exe" in '') case "$exe_ext" in - '') ;; + '') ;; *) _exe="$exe_ext" ;; esac ;; @@ -499,6 +499,7 @@ ext/POSIX/typemap POSIX extension interface types ext/re/hints/mpeix.pl Hints for re for named architecture ext/re/Makefile.PL re extension makefile writer ext/re/re.pm re extension Perl module +ext/re/re.t see if re pragma works ext/re/re.xs re extension external subroutines ext/Safe/safe1.t See if Safe works ext/Safe/safe2.t See if Safe works @@ -948,6 +949,7 @@ lib/ExtUtils/t/Installed.t See if ExtUtils::Installed works lib/ExtUtils/t/Manifest.t See if ExtUtils::Manifest works lib/ExtUtils/t/Mkbootstrap.t See if ExtUtils::Mkbootstrap works lib/ExtUtils/t/MM_Cygwin.t See if ExtUtils::MM_Cygwin works +lib/ExtUtils/t/MM_OS2.t See if ExtUtils::MM_OS2 works lib/ExtUtils/t/MM_Unix.t See if ExtUtils::MM_UNIX works lib/ExtUtils/t/MM_VMS.t See if ExtUtils::MM_VMS works lib/ExtUtils/t/Packlist.t See if Packlist works diff --git a/Makefile.SH b/Makefile.SH index e6fb640ada..63f7417eb9 100644 --- a/Makefile.SH +++ b/Makefile.SH @@ -198,6 +198,8 @@ shellflags = $shellflags # do it for you. $make_set_make +# Mention $gmake here so it gets probed for by Configure. + # These variables may need to be manually set for non-Unix systems. AR = $full_ar EXE_EXT = $_exe @@ -755,6 +755,8 @@ #define utf8n_to_uvuni Perl_utf8n_to_uvuni #define uvchr_to_utf8 Perl_uvchr_to_utf8 #define uvuni_to_utf8 Perl_uvuni_to_utf8 +#define uvchr_to_utf8_flags Perl_uvchr_to_utf8_flags +#define uvuni_to_utf8_flags Perl_uvuni_to_utf8_flags #define pv_uni_display Perl_pv_uni_display #define sv_uni_display Perl_sv_uni_display #define vivify_defelem Perl_vivify_defelem @@ -2274,6 +2276,8 @@ #define utf8n_to_uvuni(a,b,c,d) Perl_utf8n_to_uvuni(aTHX_ a,b,c,d) #define uvchr_to_utf8(a,b) Perl_uvchr_to_utf8(aTHX_ a,b) #define uvuni_to_utf8(a,b) Perl_uvuni_to_utf8(aTHX_ a,b) +#define uvchr_to_utf8_flags(a,b,c) Perl_uvchr_to_utf8_flags(aTHX_ a,b,c) +#define uvuni_to_utf8_flags(a,b,c) Perl_uvuni_to_utf8_flags(aTHX_ a,b,c) #define pv_uni_display(a,b,c,d,e) Perl_pv_uni_display(aTHX_ a,b,c,d,e) #define sv_uni_display(a,b,c,d) Perl_sv_uni_display(aTHX_ a,b,c,d) #define vivify_defelem(a) Perl_vivify_defelem(aTHX_ a) @@ -1853,7 +1853,9 @@ Apd |UV |utf8_to_uvuni |U8 *s|STRLEN* retlen Adp |UV |utf8n_to_uvchr |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags Adp |UV |utf8n_to_uvuni |U8 *s|STRLEN curlen|STRLEN* retlen|U32 flags Apd |U8* |uvchr_to_utf8 |U8 *d|UV uv -Apd |U8* |uvuni_to_utf8 |U8 *d|UV uv +Ap |U8* |uvuni_to_utf8 |U8 *d|UV uv +Ap |U8* |uvchr_to_utf8_flags |U8 *d|UV uv|UV flags +Apd |U8* |uvuni_to_utf8_flags |U8 *d|UV uv|UV flags Apd |char* |pv_uni_display |SV *dsv|U8 *spv|STRLEN len \ |STRLEN pvlim|UV flags Apd |char* |sv_uni_display |SV *dsv|SV *ssv|STRLEN pvlim|UV flags diff --git a/ext/B/B/Assembler.pm b/ext/B/B/Assembler.pm index 4db23f18bc..429405fd36 100644 --- a/ext/B/B/Assembler.pm +++ b/ext/B/B/Assembler.pm @@ -72,7 +72,7 @@ sub B::Asmdata::PUT_U32 { } sub B::Asmdata::PUT_I32 { my $arg = limcheck( $_[0], -0x80000000, 0x7fffffff, 'I32' ); - pack("L", $arg); + pack("l", $arg); } sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" looses precision and pack('d',...) # may not even be portable between compilers diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm index fd8819167c..778cec7bb8 100644 --- a/ext/B/B/Deparse.pm +++ b/ext/B/B/Deparse.pm @@ -561,9 +561,11 @@ sub compile { # Print __DATA__ section, if necessary no strict 'refs'; - if (defined *{$self->{'curstash'}."::DATA"}{IO}) { + my $laststash = defined $self->{'curcop'} + ? $self->{'curcop'}->stash->NAME : $self->{'curstash'}; + if (defined *{$laststash."::DATA"}{IO}) { print "__DATA__\n"; - print readline(*{$self->{'curstash'}."::DATA"}); + print readline(*{$laststash."::DATA"}); } } } @@ -3158,10 +3160,11 @@ sub balanced_delim { sub single_delim { my($q, $default, $str) = @_; return "$default$str$default" if $default and index($str, $default) == -1; - my($succeed, $delim); - ($succeed, $str) = balanced_delim($str); - return "$q$str" if $succeed; - for $delim ('/', '"', '#') { + if ($q ne 'qr') { + (my $succeed, $str) = balanced_delim($str); + return "$q$str" if $succeed; + } + for my $delim ('/', '"', '#') { return "$q$delim" . $str . $delim if index($str, $delim) == -1; } if ($default) { diff --git a/ext/B/B/Disassembler.pm b/ext/B/B/Disassembler.pm index b8b5262f41..a50b48f741 100644 --- a/ext/B/B/Disassembler.pm +++ b/ext/B/B/Disassembler.pm @@ -56,7 +56,7 @@ sub GET_I32 { my $fh = shift; my $str = $fh->readn(4); croak "reached EOF while reading I32" unless length($str) == 4; - return cast_I32(unpack("L", $str)); + return unpack("l", $str); } sub GET_objindex { diff --git a/ext/B/B/Xref.pm b/ext/B/B/Xref.pm index 1731b86ac7..d0cddbf371 100644 --- a/ext/B/B/Xref.pm +++ b/ext/B/B/Xref.pm @@ -89,7 +89,7 @@ Malcolm Beattie, mbeattie@sable.ox.ac.uk. use strict; use Config; use B qw(peekop class comppadlist main_start svref_2object walksymtable - OPpLVAL_INTRO SVf_POK + OPpLVAL_INTRO SVf_POK OPpOUR_INTRO ); sub UNKNOWN { ["?", "?", "?"] } @@ -230,16 +230,16 @@ sub pp_padav { pp_padsv(@_) } sub pp_padhv { pp_padsv(@_) } sub deref { - my ($var, $as) = @_; + my ($op, $var, $as) = @_; $var->[1] = $as . $var->[1]; - process($var, "used"); + process($var, $op->private & OPpOUR_INTRO ? "intro" : "used"); } -sub pp_rv2cv { deref($top, "&"); } -sub pp_rv2hv { deref($top, "%"); } -sub pp_rv2sv { deref($top, "\$"); } -sub pp_rv2av { deref($top, "\@"); } -sub pp_rv2gv { deref($top, "*"); } +sub pp_rv2cv { deref(shift, $top, "&"); } +sub pp_rv2hv { deref(shift, $top, "%"); } +sub pp_rv2sv { deref(shift, $top, "\$"); } +sub pp_rv2av { deref(shift, $top, "\@"); } +sub pp_rv2gv { deref(shift, $top, "*"); } sub pp_gvsv { my $op = shift; @@ -253,7 +253,8 @@ sub pp_gvsv { $gv = $op->gv; $top = [$gv->STASH->NAME, '$', $gv->NAME]; } - process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); + process($top, $op->private & OPpLVAL_INTRO || + $op->private & OPpOUR_INTRO ? "intro" : "used"); } sub pp_gv { diff --git a/ext/B/t/assembler.t b/ext/B/t/assembler.t index 6bec7e091b..3e987e092a 100644 --- a/ext/B/t/assembler.t +++ b/ext/B/t/assembler.t @@ -197,7 +197,7 @@ sub putdis(@){ # sub gen_type($$$){ my( $href, $descref, $text ) = @_; - for my $odt ( keys( %opsByType ) ){ + for my $odt ( sort( keys( %opsByType ) ) ){ my $opcode = $opsByType{$odt}->[0]; my $sel = $odt; $sel =~ s/^GET_//; diff --git a/ext/re/re.t b/ext/re/re.t new file mode 100644 index 0000000000..ff3a3c51e9 --- /dev/null +++ b/ext/re/re.t @@ -0,0 +1,65 @@ +#!./perl + +use strict; + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use Test::More tests => 13; +require_ok( 're' ); + +# setcolor +$INC{ 'Term/Cap.pm' } = 1; +local $ENV{PERL_RE_TC}; +re::setcolor(); +is( $ENV{PERL_RE_COLORS}, "md\tme\tso\tse\tus\tue", + 'setcolor() should provide default colors' ); +$ENV{PERL_RE_TC} = 'su,n,ny'; +re::setcolor(); +is( $ENV{PERL_RE_COLORS}, "su\tn\tny", '... or use $ENV{PERL_RE_COLORS}' ); + +# bits +# get on +my $warn; +local $SIG{__WARN__} = sub { + $warn = shift; +}; +eval { re::bits(1) }; +like( $warn, qr/Useless use/, 'bits() should warn with no args' ); + +delete $ENV{PERL_RE_COLORS}; +re::bits(0, 'debug'); +is( $ENV{PERL_RE_COLORS}, '', + "... should not set regex colors given 'debug'" ); +re::bits(0, 'debugcolor'); +isnt( $ENV{PERL_RE_COLORS}, '', + "... should set regex colors given 'debugcolor'" ); +re::bits(0, 'nosuchsubpragma'); +like( $warn, qr/Unknown "re" subpragma/, + '... should warn about unknown subpragma' ); +ok( re::bits(0, 'taint') & 0x00100000, '... should set taint bits' ); +ok( re::bits(0, 'eval') & 0x00200000, '... should set eval bits' ); + +local $^H; + +# import +re->import('taint', 'eval'); +ok( $^H & 0x00100000, 'import should set taint bits in $^H when requested' ); +ok( $^H & 0x00200000, 'import should set eval bits in $^H when requested' ); + +re->unimport('taint'); +ok( !( $^H & 0x00100000 ), 'unimport should clear bits in $^H when requested' ); +re->unimport('eval'); +ok( !( $^H & 0x00200000 ), '... and again' ); + +package Term::Cap; + +sub Tgetent { + bless({}, $_[0]); +} + +sub Tputs { + return $_[1]; +} diff --git a/global.sym b/global.sym index b2a9225100..c19e004d66 100644 --- a/global.sym +++ b/global.sym @@ -157,6 +157,10 @@ Perl_ibcmp_utf8 Perl_init_stacks Perl_init_tm Perl_instr +Perl_is_lvalue_sub +Perl_to_uni_upper_lc +Perl_to_uni_title_lc +Perl_to_uni_lower_lc Perl_is_uni_alnum Perl_is_uni_alnumc Perl_is_uni_idfirst @@ -496,6 +500,8 @@ Perl_utf8n_to_uvchr Perl_utf8n_to_uvuni Perl_uvchr_to_utf8 Perl_uvuni_to_utf8 +Perl_uvchr_to_utf8_flags +Perl_uvuni_to_utf8_flags Perl_pv_uni_display Perl_sv_uni_display Perl_warn diff --git a/hints/vos.sh b/hints/vos.sh index 10b4212696..037a6f33d2 100644 --- a/hints/vos.sh +++ b/hints/vos.sh @@ -63,3 +63,8 @@ uselargefiles="n" # Don't use malloc that comes with perl. usemymalloc="n" +# Make bison the default compiler-compiler. +yacc="/system/gnu_library/bin/bison" + +# VOS doesn't have (or need) a pager, but perl needs one. +pager="/system/gnu_library/bin/cat.pm" @@ -893,19 +893,18 @@ Perl_hv_delete_ent(pTHX_ HV *hv, SV *keysv, I32 flags, U32 hash) { if (SvREADONLY(hv)) return Nullsv; /* if still SvREADONLY, leave it deleted. */ - else { - // okay, really delete the placeholder. - *oentry = HeNEXT(entry); - if (i && !*oentry) - xhv->xhv_fill--; /* HvFILL(hv)-- */ - if (entry == xhv->xhv_eiter /* HvEITER(hv) */) - HvLAZYDEL_on(hv); - else - hv_free_ent(hv, entry); - xhv->xhv_keys--; /* HvKEYS(hv)-- */ - xhv->xhv_placeholders--; - return Nullsv; - } + + /* okay, really delete the placeholder. */ + *oentry = HeNEXT(entry); + if (i && !*oentry) + xhv->xhv_fill--; /* HvFILL(hv)-- */ + if (entry == xhv->xhv_eiter /* HvEITER(hv) */) + HvLAZYDEL_on(hv); + else + hv_free_ent(hv, entry); + xhv->xhv_keys--; /* HvKEYS(hv)-- */ + xhv->xhv_placeholders--; + return Nullsv; } else if (SvREADONLY(hv) && HeVAL(entry) && SvREADONLY(HeVAL(entry))) { Perl_hv_notallowed(aTHX_ is_utf8, key, klen, keysave); diff --git a/lib/Carp.t b/lib/Carp.t index a318c19751..e9dd8cd7f5 100644 --- a/lib/Carp.t +++ b/lib/Carp.t @@ -5,7 +5,7 @@ BEGIN { use Carp qw(carp cluck croak confess); -print "1..7\n"; +print "1..8\n"; print "ok 1\n"; @@ -51,3 +51,8 @@ sub_6; print "ok 7\n"; +# test for caller_info API +my $eval = "use Carp::Heavy; return Carp::caller_info(0);"; +my %info = eval($eval); +print "not " if ($info{sub_name} ne "eval '$eval'"); +print "ok 8\n"; diff --git a/lib/Carp/Heavy.pm b/lib/Carp/Heavy.pm index 5228b9b3a9..cf108923f4 100644 --- a/lib/Carp/Heavy.pm +++ b/lib/Carp/Heavy.pm @@ -12,7 +12,7 @@ Carp heavy machinery - no user serviceable parts inside # On one line so MakeMaker will see it. use Carp; our $VERSION = $Carp::VERSION; -our ($CarpLevel, $MaxArgNums, $MaxEvalLen, $MaxLenArg, $Verbose); +our ($CarpLevel, $MaxArgNums, $MaxEvalLen, $MaxArgLen, $Verbose); sub caller_info { my $i = shift(@_) + 1; @@ -50,7 +50,7 @@ sub format_arg { $arg = defined($overload::VERSION) ? overload::StrVal($arg) : "$arg"; } $arg =~ s/'/\\'/g; - $arg = str_len_trim($arg, $MaxLenArg); + $arg = str_len_trim($arg, $MaxArgLen); # Quote it? $arg = "'$arg'" unless $arg =~ /^-?[\d.]+\z/; @@ -77,14 +77,14 @@ sub get_status { # the sub/require/eval sub get_subname { my $info = shift; - if (defined($info->{eval})) { - my $eval = $info->{eval}; + if (defined($info->{evaltext})) { + my $eval = $info->{evaltext}; if ($info->{is_require}) { return "require $eval"; } else { $eval =~ s/([\\\'])/\\$1/g; - return str_len_trim($eval, $MaxEvalLen); + return "eval '" . str_len_trim($eval, $MaxEvalLen) . "'"; } } diff --git a/lib/ExtUtils/MM_Unix.pm b/lib/ExtUtils/MM_Unix.pm index 103854fc9a..0efe2d8782 100644 --- a/lib/ExtUtils/MM_Unix.pm +++ b/lib/ExtUtils/MM_Unix.pm @@ -2066,6 +2066,7 @@ usually solves this kind of problem. # How do we run perl? $self->{PERLRUN} = $self->{PERL}; + $self->{PERLRUN} .= ' -I$(PERL_LIB)' if $self->{UNINSTALLED_PERL}; # How do we run perl when installing libraries? $self->{PERLRUNINST} .= $self->{PERL}. ' -I$(INST_ARCHLIB) -I$(INST_LIB)'; @@ -3504,12 +3505,12 @@ test :: \$(TEST_TYPE) push(@m, "\n"); push(@m, "test_dynamic :: pure_all\n"); - push(@m, $self->test_via_harness('$(FULLPERL)', '$(TEST_FILES)')) if $tests; - push(@m, $self->test_via_script('$(FULLPERL)', '$(TEST_FILE)')) if -f "test.pl"; + push(@m, $self->test_via_harness('$(PERLRUN)', '$(TEST_FILES)')) if $tests; + push(@m, $self->test_via_script('$(PERLRUN)', '$(TEST_FILE)')) if -f "test.pl"; push(@m, "\n"); push(@m, "testdb_dynamic :: pure_all\n"); - push(@m, $self->test_via_script('$(FULLPERL) $(TESTDB_SW)', '$(TEST_FILE)')); + push(@m, $self->test_via_script('$(PERLRUN) $(TESTDB_SW)', '$(TEST_FILE)')); push(@m, "\n"); # Occasionally we may face this degenerate target: diff --git a/lib/ExtUtils/t/MM_OS2.t b/lib/ExtUtils/t/MM_OS2.t new file mode 100644 index 0000000000..f80b0fb90f --- /dev/null +++ b/lib/ExtUtils/t/MM_OS2.t @@ -0,0 +1,270 @@ +#!./perl -w + +use strict; + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use Test::More; +if ($^O =~ /os2/i) { + plan( tests => 32 ); +} else { + plan( skip_all => "This is not OS/2" ); +} + +# for dlsyms, overridden in tests +BEGIN { + package ExtUtils::MM_OS2; + use subs 'system', 'unlink'; +} + +# for maybe_command +use File::Spec; + +use_ok( 'ExtUtils::MM_OS2' ); +ok( grep( 'ExtUtils::MM_OS2', @MM::ISA), + 'ExtUtils::MM_OS2 should be parent of MM' ); + +# dlsyms +my $mm = bless({ + SKIPHASH => { + dynamic => 1 + }, + NAME => 'foo:bar::', +}, 'ExtUtils::MM_OS2'); + +is( $mm->dlsyms(), '', + 'dlsyms() should return nothing with dynamic flag set' ); + +$mm->{BASEEXT} = 'baseext'; +delete $mm->{SKIPHASH}; +my $res = $mm->dlsyms(); +like( $res, qr/baseext\.def: Makefile/, + '... without flag, should return make targets' ); +like( $res, qr/"DL_FUNCS" => { }/, + '... should provide empty hash refs where necessary' ); +like( $res, qr/"DL_VARS" => \[]/, '... and empty array refs too' ); + +$mm->{FUNCLIST} = 'funclist'; +$res = $mm->dlsyms( IMPORTS => 'imports' ); +like( $res, qr/"FUNCLIST" => .+funclist/, + '... should pick up values from object' ); +like( $res, qr/"IMPORTS" => .+imports/, '... and allow parameter options too' ); + +my $can_write; +{ + local *OUT; + $can_write = open(OUT, '>tmp_imp'); +} + +SKIP: { + skip("Cannot write test files: $!", 7) unless $can_write; + + $mm->{IMPORTS} = { foo => 'bar' }; + + local $@; + eval { $mm->dlsyms() }; + like( $@, qr/Can.t mkdir tmp_imp/, + '... should die if directory cannot be made' ); + + unlink('tmp_imp') or skip("Cannot remove test file: $!", 9); + eval { $mm->dlsyms() }; + like( $@, qr/Malformed IMPORT/, 'should die from malformed import symbols'); + + $mm->{IMPORTS} = { foo => 'bar.baz' }; + + my @sysfail = ( 1, 0, 1 ); + my ($sysargs, $unlinked); + + *ExtUtils::MM_OS2::system = sub { + $sysargs = shift; + return shift @sysfail; + }; + + *ExtUtils::MM_OS2::unlink = sub { + $unlinked++; + }; + + eval { $mm->dlsyms() }; + + like( $sysargs, qr/^emximp/, '... should try to call system() though' ); + like( $@, qr/Cannot make import library/, + '... should die if emximp syscall fails' ); + + # sysfail is 0 now, call emximp call should succeed + eval { $mm->dlsyms() }; + is( $unlinked, 1, '... should attempt to unlink temp files' ); + like( $@, qr/Cannot extract import/, + '... should die if other syscall fails' ); + + # make both syscalls succeed + @sysfail = (0, 0); + local $@; + eval { $mm->dlsyms() }; + is( $@, '', '... should not die if both syscalls succeed' ); +} + +# static_lib +{ + my $called = 0; + + # avoid "used only once" + local *ExtUtils::MM_Unix::static_lib; + *ExtUtils::MM_Unix::static_lib = sub { + $called++; + return "\n\ncalled static_lib\n\nline2\nline3\n\nline4"; + }; + + my $args = bless({ IMPORTS => {}, }, 'MM'); + + # without IMPORTS as a populated hash, there will be no extra data + my $ret = ExtUtils::MM_OS2::static_lib( $args ); + is( $called, 1, 'static_lib() should call parent method' ); + like( $ret, qr/^called static_lib/m, + '... should return parent data unless IMPORTS exists' ); + + $args->{IMPORTS} = { foo => 1}; + $ret = ExtUtils::MM_OS2::static_lib( $args ); + is( $called, 2, '... should call parent method if extra imports passed' ); + like( $ret, qr/^called static_lib\n\t\$\(AR\) \$\(AR_STATIC_ARGS\)/m, + '... should append make tags to first line from parent method' ); + like( $ret, qr/\$@\n\n\nline2\nline3\n\nline4/m, + '... should include remaining data from parent method' ); + +} + +# replace_manpage_separator +my $sep = '//a///b//c/de'; +is( ExtUtils::MM_OS2->replace_manpage_separator($sep), '.a.b.c.de', + 'replace_manpage_separator() should turn multiple slashes into periods' ); + +# maybe_command +{ + local *DIR; + my ($dir, $noext, $exe, $cmd); + my $found = 0; + + my ($curdir, $updir) = (File::Spec->curdir, File::Spec->updir); + + # we need: + # 1) a directory + # 2) an executable file with no extension + # 3) an executable file with the .exe extension + # 4) an executable file with the .cmd extension + # we assume there will be one somewhere in the path + # in addition, we need them to be unique enough they do not trip + # an earlier file test in maybe_command(). Portability. + + foreach my $path (split(/:/, $ENV{PATH})) { + opendir(DIR, $path) or next; + while (defined(my $file = readdir(DIR))) { + next if $file eq $curdir or $file eq $updir; + $file = File::Spec->catfile($path, $file); + unless (defined $dir) { + if (-d $file) { + next if ( -x $file . '.exe' or -x $file . '.cmd' ); + + $dir = $file; + $found++; + } + } + if (-x $file) { + my $ext; + if ($file =~ s/\.(exe|cmd)\z//) { + $ext = $1; + + # skip executable files with names too similar + next if -x $file; + $file .= '.' . $ext; + + } else { + unless (defined $noext) { + $noext = $file; + $found++; + } + next; + } + + unless (defined $exe) { + if ($ext eq 'exe') { + $exe = $file; + $found++; + next; + } + } + unless (defined $cmd) { + if ($ext eq 'cmd') { + $cmd = $file; + $found++; + next; + } + } + } + last if $found == 4; + } + last if $found == 4; + } + + SKIP: { + skip('No appropriate directory found', 1) unless defined $dir; + is( ExtUtils::MM_OS2->maybe_command( $dir ), undef, + 'maybe_command() should ignore directories' ); + } + + SKIP: { + skip('No non-exension command found', 1) unless defined $noext; + is( ExtUtils::MM_OS2->maybe_command( $noext ), $noext, + 'maybe_command() should find executable lacking file extension' ); + } + + SKIP: { + skip('No .exe command found', 1) unless defined $exe; + (my $noexe = $exe) =~ s/\.exe\z//; + is( ExtUtils::MM_OS2->maybe_command( $noexe ), $exe, + 'maybe_command() should find .exe file lacking extension' ); + } + + SKIP: { + skip('No .cmd command found', 1) unless defined $cmd; + (my $nocmd = $cmd) =~ s/\.cmd\z//; + is( ExtUtils::MM_OS2->maybe_command( $nocmd ), $cmd, + 'maybe_command() should find .cmd file lacking extension' ); + } +} + +# file_name_is_absolute +ok( ExtUtils::MM_OS2->file_name_is_absolute( 's:/' ), + 'file_name_is_absolute() should be true for paths with volume and slash' ); +ok( ExtUtils::MM_OS2->file_name_is_absolute( '\foo' ), + '... and for paths with leading slash but no volume' ); +ok( ! ExtUtils::MM_OS2->file_name_is_absolute( 'arduk' ), + '... but not for paths with no leading slash or volume' ); + +# perl_archive +is( ExtUtils::MM_OS2->perl_archive(), '$(PERL_INC)/libperl$(LIB_EXT)', + 'perl_archive() should return a static string' ); + +# perl_archive_after +{ + my $aout = 0; + local *OS2::is_aout; + *OS2::is_aout = \$aout; + + isnt( ExtUtils::MM_OS2->perl_archive_after(), '', + 'perl_archive_after() should return string without $is_aout set' ); + $aout = 1; + is( ExtUtils::MM_OS2->perl_archive_after(), '', + '... and blank string if it is set' ); +} + +# export_list +is( ExtUtils::MM_OS2::export_list({ BASEEXT => 'foo' }), 'foo.def', + 'export_list() should add .def to BASEEXT member' ); + +END { + use File::Path; + rmtree('tmp_imp'); + unlink 'tmpimp.imp'; +} diff --git a/lib/File/Basename.pm b/lib/File/Basename.pm index 035c597991..37faa6d465 100644 --- a/lib/File/Basename.pm +++ b/lib/File/Basename.pm @@ -182,6 +182,11 @@ sub fileparse { ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s); $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/; } + elsif ($fstype =~ /^os2/i) { + ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s); + $dirpath = './' unless $dirpath; # Can't be 0 + $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#; + } elsif ($fstype =~ /^MacOS/si) { ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s); $dirpath = ':' unless $dirpath; @@ -251,14 +256,7 @@ sub dirname { } $dirname .= ":" unless $dirname =~ /:\z/; } - elsif ($fstype =~ /MSDOS/i) { - $dirname =~ s/([^:])[\\\/]*\z/$1/; - unless( length($basename) ) { - ($basename,$dirname) = fileparse $dirname; - $dirname =~ s/([^:])[\\\/]*\z/$1/; - } - } - elsif ($fstype =~ /MSWin32/i) { + elsif ($fstype =~ /MS(DOS|Win32)|os2/i) { $dirname =~ s/([^:])[\\\/]*\z/$1/; unless( length($basename) ) { ($basename,$dirname) = fileparse $dirname; diff --git a/lib/File/Spec/OS2.pm b/lib/File/Spec/OS2.pm index 6392ba4acb..b494e2cbf2 100644 --- a/lib/File/Spec/OS2.pm +++ b/lib/File/Spec/OS2.pm @@ -52,6 +52,233 @@ sub tmpdir { return $tmpdir; } +=item canonpath + +No physical check on the filesystem, but a logical cleanup of a +path. On UNIX eliminated successive slashes and successive "/.". + +=cut + +sub canonpath { + my ($self,$path) = @_; + $path =~ s/^([a-z]:)/\l$1/s; + $path =~ s|\\|/|g; + $path =~ s|([^/])/+|$1/|g; # xx////xx -> xx/xx + $path =~ s|(/\.)+/|/|g; # xx/././xx -> xx/xx + $path =~ s|^(\./)+(?=[^/])||s; # ./xx -> xx + $path =~ s|/\Z(?!\n)|| + unless $path =~ m#^([a-z]:)?/\Z(?!\n)#si;# xx/ -> xx + return $path; +} + +=item splitpath + + ($volume,$directories,$file) = File::Spec->splitpath( $path ); + ($volume,$directories,$file) = File::Spec->splitpath( $path, $no_file ); + +Splits a path in to volume, directory, and filename portions. Assumes that +the last file is a path unless the path ends in '/', '/.', '/..' +or $no_file is true. On Win32 this means that $no_file true makes this return +( $volume, $path, undef ). + +Separators accepted are \ and /. + +Volumes can be drive letters or UNC sharenames (\\server\share). + +The results can be passed to L</catpath> to get back a path equivalent to +(usually identical to) the original path. + +=cut + +sub splitpath { + my ($self,$path, $nofile) = @_; + my ($volume,$directory,$file) = ('','',''); + if ( $nofile ) { + $path =~ + m{^( (?:[a-zA-Z]:|(?:\\\\|//)[^\\/]+[\\/][^\\/]+)? ) + (.*) + }xs; + $volume = $1; + $directory = $2; + } + else { + $path =~ + m{^ ( (?: [a-zA-Z]: | + (?:\\\\|//)[^\\/]+[\\/][^\\/]+ + )? + ) + ( (?:.*[\\\\/](?:\.\.?\Z(?!\n))?)? ) + (.*) + }xs; + $volume = $1; + $directory = $2; + $file = $3; + } + + return ($volume,$directory,$file); +} + + +=item splitdir + +The opposite of L<catdir()|File::Spec/catdir()>. + + @dirs = File::Spec->splitdir( $directories ); + +$directories must be only the directory portion of the path on systems +that have the concept of a volume or that have path syntax that differentiates +files from directories. + +Unlike just splitting the directories on the separator, leading empty and +trailing directory entries can be returned, because these are significant +on some OSs. So, + + File::Spec->splitdir( "/a/b//c/" ); + +Yields: + + ( '', 'a', 'b', '', 'c', '' ) + +=cut + +sub splitdir { + my ($self,$directories) = @_ ; + split m|[\\/]|, $directories, -1; +} + + +=item catpath + +Takes volume, directory and file portions and returns an entire path. Under +Unix, $volume is ignored, and this is just like catfile(). On other OSs, +the $volume become significant. + +=cut + +sub catpath { + my ($self,$volume,$directory,$file) = @_; + + # If it's UNC, make sure the glue separator is there, reusing + # whatever separator is first in the $volume + $volume .= $1 + if ( $volume =~ m@^([\\/])[\\/][^\\/]+[\\/][^\\/]+\Z(?!\n)@s && + $directory =~ m@^[^\\/]@s + ) ; + + $volume .= $directory ; + + # If the volume is not just A:, make sure the glue separator is + # there, reusing whatever separator is first in the $volume if possible. + if ( $volume !~ m@^[a-zA-Z]:\Z(?!\n)@s && + $volume =~ m@[^\\/]\Z(?!\n)@ && + $file =~ m@[^\\/]@ + ) { + $volume =~ m@([\\/])@ ; + my $sep = $1 ? $1 : '/' ; + $volume .= $sep ; + } + + $volume .= $file ; + + return $volume ; +} + + +sub abs2rel { + my($self,$path,$base) = @_; + + # Clean up $path + if ( ! $self->file_name_is_absolute( $path ) ) { + $path = $self->rel2abs( $path ) ; + } else { + $path = $self->canonpath( $path ) ; + } + + # Figure out the effective $base and clean it up. + if ( !defined( $base ) || $base eq '' ) { + $base = Cwd::sys_cwd() ; + } elsif ( ! $self->file_name_is_absolute( $base ) ) { + $base = $self->rel2abs( $base ) ; + } else { + $base = $self->canonpath( $base ) ; + } + + # Split up paths + my ( undef, $path_directories, $path_file ) = + $self->splitpath( $path, 1 ) ; + + my $base_directories = ($self->splitpath( $base, 1 ))[1] ; + + # Now, remove all leading components that are the same + my @pathchunks = $self->splitdir( $path_directories ); + my @basechunks = $self->splitdir( $base_directories ); + + while ( @pathchunks && + @basechunks && + lc( $pathchunks[0] ) eq lc( $basechunks[0] ) + ) { + shift @pathchunks ; + shift @basechunks ; + } + + # No need to catdir, we know these are well formed. + $path_directories = CORE::join( '/', @pathchunks ); + $base_directories = CORE::join( '/', @basechunks ); + + # $base_directories now contains the directories the resulting relative + # path must ascend out of before it can descend to $path_directory. So, + # replace all names with $parentDir + + #FA Need to replace between backslashes... + $base_directories =~ s|[^\\/]+|..|g ; + + # Glue the two together, using a separator if necessary, and preventing an + # empty result. + + #FA Must check that new directories are not empty. + if ( $path_directories ne '' && $base_directories ne '' ) { + $path_directories = "$base_directories/$path_directories" ; + } else { + $path_directories = "$base_directories$path_directories" ; + } + + return $self->canonpath( + $self->catpath( "", $path_directories, $path_file ) + ) ; +} + + +sub rel2abs { + my ($self,$path,$base ) = @_; + + if ( ! $self->file_name_is_absolute( $path ) ) { + + if ( !defined( $base ) || $base eq '' ) { + $base = Cwd::sys_cwd() ; + } + elsif ( ! $self->file_name_is_absolute( $base ) ) { + $base = $self->rel2abs( $base ) ; + } + else { + $base = $self->canonpath( $base ) ; + } + + my ( $path_directories, $path_file ) = + ($self->splitpath( $path, 1 ))[1,2] ; + + my ( $base_volume, $base_directories ) = + $self->splitpath( $base, 1 ) ; + + $path = $self->catpath( + $base_volume, + $self->catdir( $base_directories, $path_directories ), + $path_file + ) ; + } + + return $self->canonpath( $path ) ; +} + 1; __END__ diff --git a/lib/perl5db.pl b/lib/perl5db.pl index a1eaf09a38..b62ac8b5e5 100644 --- a/lib/perl5db.pl +++ b/lib/perl5db.pl @@ -2,6 +2,36 @@ package DB; # Debugger for Perl 5.00x; perl5db.pl patch level: +# It is crucial that there is no lexicals in scope of `eval ""' down below +sub eval { + # 'my' would make it visible from user code + # but so does local! --tchrist [... into @DB::res, not @res. IZ] + local @res; + { + local $otrace = $trace; + local $osingle = $single; + local $od = $^D; + { ($evalarg) = $evalarg =~ /(.*)/s; } + @res = eval "$usercontext $evalarg;\n"; # '\n' for nice recursive debug + $trace = $otrace; + $single = $osingle; + $^D = $od; + } + my $at = $@; + local $saved[0]; # Preserve the old value of $@ + eval { &DB::save }; + if ($at) { + print $OUT $at; + } elsif ($onetimeDump) { + dumpit($OUT, \@res) if $onetimeDump eq 'dump'; + methods($res[0]) if $onetimeDump eq 'methods'; + } + @res; +} + +# After this point it is safe to introduce lexicals +# However, one should not overdo it: leave as much control from outside as possible + $VERSION = 1.15; $header = "perl5db.pl version $VERSION"; @@ -543,21 +573,19 @@ if ($notty) { $IN = $OUT; } else { create_IN_OUT(4) if $CreateTTY & 4; - if (defined $console) { + if ($console) { my ($i, $o) = split /,/, $console; $o = $i unless defined $o; open(IN,"+<$i") || open(IN,"<$i") || open(IN,"<&STDIN"); open(OUT,"+>$o") || open(OUT,">$o") || open(OUT,">&STDERR") || open(OUT,">&STDOUT"); # so we don't dongle stdout - } else { + } elsif (not defined $console) { open(IN,"<&STDIN"); open(OUT,">&STDERR") || open(OUT,">&STDOUT"); # so we don't dongle stdout $console = 'STDIN/OUT'; } # so open("|more") can read from STDOUT and so we don't dingle stdin - $IN = \*IN; - - $OUT = \*OUT; + $IN = \*IN, $OUT = \*OUT if $console or not defined $console; } my $previous = select($OUT); $| = 1; # for DB::OUT @@ -1739,32 +1767,6 @@ sub print_lineinfo { # The following takes its argument via $evalarg to preserve current @_ -sub eval { - # 'my' would make it visible from user code - # but so does local! --tchrist [... into @DB::res, not @res. IZ] - local @res; - { - local $otrace = $trace; - local $osingle = $single; - local $od = $^D; - { ($evalarg) = $evalarg =~ /(.*)/s; } - @res = eval "$usercontext $evalarg;\n"; # '\n' for nice recursive debug - $trace = $otrace; - $single = $osingle; - $^D = $od; - } - my $at = $@; - local $saved[0]; # Preserve the old value of $@ - eval { &DB::save }; - if ($at) { - print $OUT $at; - } elsif ($onetimeDump) { - dumpit($OUT, \@res) if $onetimeDump eq 'dump'; - methods($res[0]) if $onetimeDump eq 'methods'; - } - @res; -} - sub postponed_sub { my $subname = shift; if ($postponed{$subname} =~ s/^break\s([+-]?\d+)\s+if\s//) { @@ -2030,24 +2032,26 @@ sub xterm_get_fork_TTY { return $tty; } -# This one resets $IN, $OUT itself +# This example function resets $IN, $OUT itself sub os2_get_fork_TTY { - $^F = 40; # XXXX Fixme! + local $^F = 40; # XXXX Fixme! my ($in1, $out1, $in2, $out2); # Having -d in PERL5OPT would lead to a disaster... local $ENV{PERL5OPT} = $ENV{PERL5OPT} if $ENV{PERL5OPT}; $ENV{PERL5OPT} =~ s/(?:^|(?<=\s))-d\b// if $ENV{PERL5OPT}; $ENV{PERL5OPT} =~ s/(?:^|(?<=\s))-d\B/-/ if $ENV{PERL5OPT}; - print $OUT "Making PERL5OPT->`$ENV{PERL5OPT}'.\n" if $ENV{PERL5OPT}; + print $OUT "Making kid PERL5OPT->`$ENV{PERL5OPT}'.\n" if $ENV{PERL5OPT}; (my $name = $0) =~ s,^.*[/\\],,s; - if ( pipe $in1, $out1 and pipe $in2, $out2 and + my @args; + if ( pipe $in1, $out1 and pipe $in2, $out2 # system P_SESSION will fail if there is another process # in the same session with a "dependent" asynchronous child session. - (($kpid = CORE::system 4, $^X, '-we', <<'ES', fileno $in1, fileno $out2, "Daughter Perl debugger $pids $name") >= 0 or warn "system P_SESSION: $!, $^E" and 0) # P_SESSION -use Term::ReadKey; + and @args = ($rl, fileno $in1, fileno $out2, + "Daughter Perl debugger $pids $name") and + (($kpid = CORE::system 4, $^X, '-we', <<'ES', @args) >= 0 # P_SESSION use OS2::Process; -my $in = shift; # Read from here and pass through +my ($rl, $in) = (shift, shift); # Read from $in and pass through set_title pop; system P_NOWAIT, $^X, '-we', <<EOS or die "Cannot start a grandkid"; open IN, '<&=$in' or die "open <&=$in: \$!"; @@ -2057,11 +2061,13 @@ EOS my $out = shift; open OUT, ">&=$out" or die "Cannot open &=$out for writing: $!"; select OUT; $| = 1; -ReadMode 4; # Nodelay on kbd. Pipe is automatically nodelay... -print while sysread STDIN, $_, 1<<16; +require Term::ReadKey if $rl; +Term::ReadKey::ReadMode(4) if $rl; # Nodelay on kbd. Pipe is automatically nodelay... +print while sysread STDIN, $_, 1<<($rl ? 16 : 0); ES + or warn "system P_SESSION: $!, $^E" and 0) and close $in1 and close $out2 ) { - $pidprompt = ''; # Shown anyway in titlebar + $pidprompt = ''; # Shown anyway in titlebar reset_IN_OUT($in2, $out1); $tty = '*reset*'; return ''; # Indicate that reset_IN_OUT is called @@ -2096,6 +2102,8 @@ EOP EOP } elsif ($in ne '') { TTY($in); + } else { + $console = ''; # Indicate no need to open-from-the-console } undef $fork_TTY; } @@ -2866,7 +2866,8 @@ Perl_pmtrans(pTHX_ OP *o, OP *expr, OP *repl) U8 range_mark = UTF_TO_NATIVE(0xff); sv_catpvn(transv, (char *)&range_mark, 1); } - t = uvuni_to_utf8(tmpbuf, 0x7fffffff); + t = uvuni_to_utf8_flags(tmpbuf, 0x7fffffff, + UNICODE_ALLOW_SUPER); sv_catpvn(transv, (char*)tmpbuf, t - tmpbuf); t = (U8*)SvPVX(transv); tlen = SvCUR(transv); @@ -3126,12 +3127,16 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, OP *repl) p = SvPV(pat, plen); pm->op_pmflags |= PMf_SKIPWHITE; } + if (DO_UTF8(pat) || (PL_hints & HINT_UTF8)) + pm->op_pmdynflags |= PMdf_UTF8; PM_SETRE(pm, CALLREGCOMP(aTHX_ p, p + plen, pm)); if (strEQ("\\s+", PM_GETRE(pm)->precomp)) pm->op_pmflags |= PMf_WHITE; op_free(expr); } else { + if (PL_hints & HINT_UTF8) + pm->op_pmdynflags |= PMdf_UTF8; if (pm->op_pmflags & PMf_KEEP || !(PL_hints & HINT_RE_EVAL)) expr = newUNOP((!(PL_hints & HINT_RE_EVAL) ? OP_REGCRESET diff --git a/patchlevel.h b/patchlevel.h index 47177b61af..15ac32ceb7 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -70,7 +70,7 @@ #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT) static char *local_patches[] = { NULL - ,"DEVEL13810" + ,"DEVEL13832" ,NULL }; diff --git a/pod/perlapi.pod b/pod/perlapi.pod index 7bdf75c8c9..397f52b029 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -1573,8 +1573,8 @@ Found in file handy.h Returns a pointer to the next character after the parsed vstring, as well as updating the passed in sv. - * -Function must be called like + * +Function must be called like sv = NEWSV(92,5); s = new_vstring(s,sv); @@ -4453,20 +4453,28 @@ is the recommended wide native character-aware way of saying =for hackers Found in file utf8.c -=item uvuni_to_utf8 +=item uvuni_to_utf8_flags Adds the UTF8 representation of the Unicode codepoint C<uv> to the end of the string C<d>; C<d> should be have at least C<UTF8_MAXLEN+1> free bytes available. The return value is the pointer to the byte after the end of the new character. In other words, + d = uvuni_to_utf8_flags(d, uv, flags); + +or, in most cases, + d = uvuni_to_utf8(d, uv); +(which is equivalent to) + + d = uvuni_to_utf8_flags(d, uv, 0); + is the recommended Unicode-aware way of saying *(d++) = uv; - U8* uvuni_to_utf8(U8 *d, UV uv) + U8* uvuni_to_utf8_flags(U8 *d, UV uv, UV flags) =for hackers Found in file utf8.c diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index e5f322c8d1..1d65ca6fde 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -1658,6 +1658,10 @@ are the semantics that lockf(3) implements. Most if not all systems implement lockf(3) in terms of fcntl(2) locking, though, so the differing semantics shouldn't bite too many people. +Note that the fcntl(2) emulation of flock(3) requires that FILEHANDLE +be open with read intent to use LOCK_SH and requires that it be open +with write intent to use LOCK_EX. + Note also that some versions of C<flock> cannot lock things over the network; you would need to use the more system-specific C<fcntl> for that. If you like you can force Perl to ignore your system's flock(2) diff --git a/pod/perliol.pod b/pod/perliol.pod index 4ef52d7e2e..cde9be54b8 100644 --- a/pod/perliol.pod +++ b/pod/perliol.pod @@ -1,4 +1,3 @@ - =head1 NAME perliol - C API for Perl's implementation of IO in Layers. @@ -8,7 +7,6 @@ perliol - C API for Perl's implementation of IO in Layers. /* Defining a layer ... */ #include <perliol.h> - =head1 DESCRIPTION This document describes the behavior and implementation of the PerlIO @@ -63,7 +61,7 @@ then in general represented as a pointer to this linked-list of "layers". It should be noted that because of the double indirection in a C<PerlIO *>, -a C<< &(perlio-E<gt>next) >> "is" a C<PerlIO *>, and so to some degree +a C<< &(perlio->next) >> "is" a C<PerlIO *>, and so to some degree at least one layer can use the "standard" API on the next layer down. A "layer" is composed of two parts: @@ -92,40 +90,39 @@ same as the public C<PerlIO_xxxxx> functions: char * name; Size_t size; IV kind; - IV (*Pushed)(PerlIO *f,const char *mode,SV *arg); - IV (*Popped)(PerlIO *f); + IV (*Pushed)(pTHX_ PerlIO *f,const char *mode,SV *arg); + IV (*Popped)(pTHX_ PerlIO *f); PerlIO * (*Open)(pTHX_ PerlIO_funcs *tab, AV *layers, IV n, const char *mode, int fd, int imode, int perm, PerlIO *old, int narg, SV **args); - SV * (*Getarg)(PerlIO *f); - IV (*Fileno)(PerlIO *f); + SV * (*Getarg)(pTHX_ PerlIO *f, CLONE_PARAMS *param, int flags) + IV (*Fileno)(pTHX_ PerlIO *f); + PerlIO * (*Dup)(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *param, int flags) /* Unix-like functions - cf sfio line disciplines */ - SSize_t (*Read)(PerlIO *f, void *vbuf, Size_t count); - SSize_t (*Unread)(PerlIO *f, const void *vbuf, Size_t count); - SSize_t (*Write)(PerlIO *f, const void *vbuf, Size_t count); - IV (*Seek)(PerlIO *f, Off_t offset, int whence); - Off_t (*Tell)(PerlIO *f); - IV (*Close)(PerlIO *f); + SSize_t (*Read)(pTHX_ PerlIO *f, void *vbuf, Size_t count); + SSize_t (*Unread)(pTHX_ PerlIO *f, const void *vbuf, Size_t count); + SSize_t (*Write)(pTHX_ PerlIO *f, const void *vbuf, Size_t count); + IV (*Seek)(pTHX_ PerlIO *f, Off_t offset, int whence); + Off_t (*Tell)(pTHX_ PerlIO *f); + IV (*Close)(pTHX_ PerlIO *f); /* Stdio-like buffered IO functions */ - IV (*Flush)(PerlIO *f); - IV (*Fill)(PerlIO *f); - IV (*Eof)(PerlIO *f); - IV (*Error)(PerlIO *f); - void (*Clearerr)(PerlIO *f); - void (*Setlinebuf)(PerlIO *f); + IV (*Flush)(pTHX_ PerlIO *f); + IV (*Fill)(pTHX_ PerlIO *f); + IV (*Eof)(pTHX_ PerlIO *f); + IV (*Error)(pTHX_ PerlIO *f); + void (*Clearerr)(pTHX_ PerlIO *f); + void (*Setlinebuf)(pTHX_ PerlIO *f); /* Perl's snooping functions */ - STDCHAR * (*Get_base)(PerlIO *f); - Size_t (*Get_bufsiz)(PerlIO *f); - STDCHAR * (*Get_ptr)(PerlIO *f); - SSize_t (*Get_cnt)(PerlIO *f); - void (*Set_ptrcnt)(PerlIO *f,STDCHAR *ptr,SSize_t cnt); + STDCHAR * (*Get_base)(pTHX_ PerlIO *f); + Size_t (*Get_bufsiz)(pTHX_ PerlIO *f); + STDCHAR * (*Get_ptr)(pTHX_ PerlIO *f); + SSize_t (*Get_cnt)(pTHX_ PerlIO *f); + void (*Set_ptrcnt)(pTHX_ PerlIO *f,STDCHAR *ptr,SSize_t cnt); }; - - The first few members of the struct give a "name" for the layer, the size to C<malloc> for the per-instance data, and some flags which are attributes of the class as whole (such as whether it is a buffering @@ -255,7 +252,7 @@ Reads are permitted i.e. opened "r" or "w+" (or even "a+" - ick). =item PERLIO_F_ERROR -An error has occurred (for C<PerlIO_error()>) +An error has occurred (for C<PerlIO_error()>). =item PERLIO_F_TRUNCATE @@ -325,7 +322,45 @@ to change during one "get".) =over 4 -=item IV (*Pushed)(PerlIO *f,const char *mode, SV *arg); +=item char * name; + +The name of the layer whose open() method Perl should invoke on +open(). For example if the layer is called APR, you will call: + + open $fh, ">:APR", ... + +and Perl knows that it has to invoke the PerlIOAPR_open() method +implemented by the APR layer. + +=item Size_t size; + +The size of the per-instance data structure, e.g.: + + sizeof(PerlIOAPR) + +=item IV kind; + + XXX: explain all the available flags here + +=over 4 + +=item * PERLIO_K_BUFFERED + +=item * PERLIO_K_CANCRLF + +=item * PERLIO_K_FASTGETS + +=item * PERLIO_K_MULTIARG + +Used when the layer's open() accepts more arguments than usual. The +extra arguments should come not before the C<MODE> argument. When this +flag is used it's up to the layer to validate the args. + +=item * PERLIO_K_RAW + +=back + +=item IV (*Pushed)(pTHX_ PerlIO *f,const char *mode, SV *arg); The only absolutely mandatory method. Called when the layer is pushed onto the stack. The C<mode> argument may be NULL if this occurs @@ -337,7 +372,9 @@ expecting an argument it need neither save the one passed to it, nor provide C<Getarg()> (it could perhaps C<Perl_warn> that the argument was un-expected). -=item IV (*Popped)(PerlIO *f); +Returns 0 on success. On failure returns -1 and should set errno. + +=item IV (*Popped)(pTHX_ PerlIO *f); Called when the layer is popped from the stack. A layer will normally be popped after C<Close()> is called. But a layer can be popped @@ -348,6 +385,8 @@ struct. It should also C<Unread()> any unconsumed data that has been read and buffered from the layer below back to that layer, so that it can be re-provided to what ever is now above. +Returns 0 on success and failure. + =item PerlIO * (*Open)(...); The C<Open()> method has lots of arguments because it combines the @@ -402,100 +441,141 @@ and wait to be "pushed". If a layer does provide C<Open()> it should normally call the C<Open()> method of next layer down (if any) and then push itself on top if that succeeds. -=item SV * (*Getarg)(PerlIO *f); +Returns C<NULL> on failure. + +=item SV * (*Getarg)(pTHX_ PerlIO *f, CLONE_PARAMS *param, int flags) -Optional. If present should return an SV * representing the string argument -passed to the layer when it was pushed. e.g. ":encoding(ascii)" would -return an SvPV with value "ascii". +Optional. If present should return an SV * representing the string +argument passed to the layer when it was +pushed. e.g. ":encoding(ascii)" would return an SvPV with value +"ascii". (I<param> and I<flags> arguments can be ignored in most +cases) -=item IV (*Fileno)(PerlIO *f); +=item IV (*Fileno)(pTHX_ PerlIO *f); Returns the Unix/Posix numeric file descriptor for the handle. Normally C<PerlIOBase_fileno()> (which just asks next layer down) will suffice for this. -=item SSize_t (*Read)(PerlIO *f, void *vbuf, Size_t count); +Returns -1 if the layer cannot provide such a file descriptor, or in +the case of the error. + +XXX: two possible results end up in -1, one is an error the other is +not. + +=item PerlIO * (*Dup)(pTHX_ PerlIO *f, PerlIO *o, CLONE_PARAMS *param, int flags) + +XXX: not documented + +Similar to C<Open>, returns PerlIO* on success, C<NULL> on failure. + +=item SSize_t (*Read)(pTHX_ PerlIO *f, void *vbuf, Size_t count); + +Basic read operation. -Basic read operation. Returns actual bytes read, or -1 on an error. -Typically will call Fill and manipulate pointers (possibly via the API). -C<PerlIOBuf_read()> may be suitable for derived classes which provide -"fast gets" methods. +Typically will call C<Fill> and manipulate pointers (possibly via the +API). C<PerlIOBuf_read()> may be suitable for derived classes which +provide "fast gets" methods. -=item SSize_t (*Unread)(PerlIO *f, const void *vbuf, Size_t count); +Returns actual bytes read, or -1 on an error. + +=item SSize_t (*Unread)(pTHX_ PerlIO *f, const void *vbuf, Size_t count); A superset of stdio's C<ungetc()>. Should arrange for future reads to see the bytes in C<vbuf>. If there is no obviously better implementation then C<PerlIOBase_unread()> provides the function by pushing a "fake" "pending" layer above the calling layer. +Returns the number of unread chars. + =item SSize_t (*Write)(PerlIO *f, const void *vbuf, Size_t count); -Basic write operation. Returns bytes written or -1 on an error. +Basic write operation. -=item IV (*Seek)(PerlIO *f, Off_t offset, int whence); +Returns bytes written or -1 on an error. + +=item IV (*Seek)(pTHX_ PerlIO *f, Off_t offset, int whence); Position the file pointer. Should normally call its own C<Flush> method and then the C<Seek> method of next layer down. -=item Off_t (*Tell)(PerlIO *f); +Returns 0 on success, -1 on failure. + +=item Off_t (*Tell)(pTHX_ PerlIO *f); Return the file pointer. May be based on layers cached concept of position to avoid overhead. -=item IV (*Close)(PerlIO *f); +Returns -1 on failure to get the file pointer. + +=item IV (*Close)(pTHX_ PerlIO *f); Close the stream. Should normally call C<PerlIOBase_close()> to flush itself and close layers below, and then deallocate any data structures (buffers, translation tables, ...) not held directly in the data structure. -=item IV (*Flush)(PerlIO *f); +Returns 0 on success, -1 on failure. + +=item IV (*Flush)(pTHX_ PerlIO *f); Should make stream's state consistent with layers below. That is, any buffered write data should be written, and file position of lower layers adjusted for data read from below but not actually consumed. (Should perhaps C<Unread()> such data to the lower layer.) -=item IV (*Fill)(PerlIO *f); +Returns 0 on success, -1 on failure. + +=item IV (*Fill)(pTHX_ PerlIO *f); + +The buffer for this layer should be filled (for read) from layer +below. When you "subclass" PerlIOBuf layer, you want to use its +I<_read> method and to supply your own fill method, which fills the +PerlIOBuf's buffer. -The buffer for this layer should be filled (for read) from layer below. +Returns 0 on success, -1 on failure. -=item IV (*Eof)(PerlIO *f); +=item IV (*Eof)(pTHX_ PerlIO *f); Return end-of-file indicator. C<PerlIOBase_eof()> is normally sufficient. -=item IV (*Error)(PerlIO *f); +Returns 0 on end-of-file, 1 if not end-of-file, -1 on error. + +=item IV (*Error)(pTHX_ PerlIO *f); Return error indicator. C<PerlIOBase_error()> is normally sufficient. -=item void (*Clearerr)(PerlIO *f); +Returns 1 if there is an error (usually when C<PERLIO_F_ERROR> is set, +0 otherwise. + +=item void (*Clearerr)(pTHX_ PerlIO *f); Clear end-of-file and error indicators. Should call C<PerlIOBase_clearerr()> to set the C<PERLIO_F_XXXXX> flags, which may suffice. -=item void (*Setlinebuf)(PerlIO *f); +=item void (*Setlinebuf)(pTHX_ PerlIO *f); Mark the stream as line buffered. C<PerlIOBase_setlinebuf()> sets the PERLIO_F_LINEBUF flag and is normally sufficient. -=item STDCHAR * (*Get_base)(PerlIO *f); +=item STDCHAR * (*Get_base)(pTHX_ PerlIO *f); Allocate (if not already done so) the read buffer for this layer and -return pointer to it. +return pointer to it. Return NULL on failure. -=item Size_t (*Get_bufsiz)(PerlIO *f); +=item Size_t (*Get_bufsiz)(pTHX_ PerlIO *f); Return the number of bytes that last C<Fill()> put in the buffer. -=item STDCHAR * (*Get_ptr)(PerlIO *f); +=item STDCHAR * (*Get_ptr)(pTHX_ PerlIO *f); Return the current read pointer relative to this layer's buffer. -=item SSize_t (*Get_cnt)(PerlIO *f); +=item SSize_t (*Get_cnt)(pTHX_ PerlIO *f); Return the number of bytes left to be read in the current buffer. -=item void (*Set_ptrcnt)(PerlIO *f,STDCHAR *ptr,SSize_t cnt); +=item void (*Set_ptrcnt)(pTHX_ PerlIO *f,STDCHAR *ptr,SSize_t cnt); Adjust the read pointer and count of bytes to match C<ptr> and/or C<cnt>. The application (or layer above) must ensure they are consistent. @@ -625,6 +705,68 @@ implementation is being investigated. =back +=head1 TODO + +Things that need to be done to improve this document. + +=over + +=item * + +Explain how to make a valid fh without going through open()(i.e. apply +a layer). For example if the file is not opened through perl, but we +want to get back a fh, like it was opened by Perl. + +How PerlIO_apply_layera fits in, where its docs, was it made public? + +Currently the example could be something like this: + + PerlIO *foo_to_PerlIO(pTHX_ char *mode, ...) + { + char *mode; /* "w", "r", etc */ + const char *layers = ":APR"; /* the layer name */ + PerlIO *f = PerlIO_allocate(aTHX); + if (!f) { + return NULL; + } + + PerlIO_apply_layers(aTHX_ f, mode, layers); + + if (f) { + PerlIOAPR *st = PerlIOSelf(f, PerlIOAPR); + /* fill in the st struct, as in _open() */ + st->file = file; + PerlIOBase(f)->flags |= PERLIO_F_OPEN; + + return f; + } + return NULL; + } + +=item * + +fix/add the documentation in places marked as XXX. + +=item * + +The handling of errors by the layer is not specified. e.g. when $! +should be set explicitly, when the error handling should be just +delegated to the top layer. + +Probably give some hints on using SETERRNO() or pointers to where they +can be found. + +=item * + +I think it would help to give some concrete examples to make it easier +to understand the API. Of course I agree that the API has to be +concise, but since there is no second document that is more of a +guide, I think that it'd make it easier to start with the doc which is +an API, but has examples in it in places where things are unclear, to +a person who is not a PerlIO guru (yet). + +=back + =cut diff --git a/pod/perlrun.pod b/pod/perlrun.pod index 726a69dad8..2b7cca11ce 100644 --- a/pod/perlrun.pod +++ b/pod/perlrun.pod @@ -700,10 +700,11 @@ program will be searched for strictly on the PATH. =item B<-t> Like B<-T>, but taint checks will issue warnings rather than fatal -errors. Since these are warnings, the B<-w> switch (or C<use warnings>) -must be used along with this option. This is meant only to be used as -a temporary aid while securing code: for real production code always -use the real B<-T>. +errors. Since these are warnings, the B<-w> switch (or C<use +warnings>) must be used along with this option. B<NOTE: this is +not a substitute for -T.> This is meant only to be used as a temporary +aid while securing legacy code: for real production code and for new +secure code written from scratch always use the real B<-T>. =item B<-T> diff --git a/pod/perltie.pod b/pod/perltie.pod index 38128b925e..f959367d28 100644 --- a/pod/perltie.pod +++ b/pod/perltie.pod @@ -850,7 +850,7 @@ or C<sysread> functions. sub READ { my $self = shift; - my $$bufref = \$_[0]; + my $bufref = \$_[0]; my(undef,$len,$offset) = @_; print "READ called, \$buf=$bufref, \$len=$len, \$offset=$offset"; # add to $$bufref, set $len to number of characters read @@ -2258,7 +2258,7 @@ PP(pp_complement) while (tmps < send) { UV c = utf8n_to_uvchr(tmps, send-tmps, &l, UTF8_ALLOW_ANYUV); tmps += UTF8SKIP(tmps); - result = uvchr_to_utf8(result, ~c); + result = uvchr_to_utf8_flags(result, ~c, UNICODE_ALLOW_ANY); } *result = '\0'; result -= targlen; @@ -3148,7 +3148,8 @@ PP(pp_chr) if (value > 255 && !IN_BYTES) { SvGROW(TARG, UNISKIP(value)+1); - tmps = (char*)uvchr_to_utf8((U8*)SvPVX(TARG), value); + tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, + UNICODE_ALLOW_SUPER); SvCUR_set(TARG, tmps - SvPVX(TARG)); *tmps = '\0'; (void)SvPOK_only(TARG); @@ -832,6 +832,8 @@ PERL_CALLCONV UV Perl_utf8n_to_uvchr(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, PERL_CALLCONV UV Perl_utf8n_to_uvuni(pTHX_ U8 *s, STRLEN curlen, STRLEN* retlen, U32 flags); PERL_CALLCONV U8* Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv); PERL_CALLCONV U8* Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv); +PERL_CALLCONV U8* Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags); +PERL_CALLCONV U8* Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags); PERL_CALLCONV char* Perl_pv_uni_display(pTHX_ SV *dsv, U8 *spv, STRLEN len, STRLEN pvlim, UV flags); PERL_CALLCONV char* Perl_sv_uni_display(pTHX_ SV *dsv, SV *ssv, STRLEN pvlim, UV flags); PERL_CALLCONV void Perl_vivify_defelem(pTHX_ SV* sv); @@ -1690,17 +1690,15 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) if (exp == NULL) FAIL("NULL regexp argument"); - /* XXXX This looks very suspicious... */ - if (pm->op_pmdynflags & PMdf_CMP_UTF8) - RExC_utf8 = 1; - else - RExC_utf8 = 0; + RExC_utf8 = pm->op_pmdynflags & PMdf_CMP_UTF8; RExC_precomp = exp; - DEBUG_r(if (!PL_colorset) reginitcolors()); - DEBUG_r(PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", - PL_colors[4],PL_colors[5],PL_colors[0], - (int)(xend - exp), RExC_precomp, PL_colors[1])); + DEBUG_r({ + if (!PL_colorset) reginitcolors(); + PerlIO_printf(Perl_debug_log, "%sCompiling REx%s `%s%*s%s'\n", + PL_colors[4],PL_colors[5],PL_colors[0], + (int)(xend - exp), RExC_precomp, PL_colors[1]); + }); RExC_flags16 = pm->op_pmflags; RExC_sawback = 0; @@ -3967,10 +3965,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state) } else #endif - for (i = prevvalue; i <= ceilvalue; i++) - ANYOF_BITMAP_SET(ret, i); + for (i = prevvalue; i <= ceilvalue; i++) + ANYOF_BITMAP_SET(ret, i); } - if (value > 255) { + if (value > 255 || UTF) { ANYOF_FLAGS(ret) |= ANYOF_UNICODE; if (prevvalue < value) Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", @@ -972,7 +972,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta if ( utf8_to_uvchr((U8*)s, &len) == c1 && (ln == 1 || ibcmp_utf8(s, do_utf8, strend - s, - m, UTF, ln)) ) + m, UTF, ln)) + && (norun || regtry(prog, s)) ) goto got_it; s += len; } @@ -982,7 +983,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta if ( (c == c1 || c == c2) && (ln == 1 || ibcmp_utf8(s, do_utf8, strend - s, - m, UTF, ln)) ) + m, UTF, ln)) + && (norun || regtry(prog, s)) ) goto got_it; s += len; } @@ -4110,9 +4112,12 @@ S_reginclass(pTHX_ register regnode *n, register U8* p, register bool do_utf8) match = TRUE; else if (flags & ANYOF_FOLD) { STRLEN ulen; - U8 tmpbuf[UTF8_MAXLEN_UCLC+1]; + U8 tmpbuf[UTF8_MAXLEN_FOLD+1]; - toLOWER_utf8(p, tmpbuf, &ulen); + to_utf8_fold(p, tmpbuf, &ulen); + if (swash_fetch(sw, tmpbuf, do_utf8)) + match = TRUE; + to_utf8_upper(p, tmpbuf, &ulen); if (swash_fetch(sw, tmpbuf, do_utf8)) match = TRUE; } diff --git a/t/op/each.t b/t/op/each.t index 556479ef70..8212264d55 100755 --- a/t/op/each.t +++ b/t/op/each.t @@ -135,7 +135,7 @@ ok ($i == 5); # Check for Unicode hash keys. %u = ("\x{12}", "f", "\x{123}", "fo", "\x{1234}", "foo"); $u{"\x{12345}"} = "bar"; -@u{"\x{123456}"} = "zap"; +@u{"\x{10FFFD}"} = "zap"; my %u2; foreach (keys %u) { diff --git a/t/op/pat.t b/t/op/pat.t index 6b4b0619bf..e4556ee1e7 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..757\n"; +print "1..770\n"; BEGIN { chdir 't' if -d 't'; @@ -1618,9 +1618,9 @@ EOT { # from Robin Houston - my $x = "\x{12345678}"; + my $x = "\x{10FFFD}"; $x =~ s/(.)/$1/g; - print "not " unless ord($x) == 0x12345678 && length($x) == 1; + print "not " unless ord($x) == 0x10FFFD && length($x) == 1; print "ok 587\n"; } @@ -2291,3 +2291,53 @@ print "# some Unicode properties\n"; print "not " unless "A\x{100}" =~ /A/i; print "ok 757\n"; } + +{ + use charnames ':full'; + + print "# LATIN LETTER A WITH GRAVE\n"; + my $lower = "\N{LATIN SMALL LETTER A WITH GRAVE}"; + my $UPPER = "\N{LATIN CAPITAL LETTER A WITH GRAVE}"; + + print $lower =~ m/$UPPER/i ? "ok 758\n" : "not ok 758\n"; + print $UPPER =~ m/$lower/i ? "ok 759\n" : "not ok 759\n"; + print $lower =~ m/[$UPPER]/i ? "ok 760\n" : "not ok 760\n"; + print $UPPER =~ m/[$lower]/i ? "ok 761\n" : "not ok 761\n"; + + print "# GREEK LETTER ALPHA WITH VRACHY\n"; + + $lower = "\N{GREEK CAPITAL LETTER ALPHA WITH VRACHY}"; + $UPPER = "\N{GREEK SMALL LETTER ALPHA WITH VRACHY}"; + + print $lower =~ m/$UPPER/i ? "ok 762\n" : "not ok 762\n"; + print $UPPER =~ m/$lower/i ? "ok 763\n" : "not ok 763\n"; + print $lower =~ m/[$UPPER]/i ? "ok 764\n" : "not ok 764\n"; + print $UPPER =~ m/[$lower]/i ? "ok 765\n" : "not ok 765\n"; + + print "# LATIN LETTER Y WITH DIAERESIS\n"; + + $lower = "\N{LATIN CAPITAL LETTER Y WITH DIAERESIS}"; + $UPPER = "\N{LATIN SMALL LETTER Y WITH DIAERESIS}"; + + print $lower =~ m/$UPPER/i ? "ok 766\n" : "not ok 766\n"; + print $UPPER =~ m/$lower/i ? "ok 767\n" : "not ok 767\n"; + print $lower =~ m/[$UPPER]/i ? "ok 768\n" : "not ok 768\n"; + print $UPPER =~ m/[$lower]/i ? "ok 769\n" : "not ok 769\n"; +} + +{ + use warnings; + use charnames ':full'; + + print "# GREEK CAPITAL LETTER SIGMA vs COMBINING GREEK PERISPOMENI\n"; + + my $SIGMA = "\N{GREEK CAPITAL LETTER SIGMA}"; + + my $hSIGMA = sprintf "%04x", ord $SIGMA; + + my $char = "\N{COMBINING GREEK PERISPOMENI}"; + my $code = sprintf "%04x", ord($char); + + # Before #13843 this was failing. + print "_:$char:_" =~ m/_:$SIGMA:_/i ? "not ok 770\n" : "ok 770\n"; +} @@ -60,4 +60,4 @@ is ("\x{000000000000000000000000000000000000000000000000000000000000000072}", chr 114); is ("\x{0_06_5}", chr 101); is ("\x{1234}", chr 4660); -is ("\x{98765432}", chr 2557891634); +is ("\x{10FFFD}", chr 1114109); @@ -27,15 +27,23 @@ /* Unicode support */ /* -=for apidoc A|U8 *|uvuni_to_utf8|U8 *d|UV uv +=for apidoc A|U8 *|uvuni_to_utf8_flags|U8 *d|UV uv|UV flags Adds the UTF8 representation of the Unicode codepoint C<uv> to the end of the string C<d>; C<d> should be have at least C<UTF8_MAXLEN+1> free bytes available. The return value is the pointer to the byte after the end of the new character. In other words, + d = uvuni_to_utf8_flags(d, uv, flags); + +or, in most cases, + d = uvuni_to_utf8(d, uv); +(which is equivalent to) + + d = uvuni_to_utf8_flags(d, uv, 0); + is the recommended Unicode-aware way of saying *(d++) = uv; @@ -44,13 +52,26 @@ is the recommended Unicode-aware way of saying */ U8 * -Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) +Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) { if (ckWARN_d(WARN_UTF8)) { - if (UNICODE_IS_SURROGATE(uv)) + if (UNICODE_IS_SURROGATE(uv) && + !(flags & UNICODE_ALLOW_SURROGATE)) Perl_warner(aTHX_ WARN_UTF8, "UTF-16 surrogate 0x%04"UVxf, uv); - else if ((uv >= 0xFDD0 && uv <= 0xFDEF) || - (uv == 0xFFFE || uv == 0xFFFF)) + else if ( + ((uv >= 0xFDD0 && uv <= 0xFDEF && + !(flags & UNICODE_ALLOW_FDD0)) + || + ((uv & 0xFFFF) == 0xFFFE && + !(flags & UNICODE_ALLOW_FFFE)) + || + ((uv & 0xFFFF) == 0xFFFF && + !(flags & UNICODE_ALLOW_FFFF))) && + /* UNICODE_ALLOW_SUPER includes + * FFFEs and FFFFs beyond 0x10FFFF. */ + ((uv <= PERL_UNICODE_MAX) || + !(flags & UNICODE_ALLOW_SUPER)) + ) Perl_warner(aTHX_ WARN_UTF8, "Unicode character 0x%04"UVxf" is illegal", uv); } @@ -138,7 +159,12 @@ Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) #endif #endif /* Loop style */ } - + +U8 * +Perl_uvuni_to_utf8(pTHX_ U8 *d, UV uv) +{ + return Perl_uvuni_to_utf8_flags(aTHX_ d, uv, 0); +} /* @@ -1041,6 +1067,36 @@ Perl_is_uni_xdigit_lc(pTHX_ UV c) return is_uni_xdigit(c); /* XXX no locale support yet */ } +U32 +Perl_to_uni_upper_lc(pTHX_ U32 c) +{ + /* XXX returns only the first character -- do not use XXX */ + /* XXX no locale support yet */ + STRLEN len; + U8 tmpbuf[UTF8_MAXLEN_UCLC+1]; + return (U32)to_uni_upper(c, tmpbuf, &len); +} + +U32 +Perl_to_uni_title_lc(pTHX_ U32 c) +{ + /* XXX returns only the first character XXX -- do not use XXX */ + /* XXX no locale support yet */ + STRLEN len; + U8 tmpbuf[UTF8_MAXLEN_UCLC+1]; + return (U32)to_uni_title(c, tmpbuf, &len); +} + +U32 +Perl_to_uni_lower_lc(pTHX_ U32 c) +{ + /* XXX returns only the first character -- do not use XXX */ + /* XXX no locale support yet */ + STRLEN len; + U8 tmpbuf[UTF8_MAXLEN_UCLC+1]; + return (U32)to_uni_lower(c, tmpbuf, &len); +} + bool Perl_is_utf8_alnum(pTHX_ U8 *p) { @@ -1514,9 +1570,14 @@ is the recommended wide native character-aware way of saying U8 * Perl_uvchr_to_utf8(pTHX_ U8 *d, UV uv) { - return Perl_uvuni_to_utf8(aTHX_ d, NATIVE_TO_UNI(uv)); + return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), 0); } +U8 * +Perl_uvchr_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags) +{ + return Perl_uvuni_to_utf8_flags(aTHX_ d, NATIVE_TO_UNI(uv), flags); +} /* =for apidoc A|UV|utf8n_to_uvchr|U8 *s|STRLEN curlen|STRLEN *retlen|U32 flags @@ -166,6 +166,17 @@ END_EXTERN_C #define UNICODE_BYTER_ORDER_MARK 0xfffe #define UNICODE_ILLEGAL 0xffff +/* Though our UTF-8 encoding can go beyond this, + * let's be conservative. */ +#define PERL_UNICODE_MAX 0x10FFFF + +#define UNICODE_ALLOW_SURROGATE 0x0001 /* Allow UTF-16 surrogates (EVIL) */ +#define UNICODE_ALLOW_FDD0 0x0002 /* Allow the U+FDD0...U+FDEF */ +#define UNICODE_ALLOW_FFFE 0x0004 /* Allow 0xFFFE, 0x1FFFE, ... */ +#define UNICODE_ALLOW_FFFF 0x0008 /* Allow 0xFFFE, 0x1FFFE, ... */ +#define UNICODE_ALLOW_SUPER 0x0010 /* Allow past 10xFFFF */ +#define UNICODE_ALLOW_ANY 0xFFFF + #define UNICODE_IS_SURROGATE(c) ((c) >= UNICODE_SURROGATE_FIRST && \ (c) <= UNICODE_SURROGATE_LAST) #define UNICODE_IS_REPLACEMENT(c) ((c) == UNICODE_REPLACEMENT) |