diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2002-01-15 08:19:08 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2002-01-15 08:19:08 +0000 |
commit | f1f8f8925a02a6ca4f9ef21b150b369edc63630d (patch) | |
tree | 6e3a25860d4c3ec95cf6b0fb42b32548e001155b | |
parent | a594c7b4495766324bc28504b0f4af2b5424ed02 (diff) | |
parent | 92e830a9086d75f086574c378b1c63ff2e00edcf (diff) | |
download | perl-f1f8f8925a02a6ca4f9ef21b150b369edc63630d.tar.gz |
Integrate rest of mainline
p4raw-id: //depot/perlio@14270
-rw-r--r-- | Changes | 204 | ||||
-rw-r--r-- | INSTALL | 11 | ||||
-rw-r--r-- | Makefile.SH | 2 | ||||
-rw-r--r-- | embed.h | 2 | ||||
-rw-r--r-- | embedvar.h | 9 | ||||
-rw-r--r-- | ext/B/B.xs | 2 | ||||
-rw-r--r-- | ext/List/Util/ChangeLog | 20 | ||||
-rw-r--r-- | ext/List/Util/Util.xs | 21 | ||||
-rw-r--r-- | ext/List/Util/lib/List/Util.pm | 8 | ||||
-rw-r--r-- | ext/List/Util/lib/Scalar/Util.pm | 5 | ||||
-rw-r--r-- | ext/Socket/socketpair.t | 2 | ||||
-rw-r--r-- | lib/Benchmark.t | 4 | ||||
-rw-r--r-- | lib/Cwd.pm | 4 | ||||
-rw-r--r-- | lib/Unicode/UCD.pm | 52 | ||||
-rw-r--r-- | lib/utf8_heavy.pl | 254 | ||||
-rw-r--r-- | patchlevel.h | 2 | ||||
-rw-r--r-- | perl.h | 2 | ||||
-rw-r--r-- | perlapi.h | 6 | ||||
-rw-r--r-- | pp.c | 22 | ||||
-rw-r--r-- | proto.h | 1 | ||||
-rw-r--r-- | regexec.c | 9 | ||||
-rw-r--r-- | sv.c | 30 | ||||
-rw-r--r-- | toke.c | 36 | ||||
-rw-r--r-- | utils/perldoc.PL | 26 |
24 files changed, 546 insertions, 188 deletions
@@ -31,6 +31,210 @@ or any other branch. Version v5.7.2 Development release working toward v5.8 -------------- ____________________________________________________________________________ +[ 14255] By: jhi on 2002/01/14 14:04:24 + Log: Retract #14251 (the op slab allocator from perlio) + until we figure out why 2.2.19 x86 debian gets + a circular sibling chain and therefore hangs in + the Perl_ck_subr() sibling for-loop. + Branch: perl + ! embed.fnc embed.h embedvar.h global.sym intrpvar.h op.c perl.h + ! perlapi.c perlapi.h proto.h +____________________________________________________________________________ +[ 14254] By: jhi on 2002/01/14 13:25:07 + Log: Big mktables rewrite from Jeffrey; + documentation not yet updated. + Branch: perl + + (add 171 files) + - (delete 182 files) + ! (edit 135 files) +____________________________________________________________________________ +[ 14253] By: ams on 2002/01/14 03:15:05 + Log: Subject: Re: [ID 20020113.006] Cwd.pm uses uninitialized $ENV{PATH} + From: Michael G Schwern <schwern@pobox.com> + Date: Sun, 13 Jan 2002 21:00:07 -0500 + Message-Id: <20020114020007.GB2877@blackrider> + Branch: perl + ! lib/Cwd.pm +____________________________________________________________________________ +[ 14252] By: jhi on 2002/01/14 00:27:41 + Log: Subject: Re: Win32 status - progress ! + From: Abe Timmerman <abe@ztreet.demon.nl> + Date: Mon, 14 Jan 2002 02:29:51 +0100 + Message-ID: <9rc44u4sl4hjfm32if71ggjlm0qpvvacs3@4ax.com> + Branch: perl + ! lib/ExtUtils/t/MM_Win32.t +____________________________________________________________________________ +[ 14251] By: jhi on 2002/01/13 23:30:05 + Log: Integrate perlio. + + Slab allocator for ops + - moved the statics to intrpvar.h + - implemented Slab_Free() + - uses PerlMemShared (for now) if distinction exists. + Branch: perl + !> embed.fnc embed.h embedvar.h global.sym intrpvar.h op.c perl.h + !> perlapi.c perlapi.h proto.h +____________________________________________________________________________ +[ 14249] By: jhi on 2002/01/13 19:55:10 + Log: Start a new test category: uni. Much of t/op/pat + should probably be moved here, but holding on that + until Jeffrey finishes his big mktables rewrite. + Bits and pieces of op/split, op/pack, op/append, + op/join, could probably be moved to respective uni + tests, too. + Branch: perl + + t/uni/fold.t t/uni/sprintf.t + - t/op/unifold.t t/op/unisprintf.t + ! MANIFEST t/TEST t/harness +____________________________________________________________________________ +[ 14248] By: jhi on 2002/01/13 19:52:02 + Log: Subject: [PATCH] enable Win32 to build again + From: "Mattia Barbon" <mbarbon@dsi.unive.it> + Date: Sun, 13 Jan 2002 21:49:24 +0100 + Message-ID: <3C4200E4.15926.23F6131@localhost> + + miniperl.exe.exe does not exe that well. + Branch: perl + ! lib/ExtUtils/MM_Unix.pm +____________________________________________________________________________ +[ 14247] By: jhi on 2002/01/13 19:50:23 + Log: Mention some of the load-affected tests. + Branch: perl + ! INSTALL +____________________________________________________________________________ +[ 14246] By: jhi on 2002/01/13 19:38:17 + Log: Move the UTF-8 conversion code earlier so that + op/ver is happy. + Branch: perl + ! sv.c t/op/ver.t +____________________________________________________________________________ +[ 14245] By: jhi on 2002/01/13 19:19:59 + Log: Upgrade to Scalar-List-Util 1.06. The Makefile.PLs + have diverged quite a bit so no updates there. + Branch: perl + ! ext/List/Util/ChangeLog ext/List/Util/Util.xs + ! ext/List/Util/lib/List/Util.pm + ! ext/List/Util/lib/Scalar/Util.pm +____________________________________________________________________________ +[ 14244] By: jhi on 2002/01/13 18:27:00 + Log: Make the crypt() pickier: if downgrading doesn't work, + croak. + Branch: perl + ! pod/perldiag.pod pod/perlfunc.pod pp.c t/op/crypt.t +____________________________________________________________________________ +[ 14243] By: jhi on 2002/01/13 18:12:23 + Log: Grow the buffer in case Unicode caused size changes + (and it very probably did). + Branch: perl + ! sv.c +____________________________________________________________________________ +[ 14242] By: jhi on 2002/01/13 17:46:45 + Log: Future-proofing from Jeffrey Friedl (for conflicting + In* and Is* names). + Branch: perl + ! lib/utf8_heavy.pl +____________________________________________________________________________ +[ 14241] By: jhi on 2002/01/13 17:40:29 + Log: Subject: [PATCH] Re: Warning in B.xs + From: "Mattia Barbon" <mbarbon@dsi.unive.it> + Date: Sun, 13 Jan 2002 19:39:52 +0100 + Message-ID: <3C41E288.20562.1C8C895@localhost> + Branch: perl + ! ext/B/B.xs +____________________________________________________________________________ +[ 14240] By: jhi on 2002/01/13 17:35:48 + Log: Subject: Re: lookbehind broken with latest bleedperl + From: Hugo van der Sanden <hv@crypt.compulink.co.uk> + Date: Sun, 13 Jan 2002 18:06:22 +0000 + Message-Id: <200201131806.g0DI6Mp20089@crypt.compulink.co.uk> + Branch: perl + ! regexec.c t/op/re_tests +____________________________________________________________________________ +[ 14239] By: jhi on 2002/01/13 17:24:51 + Log: Subject: [PATCH] more MM_Unix.t tests + From: Tels <perl_dummy@bloodgate.com> + Date: Sun, 13 Jan 2002 17:16:19 +0100 (CET) + Message-Id: <200201131621.SAA17925@tiku.hut.fi> + + Subject: [PATCH] 14220 misunderstood, I bet + From: andreas.koenig@anima.de (Andreas J. Koenig) + Date: 12 Jan 2002 21:31:58 +0100 + Message-ID: <m3k7un8g5d.fsf@anima.de> + Branch: perl + ! lib/ExtUtils/t/MM_Unix.t +____________________________________________________________________________ +[ 14238] By: jhi on 2002/01/13 17:14:25 + Log: Subject: [PATCH] Re: MM_UNIX::parse_version() and my $VERSION + From: Tels <perl_dummy@bloodgate.com> + Date: Sun, 13 Jan 2002 16:58:27 +0100 (CET) + Message-Id: <200201131602.SAA15687@tiku.hut.fi> + + Subject: RE: [PATCH] Re: MM_UNIX::parse_version() and my $VERSION + From: Tels <perl_dummy@bloodgate.com> + Date: Sun, 13 Jan 2002 17:05:23 +0100 (CET) + Message-Id: <200201131610.SAA18974@tiku.hut.fi> + Branch: perl + ! lib/ExtUtils/MM_Unix.pm +____________________________________________________________________________ +[ 14237] By: jhi on 2002/01/13 17:12:04 + Log: Subject: [PATCH] Re: [FAIL] Benchmark.t intermittent failure + From: Nicholas Clark <nick@unfortu.net> + Date: Sun, 13 Jan 2002 15:58:34 +0000 + Message-ID: <20020113155833.C314@Bagpuss.unfortu.net> + Branch: perl + ! lib/Benchmark.t +____________________________________________________________________________ +[ 14236] By: jhi on 2002/01/13 17:08:18 + Log: Anton Berezin did more reading and the uid setting story + gets more complex. + Branch: perl + ! pod/perltodo.pod utils/perldoc.PL +____________________________________________________________________________ +[ 14235] By: jhi on 2002/01/13 16:45:52 + Log: Integrate perlio; + + Win32 fixes: + - vmem.h hack to handle free-by-wrong-thread after eval "". + - Initialize timerid + Branch: perl + !> win32/perlhost.h win32/perllib.c win32/vmem.h win32/win32.c +____________________________________________________________________________ +[ 14234] By: jhi on 2002/01/13 16:44:07 + Log: Subject: Re: HiRes + From: "H.Merijn Brand" <h.m.brand@hccnet.nl> + Date: Sun, 13 Jan 2002 10:18:46 +0100 + Message-Id: <20020113100616.B6B5.H.M.BRAND@hccnet.nl> + + Give up on socketpair in HP-UX. + Branch: perl + ! ext/Socket/socketpair.t +____________________________________________________________________________ +[ 14231] By: jhi on 2002/01/13 05:15:01 + Log: *groan* + Branch: perl + ! t/op/pat.t +____________________________________________________________________________ +[ 14230] By: jhi on 2002/01/13 05:13:03 + Log: One more sharp s case found by Jeffrey. + Branch: perl + ! regexec.c t/op/pat.t +____________________________________________________________________________ +[ 14229] By: jhi on 2002/01/13 04:43:33 + Log: Comment tweak. + Branch: perl + ! regexec.c +____________________________________________________________________________ +[ 14228] By: jhi on 2002/01/13 04:30:45 + Log: Don't bother checking for the Greek special + cases if the node is too short. + Branch: perl + ! regcomp.c +____________________________________________________________________________ +[ 14227] By: jhi on 2002/01/12 21:00:04 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ [ 14225] By: jhi on 2002/01/12 20:28:05 Log: Subject: [PATCH 2] Re: [PATCH Configure ext/NDBM_File/hints/linux.pl] Support for linux systems with gdbm From: Jonathan Stowe <gellyfish@gellyfish.com> @@ -1927,11 +1927,12 @@ external program. =item Timing problems -Several tests in the test suite check timing functions, such as sleep(), -and see if they return in a reasonable amount of time. If your system is -quite busy and doesn't return quickly enough, these tests might fail. -If possible, try running the tests again with the system under a -lighter load. +Several tests in the test suite check timing functions, such as +sleep(), and see if they return in a reasonable amount of time. +If your system is quite busy and doesn't return quickly enough, +these tests might fail. If possible, try running the tests again with +the system under a lighter load. These tests include F<t/op/alarm.t>, +F<ext/Time/HiRes/HiRes.t>, and F<lib/Benchmark.t>. =item Out of memory diff --git a/Makefile.SH b/Makefile.SH index ced2b732c0..103f503374 100644 --- a/Makefile.SH +++ b/Makefile.SH @@ -1018,7 +1018,7 @@ test-notty: test_notty # Targets for Third Degree testing. -test_prep.third: test_prep perl.third perl.third$(EXE_EXT) +test_prep.third: test_prep perl.third PERL=./perl.third $(MAKE) _test_prep test.third check.third: test_prep.third perl.third @@ -938,7 +938,6 @@ #define apply_attrs_my S_apply_attrs_my # if defined(PL_OP_SLAB_ALLOC) #define Slab_Alloc S_Slab_Alloc -#define Slab_Free S_Slab_Free # endif #endif #if defined(PERL_IN_PERL_C) || defined(PERL_DECL_PROT) @@ -2482,7 +2481,6 @@ #define apply_attrs_my(a,b,c,d) S_apply_attrs_my(aTHX_ a,b,c,d) # if defined(PL_OP_SLAB_ALLOC) #define Slab_Alloc(a,b) S_Slab_Alloc(aTHX_ a,b) -#define Slab_Free(a) S_Slab_Free(aTHX_ a) # endif #endif #if defined(PERL_IN_PERL_C) || defined(PERL_DECL_PROT) diff --git a/embedvar.h b/embedvar.h index c6eb5fa7ae..16c8e46233 100644 --- a/embedvar.h +++ b/embedvar.h @@ -183,9 +183,6 @@ #define PL_Mem (PERL_GET_INTERP->IMem) #define PL_MemParse (PERL_GET_INTERP->IMemParse) #define PL_MemShared (PERL_GET_INTERP->IMemShared) -#define PL_OpPtr (PERL_GET_INTERP->IOpPtr) -#define PL_OpSlab (PERL_GET_INTERP->IOpSlab) -#define PL_OpSpace (PERL_GET_INTERP->IOpSpace) #define PL_Proc (PERL_GET_INTERP->IProc) #define PL_Sock (PERL_GET_INTERP->ISock) #define PL_StdIO (PERL_GET_INTERP->IStdIO) @@ -481,9 +478,6 @@ #define PL_Mem (vTHX->IMem) #define PL_MemParse (vTHX->IMemParse) #define PL_MemShared (vTHX->IMemShared) -#define PL_OpPtr (vTHX->IOpPtr) -#define PL_OpSlab (vTHX->IOpSlab) -#define PL_OpSpace (vTHX->IOpSpace) #define PL_Proc (vTHX->IProc) #define PL_Sock (vTHX->ISock) #define PL_StdIO (vTHX->IStdIO) @@ -782,9 +776,6 @@ #define PL_IMem PL_Mem #define PL_IMemParse PL_MemParse #define PL_IMemShared PL_MemShared -#define PL_IOpPtr PL_OpPtr -#define PL_IOpSlab PL_OpSlab -#define PL_IOpSpace PL_OpSpace #define PL_IProc PL_Proc #define PL_ISock PL_Sock #define PL_IStdIO PL_StdIO diff --git a/ext/B/B.xs b/ext/B/B.xs index c9ca8b1962..9b7fa9d683 100644 --- a/ext/B/B.xs +++ b/ext/B/B.xs @@ -1013,7 +1013,7 @@ SvSTASH(sv) #define MgFLAGS(mg) mg->mg_flags #define MgOBJ(mg) mg->mg_obj #define MgLENGTH(mg) mg->mg_len -#define MgREGEX(mg) ((IV)(mg->mg_obj)) +#define MgREGEX(mg) PTR2IV(mg->mg_obj) MODULE = B PACKAGE = B::MAGIC PREFIX = Mg diff --git a/ext/List/Util/ChangeLog b/ext/List/Util/ChangeLog index 3d2295b4ae..5ab668b155 100644 --- a/ext/List/Util/ChangeLog +++ b/ext/List/Util/ChangeLog @@ -1,3 +1,23 @@ +Change 645 on 2001/09/07 by <gbarr@pobox.com> (Graham Barr) + + Some platforms require the main executable to export symbols + needed by modules. In 5.7.2 and prior releases of perl + Perl_cxinc was not exported so we need to duplicate its + functionality + +Change 644 on 2001/09/07 by <gbarr@pobox.com> (Graham Barr) + + Generate a typemap for NV for all perl version up to and + including 5.006 + +Change 643 on 2001/09/07 by <gbarr@pobox.com> (Graham Barr) + + Document problems known with specific versions of perl + +Change 642 on 2001/09/05 by <gbarr@pobox.com> (Graham Barr) + + Release 1.05 + Change 641 on 2001/09/05 by <gbarr@pobox.com> (Graham Barr) Fix shuffle() to compile with threaded perl diff --git a/ext/List/Util/Util.xs b/ext/List/Util/Util.xs index 20b6319d40..92ee08499e 100644 --- a/ext/List/Util/Util.xs +++ b/ext/List/Util/Util.xs @@ -16,6 +16,27 @@ #ifndef aTHX # define aTHX +# define pTHX +#endif + +/* Some platforms have strict exports. And before 5.7.3 cxinc (or Perl_cxinc) + was not exported. Therefore platforms like win32, VMS etc have problems + so we redefine it here -- GMB +*/ +#if PERL_VERSION < 7 +/* Not in 5.6.1. */ +# define SvUOK(sv) SvIOK_UV(sv) +# ifdef cxinc +# undef cxinc +# endif +# define cxinc() my_cxinc(aTHX) +static I32 +my_cxinc(pTHX) +{ + cxstack_max = cxstack_max * 3 / 2; + Renew(cxstack, cxstack_max + 1, struct context); /* XXX should fix CXINC macro */ + return cxstack_ix + 1; +} #endif #if PERL_VERSION < 6 diff --git a/ext/List/Util/lib/List/Util.pm b/ext/List/Util/lib/List/Util.pm index b61e13c28e..91dbcdb7b6 100644 --- a/ext/List/Util/lib/List/Util.pm +++ b/ext/List/Util/lib/List/Util.pm @@ -11,7 +11,7 @@ require DynaLoader; our @ISA = qw(Exporter DynaLoader); our @EXPORT_OK = qw(first min max minstr maxstr reduce sum shuffle); -our $VERSION = "1.05_00"; +our $VERSION = "1.06_00"; bootstrap List::Util $VERSION; @@ -148,6 +148,12 @@ This function could be implemented using C<reduce> like this =back +=head1 KNOWN BUGS + +With perl versions prior to 5.005 there are some cases where reduce +will return an incorrect result. This will show up as test 7 of +reduce.t failing. + =head1 SUGGESTED ADDITIONS The following are additions that have been requested, but I have been reluctant diff --git a/ext/List/Util/lib/Scalar/Util.pm b/ext/List/Util/lib/Scalar/Util.pm index 432361f28c..1329d1a48a 100644 --- a/ext/List/Util/lib/Scalar/Util.pm +++ b/ext/List/Util/lib/Scalar/Util.pm @@ -115,6 +115,11 @@ prevent the object being DESTROY-ed at its usual time. =back +=head1 KNOWN BUGS + +There is a bug in perl5.6.0 with UV's that are >= 1<<31. This will +show up as tests 8 and 9 of dualvar.t failing + =head1 COPYRIGHT Copyright (c) 1997-2001 Graham Barr <gbarr@pobox.com>. All rights reserved. diff --git a/ext/Socket/socketpair.t b/ext/Socket/socketpair.t index e90b31a514..639606a3e9 100644 --- a/ext/Socket/socketpair.t +++ b/ext/Socket/socketpair.t @@ -9,7 +9,7 @@ BEGIN { require Config; import Config; $can_fork = $Config{d_fork} || ($^O eq 'MSWin32' && $Config{useithreads}); - if ($Config{'extensions'} !~ /\bSocket\b/ && + if ($^O eq "hpux" or $Config{'extensions'} !~ /\bSocket\b/ && !(($^O eq 'VMS') && $Config{d_socket})) { print "1..0\n"; exit 0; diff --git a/lib/Benchmark.t b/lib/Benchmark.t index 25a3478c1b..dddce3dcb3 100644 --- a/lib/Benchmark.t +++ b/lib/Benchmark.t @@ -12,7 +12,7 @@ use Test::More tests => 159; use Benchmark qw(:all); -my $delta = 0.2; +my $delta = 0.3; # Some timing ballast sub fib { @@ -59,7 +59,7 @@ my $in_threesecs = $threesecs->iters; print "# $in_threesecs iterations\n"; ok ($in_threesecs > 0, "iters returned positive iterations"); -my $estimate = int ($in_threesecs / 3); +my $estimate = int (100 * $in_threesecs / 3) / 100; print "# from the 3 second run estimate $estimate iterations in 1 second...\n"; $baz = 0; my $onesec = countit(1, $coderef); diff --git a/lib/Cwd.pm b/lib/Cwd.pm index 63a14fdcbc..0db9410cc6 100644 --- a/lib/Cwd.pm +++ b/lib/Cwd.pm @@ -200,7 +200,9 @@ sub _backtick_pwd { unless(defined &cwd) { # The pwd command is not available in some chroot(2)'ed environments - if($^O eq 'MacOS' || grep { -x "$_/pwd" } split(':', $ENV{PATH})) { + if( $^O eq 'MacOS' || (defined $ENV{PATH} && + grep { -x "$_/pwd" } split(':', $ENV{PATH})) ) + { *cwd = \&_backtick_pwd; } else { diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index ff9cc8fc05..b239c16fc1 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -108,7 +108,7 @@ as defined by the Unicode standard: title titlecase equivalent mapping block block the character belongs to (used in \p{In...}) - script script the character belongs to + script script the character belongs to If no match is found, a reference to an empty hash is returned. @@ -280,13 +280,12 @@ positions within all blocks are defined. See also L</Blocks versus Scripts>. -If supplied with an argument that can't be a code point, charblock() -tries to do the opposite and interpret the argument as a character -block. The return value is a I<range>: an anonymous list that -contains anonymous lists, which in turn contain I<start-of-range>, -I<end-of-range> code point pairs. You can test whether a code point -is in a range using the L</charinrange> function. If the argument is -not a known charater block, C<undef> is returned. +If supplied with an argument that can't be a code point, charblock() tries +to do the opposite and interpret the argument as a character block. The +return value is a I<range>: an anonymous list of lists that contain +I<start-of-range>, I<end-of-range> code point pairs. You can test whether a +code point is in a range using the L</charinrange> function. If the +argument is not a known charater block, C<undef> is returned. =cut @@ -342,13 +341,12 @@ character belongs to, e.g. C<Latin>, C<Greek>, C<Han>. See also L</Blocks versus Scripts>. -If supplied with an argument that can't be a code point, charscript() -tries to do the opposite and interpret the argument as a character -script. The return value is a I<range>: an anonymous list that -contains anonymous lists, which in turn contain I<start-of-range>, -I<end-of-range> code point pairs. You can test whether a code point -is in a range using the L</charinrange> function. If the argument is -not a known charater script, C<undef> is returned. +If supplied with an argument that can't be a code point, charscript() tries +to do the opposite and interpret the argument as a character script. The +return value is a I<range>: an anonymous list of lists that contain +I<start-of-range>, I<end-of-range> code point pairs. You can test whether a +code point is in a range using the L</charinrange> function. If the +argument is not a known charater script, C<undef> is returned. =cut @@ -433,13 +431,13 @@ sub charscripts { The difference between a block and a script is that scripts are closer to the linguistic notion of a set of characters required to present languages, while block is more of an artifact of the Unicode character -numbering and separation into blocks of 256 characters. +numbering and separation into blocks of (mostly) 256 characters. For example the Latin B<script> is spread over several B<blocks>, such as C<Basic Latin>, C<Latin 1 Supplement>, C<Latin Extended-A>, and C<Latin Extended-B>. On the other hand, the Latin script does not contain all the characters of the C<Basic Latin> block (also known as -the ASCII): it includes only the letters, not for example the digits +the ASCII): it includes only the letters, and not, for example, the digits or the punctuation. For blocks see http://www.unicode.org/Public/UNIDATA/Blocks.txt @@ -448,23 +446,15 @@ For scripts see UTR #24: http://www.unicode.org/unicode/reports/tr24/ =head2 Matching Scripts and Blocks -Both scripts and blocks can be matched using the regular expression -construct C<\p{In...}> and its negation C<\P{In...}>. - -The name of the script or the block comes after the C<In>, for example -C<\p{InCyrillic}>, C<\P{InBasicLatin}>. Spaces and dashes ('-') are -removed from the names for the C<\p{In...}>, for example -C<LatinExtendedA> instead of C<Latin Extended-A>. - -There are a few cases where there is both a script and a block by the -same name, in these cases the block version has C<Block> appended to -its name: C<\p{InKatakana}> is the script, C<\p{InKatakanaBlock}> is -the block. +Scripts are matched with the regular-expression construct +C<\p{...}> (e.g. C<\p{Tibetan}> matches characters of the Tibetan script), +while C<\p{In...}> is used for blocks (e.g. C<\p{InTibetan}> matches +any of the 256 code points in the Tibetan block). =head2 Code Point Arguments -A <code point argument> is either a decimal or a hexadecimal scalar -designating a Unicode character, or "U+" followed by hexadecimals +A I<code point argument> is either a decimal or a hexadecimal scalar +designating a Unicode character, or C<U+> followed by hexadecimals designating a Unicode character. Note that Unicode is B<not> limited to 16 bits (the number of Unicode characters is open-ended, in theory unlimited): you may have more than 4 hexdigits. diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 4fbb112c23..e82c06b520 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -1,4 +1,6 @@ package utf8; +use strict; +use warnings; sub DEBUG () { 0 } @@ -12,100 +14,183 @@ sub SWASHNEW { print STDERR "SWASHNEW @_\n" if DEBUG; - if ($type and ref ${"${class}::{$type}"} eq $class) { - warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG; - return ${"${class}::{$type}"}; # Already there... + ## check to see if we've already got it. + { + no strict 'refs'; + if ($type and ref ${"${class}::{$type}"} eq $class) { + warn qq/Found \${"${class}::{$type}"}\n/ if DEBUG; + return ${"${class}::{$type}"}; + } } - if ($type) { - $type =~ s/^\s+//; - $type =~ s/\s+$//; - - print "type = $type\n" if DEBUG; - - my $file; - - unless (defined $file) { - defined %utf8::Is || do "unicore/Is.pl"; - if ($type =~ /^(?:Is|Category\s*=\s*)?[- _]?([A-Z].*)$/i) { - my $istype = $1; - print "istype = $istype\n" if DEBUG; - unless ($list = do "unicore/Is/$istype.pl") { - if (exists $utf8::Is{$istype}) { - $file = "unicore/Is/$utf8::Is{$istype}"; - } else { - my $isprefix = substr(lc($istype), 0, 2); - print "isprefix = $isprefix\n" if DEBUG; - if (exists $utf8::IsPat{$isprefix}) { - my $Is = $istype; - print "isprefix = $isprefix, Is = $Is\n" if DEBUG; - for my $k (keys %{$utf8::IsPat{$isprefix}}) { - print "isprefix = $isprefix, Is = $Is, k = $k\n" if DEBUG; - if ($Is =~ /^$k$/i) { - $file = "unicore/Is/$utf8::IsPat{$isprefix}->{$k}"; - print "isprefix = $isprefix, Is = $Is, k = $k, file = $file\n" if DEBUG; - last; - } - } - } - } - } - } + ## + ## Get the list of codepoints for the type. + ## Called from utf8.c + ## + ## Given a $type, our goal is to fill $list with the set of codepoint + ## ranges. As we try various interpretations of $type, sometimes we'll + ## end up with the $list directly, and sometimes we'll end up with a + ## $file name that holds the list data. + ## + ## To make the parsing of $type clear, this code takes the a rather + ## unorthadox approach of last'ing out of the block once we have the + ## info we need. Were this to be a subroutine, the 'last' would just + ## be a 'return'. + ## + if ($type) + { + $type =~ s/^\s+//; + $type =~ s/\s+$//; - unless (defined $file) { - defined %utf8::In || do "unicore/In.pl"; - $type = 'Lampersand' if $type =~ /^(?:Is)?L&$/; - $type = 'Assigned' if $type =~ /^(?:Is)?Assigned$/i; - $type = 'Unassigned' if $type =~ /^(?:Is)?Unassigned$/i; - if ($type =~ /^(In|(?:Script|Block)\s*=\s*)?[- _]?(?!herited$)(.+)/i) { - my $incat = $1 || ''; - my $intype = $2; - print "incat = $incat, intype = $intype\n" if DEBUG; - if (exists $utf8::In{$intype}) { - $file = "unicore/In/$utf8::In{$intype}"; - } else { - my $inprefix = substr(lc($intype), 0, 2); - print "inprefix = $inprefix\n" if DEBUG; - if (exists $utf8::InPat{$inprefix}) { - my $In = $intype; - print "inprefix = $inprefix, In = $In\n" if DEBUG; - for my $k (keys %{$utf8::InPat{$inprefix}}) { - print "inprefix = $inprefix, In = $In, k = $k\n" if DEBUG; - if ($In =~ /^$k$/i) { - my $i = $utf8::InPat{$inprefix}->{$k}; - print "inprefix = $inprefix, In = $In, k = $k, i = $i\n" if DEBUG; - next if $incat =~ /^S/ && - !exists $utf8::InScript{$i}; - next if $incat =~ /^B/ && - !exists $utf8::InBlock{$i}; - $file = "unicore/In/$i"; - print "inprefix = $inprefix, In = $In, k = $k, file = $file\n" if DEBUG; - last; - } - } - } - } - } - } + print "type = $type\n" if DEBUG; - unless (defined $file) { - if ($type =~ /^To([A-Z][A-Za-z]+)$/) { - $file = "unicore/To/$1"; - } - } - } + my $file; + ## Figure out what file to load to get the data.... + GETFILE: + { + ## + ## First, see if it's an "Is" name (the 'Is' is optional) + ## + ## Because we check "Is" names first, they have precidence over + ## "In" names. For example, "Greek" is both a script and a + ## block. "IsGreek" always gets the script, while "InGreek" + ## always gets the block. "Greek" gets the script because we + ## check "Is" names first. + ## + if ($type =~ m{^ + ## "Is" prefix, or "Script=" or "Category=" + (?: Is [- _]? | (?:Script|Category)\s*=\s* )? + ## name to check in the "Is" symbol table. + ([A-Z].*) + $ + }ix) + { + my $istype = $1; + ## + ## Input ($type) Name To Check ($istype) + ## ------------- ----------------------- + ## IsLu Lu + ## Lu Lu + ## Category = Lu Lu + ## Foo Foo + ## Script = Greek Greek + ## - if (defined $file) { - $list = do "$file.pl"; - } + print "istype = $istype\n" if DEBUG; - croak("Can't find Unicode character property \"$type\"") - unless $list; + ## Load "Is" mapping data, if not yet loaded. + do "unicore/Is.pl" if not defined %utf8::Is; + + ## + ## If the "Is" mapping data has an exact match, it points + ## to the file we need. + ## + if (exists $utf8::Is{$istype}) + { + $file = "unicore/Is/$utf8::Is{$istype}.pl"; + last GETFILE; + } + + ## + ## Need to look at %utf8::IsPat (loaded from "unicore/Is.pl") + ## to see if there's a regex that matches this $istype. + ## If so, the associated name is the file we need. + ## + my $prefix = substr(lc($istype), 0, 2); + if (exists $utf8::IsPat{$prefix}) + { + while (my ($pat, $name) = each %{$utf8::IsPat{$prefix}}) + { + print "isprefix = $prefix, Is = $istype, pat = $pat\n" if DEBUG; + ## + ## The following regex probably need not be cached, + ## since every time there's a match, the results of + ## the entire call to SWASHNEW() is cached, so there's + ## a very limited number of times any one $pat will + ## be evaluated as a regex, at least with "reasonable" + ## code that doesn't try a baziilion \p{Random} names. + ## + if ($istype =~ /^$pat$/i) + { + $file = "unicore/Is/$name.pl"; + last GETFILE; + } + } + } + } + + ## + ## Couldn't find via "Is" -- let's try via "In"..... + ## + if ($type =~ m{^ + ( In(?!herited$)[- _]? | Block\s*=\s*)? + ([A-Z].*) + $ + }xi) + { + my $intype = $2; + print "intype = $intype\n" if DEBUG; + + ## + ## Input ($type) Name To Check ($intype) + ## ------------- ----------------------- + ## Inherited Inherited + ## InGreek Greek + ## Block = Greek Greek + ## + + ## Load "In" mapping data, if not yet loaded. + do "unicore/In.pl" if not defined %utf8::In; + + ## If there's a direct match, it points to the file we need + if (exists $utf8::In{$intype}) { + $file = "unicore/In/$utf8::In{$intype}.pl"; + last GETFILE; + } + + my $prefix = substr(lc($intype), 0, 2); + if (exists $utf8::InPat{$prefix}) + { + print "inprefix = $prefix, In = $intype\n" if DEBUG; + while (my ($pat, $name) = each %{$utf8::InPat{$prefix}}) + { + print "inprefix = $prefix, In = $intype, k = $pat\n" if DEBUG; + if ($intype =~ /^$pat$/i) { + $file = "unicore/In/$name.pl"; + print "inprefix = $prefix, In = $intype, k = $pat, file = $file\n" if DEBUG; + last GETFILE; + } + } + } + } + + ## + ## Last attempt -- see if it's a "To" name (e.g. "ToLower") + ## + if ($type =~ /^To([A-Z][A-Za-z]+)$/) + { + $file = "unicore/To/$1.pl"; + ## would like to test to see if $file actually exists.... + last GETFILE; + } + + ## + ## If we reach this line, it's because we couldn't figure + ## out what to do with $type. Ouch. + ## + croak("Can't find Unicode character property \"$type\""); + } + + ## + ## If we reach here, it was due to a 'last GETFILE' above, so we + ## have a filename, so now we load it. + ## + $list = do $file; } my $extras; my $bits; - + if ($list) { my @tmp = split(/^/m, $list); my %seen; @@ -155,6 +240,7 @@ sub SWASHNEW { print STDERR "CLASS = $class, TYPE => $type, BITS => $bits, NONE => $none\nEXTRAS =>\n$extras\nLIST =>\n$list\n" if DEBUG; + no strict 'refs'; ${"${class}::{$type}"} = bless { TYPE => $type, BITS => $bits, diff --git a/patchlevel.h b/patchlevel.h index d924c17ad2..77d5e92f8f 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -70,7 +70,7 @@ #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT) static char *local_patches[] = { NULL - ,"DEVEL14225" + ,"DEVEL14255" ,NULL }; @@ -9,8 +9,6 @@ #ifndef H_PERL #define H_PERL 1 -#define PL_OP_SLAB_ALLOC - #ifdef PERL_FOR_X2P /* * This file is being used for x2p stuff. @@ -103,12 +103,6 @@ END_EXTERN_C #define PL_MemParse (*Perl_IMemParse_ptr(aTHX)) #undef PL_MemShared #define PL_MemShared (*Perl_IMemShared_ptr(aTHX)) -#undef PL_OpPtr -#define PL_OpPtr (*Perl_IOpPtr_ptr(aTHX)) -#undef PL_OpSlab -#define PL_OpSlab (*Perl_IOpSlab_ptr(aTHX)) -#undef PL_OpSpace -#define PL_OpSpace (*Perl_IOpSpace_ptr(aTHX)) #undef PL_Proc #define PL_Proc (*Perl_IProc_ptr(aTHX)) #undef PL_Sock @@ -3178,26 +3178,22 @@ PP(pp_crypt) STRLEN n_a; STRLEN len; char *tmps = SvPV(left, len); - char *t = 0; if (DO_UTF8(left)) { - /* If Unicode take the crypt() of the low 8 bits of - * the characters of the string. Yes, we made this up. */ - char *s = tmps; - char *send = tmps + len; - STRLEN i = 0; - Newz(688, t, len + 1, char); - while (s < send) { - t[i++] = utf8_to_uvchr((U8*)s, 0) & 0xFF; - s += UTF8SKIP(s); - } - tmps = t; + /* If Unicode, try to dowgrade. + * If not possible, croak. + * Yes, we made this up. */ + SV* tsv = sv_2mortal(newSVsv(left)); + + SvUTF8_on(tsv); + if (!sv_utf8_downgrade(tsv, FALSE)) + Perl_croak(aTHX_ "Wide character in crypt"); + tmps = SvPVX(tsv); } # ifdef FCRYPT sv_setpv(TARG, fcrypt(tmps, SvPV(right, n_a))); # else sv_setpv(TARG, PerlProc_crypt(tmps, SvPV(right, n_a))); # endif - Safefree(t); #else DIE(aTHX_ "The crypt() function is unimplemented due to excessive paranoia."); @@ -1045,7 +1045,6 @@ STATIC void S_apply_attrs(pTHX_ HV *stash, SV *target, OP *attrs, bool for_my); STATIC void S_apply_attrs_my(pTHX_ HV *stash, OP *target, OP *attrs, OP **imopsp); # if defined(PL_OP_SLAB_ALLOC) STATIC void* S_Slab_Alloc(pTHX_ int m, size_t sz); -STATIC void S_Slab_Free(pTHX_ void *); # endif #endif @@ -140,13 +140,18 @@ PL_regkind[(U8)OP(rn)] == EXACT || PL_regkind[(U8)OP(rn)] == REF \ ) +/* + Search for mandatory following text node; for lookahead, the text must + follow but for lookbehind (rn->flags != 0) we skip to the next step. +*/ #define FIND_NEXT_IMPT(rn) STMT_START { \ while (JUMPABLE(rn)) \ - if (OP(rn) == SUSPEND || OP(rn) == IFMATCH || \ - PL_regkind[(U8)OP(rn)] == CURLY) \ + if (OP(rn) == SUSPEND || PL_regkind[(U8)OP(rn)] == CURLY) \ rn = NEXTOPER(NEXTOPER(rn)); \ else if (OP(rn) == PLUS) \ rn = NEXTOPER(rn); \ + else if (OP(rn) == IFMATCH) \ + rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \ else rn += NEXT_OFF(rn); \ } STMT_END @@ -8337,6 +8337,22 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV continue; /* not "break" */ } + if (is_utf8 != has_utf8) { + if (is_utf8) { + if (SvCUR(sv)) + sv_utf8_upgrade(sv); + } + else { + SV *nsv = sv_2mortal(newSVpvn(eptr, elen)); + sv_utf8_upgrade(nsv); + eptr = SvPVX(nsv); + elen = SvCUR(nsv); + } + SvGROW(sv, SvCUR(sv) + elen + 1); + p = SvEND(sv); + *p = '\0'; + } + have = esignlen + zeros + elen; need = (have > width ? have : width); gap = need - have; @@ -8360,20 +8376,6 @@ Perl_sv_vcatpvfn(pTHX_ SV *sv, const char *pat, STRLEN patlen, va_list *args, SV *p++ = '0'; } if (elen) { - if (is_utf8 != has_utf8) { - if (is_utf8) { - if (SvCUR(sv)) { - sv_utf8_upgrade(sv); - p = SvEND(sv); - } - } - else { - SV *nsv = sv_2mortal(newSVpvn(eptr, elen)); - sv_utf8_upgrade(nsv); - eptr = SvPVX(nsv); - elen = SvCUR(nsv); - } - } Copy(eptr, p, elen, char); p += elen; } @@ -6657,6 +6657,9 @@ S_scan_inputsymbol(pTHX_ char *start) return s; } else { + bool readline_overriden = FALSE; + GV *gv_readline = Nullgv; + GV **gvp; /* we're in a filehandle read situation */ d = PL_tokenbuf; @@ -6664,6 +6667,15 @@ S_scan_inputsymbol(pTHX_ char *start) if (!len) (void)strcpy(d,"ARGV"); + /* Check whether readline() is overriden */ + if ((gv_readline = gv_fetchpv("readline", FALSE, SVt_PVCV)) + && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline) + || + (gvp = (GV**)hv_fetch(PL_globalstash, "readline", 8, FALSE)) + && (gv_readline = *gvp) != (GV*)&PL_sv_undef + && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)) + readline_overriden = TRUE; + /* if <$fh>, create the ops to turn the variable into a filehandle */ @@ -6685,7 +6697,11 @@ S_scan_inputsymbol(pTHX_ char *start) else { OP *o = newOP(OP_PADSV, 0); o->op_targ = tmp; - PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, o); + PL_lex_op = readline_overriden + ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, + append_elem(OP_LIST, o, + newCVREF(0, newGVOP(OP_GV,0,gv_readline)))) + : (OP*)newUNOP(OP_READLINE, 0, o); } } else { @@ -6697,9 +6713,14 @@ intro_sym: ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADDMULTI), SVt_PV); - PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, - newUNOP(OP_RV2SV, 0, - newGVOP(OP_GV, 0, gv))); + PL_lex_op = readline_overriden + ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, + append_elem(OP_LIST, + newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)), + newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) + : (OP*)newUNOP(OP_READLINE, 0, + newUNOP(OP_RV2SV, 0, + newGVOP(OP_GV, 0, gv))); } PL_lex_op->op_flags |= OPf_SPECIAL; /* we created the ops in PL_lex_op, so make yylval.ival a null op */ @@ -6710,7 +6731,12 @@ intro_sym: (<Foo::BAR> or <FOO>) so build a simple readline OP */ else { GV *gv = gv_fetchpv(d,TRUE, SVt_PVIO); - PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); + PL_lex_op = readline_overriden + ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED, + append_elem(OP_LIST, + newGVOP(OP_GV, 0, gv), + newCVREF(0, newGVOP(OP_GV, 0, gv_readline)))) + : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv)); yylval.ival = OP_NULL; } } diff --git a/utils/perldoc.PL b/utils/perldoc.PL index 62a82f381f..2f60c6ed2e 100644 --- a/utils/perldoc.PL +++ b/utils/perldoc.PL @@ -169,13 +169,27 @@ if (!($Is_VMS || $Is_MSWin32 || $Is_Dos || $Is_OS2) && ($> == 0 || $< == 0) my $id = eval { getpwnam("nobody") }; $id = eval { getpwnam("nouser") } unless defined $id; $id = -2 unless defined $id; + # + # According to Stevens' APUE and various + # (BSD, Solaris, HP-UX) man pages setting + # the real uid first and effective uid second + # is the way to go if one wants to drop privileges, + # because if one changes into an effective uid of + # non-zero, one cannot change the real uid any more. + # + # Actually, it gets even messier. There is + # a third uid, called the saved uid, and as + # long as that is zero, one can get back to + # uid of zero. Setting the real-effective *twice* + # helps in *most* systems (FreeBSD and Solaris) + # but apparently in HP-UX even this doesn't help: + # the saved uid stays zero (apparently the only way + # in HP-UX to change saved uid is to call setuid() + # when the effective uid is zero). + # eval { - # According to Stevens' APUE and various - # (BSD, Solaris, HP-UX) man pages setting - # the real uid first and effective uid second - # is the way to go if one wants to drop privileges, - # because if one changes into an effective uid of - # non-zero, one cannot change the real uid any more. + $< = $id; # real uid + $> = $id; # effective uid $< = $id; # real uid $> = $id; # effective uid }; |