Integrate mainline.

p4raw-id: //depot/perlio@9074
author: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-07 21:39:29 +0000
committer: Nick Ing-Simmons <nik@tiuk.ti.com> 2001-03-07 21:39:29 +0000
commit: f127762ae8ed5af83de18a858019907e3c7f32ea (patch)
tree: 1874457638eaca8c61c53a759ef3ebfddc63031a
parent: 4fd193acd0d951124e02044838daf1b0316a9058 (diff)
parent: 4bad07d97843302fe0c8fcda3be736e92b097422 (diff)
download: perl-f127762ae8ed5af83de18a858019907e3c7f32ea.tar.gz
18 files changed, 914 insertions, 579 deletions
diff --git a/Changes b/Changes
index 5e5781e185..bcbeb354a1 100644
--- a/Changes
+++ b/Changes
@@ -32,6 +32,392 @@ Version v5.7.1		Development release working toward v5.8
 --------------
 
 ____________________________________________________________________________
+[  9072] By: jhi                                   on 2001/03/07  16:32:30
+        Log: Subject: [ID 20010306.004] || != named unary operator
+             From: abela@geneanet.org
+             Date: Tue,  6 Mar 2001 15:57:57 +0100 (CET)
+             Message-Id: <20010306145757.0CB03D183@little-roots.geneanet.org>
+     Branch: perl
+	   ! pod/perlop.pod
+____________________________________________________________________________
+[  9071] By: jhi                                   on 2001/03/07  15:14:11
+        Log: Subject: downgrading to bytes for common functions
+             From: andrew@pimlott.ne.mediaone.net (Andrew Pimlott)
+             Date: Wed, 7 Mar 2001 02:36:55 -0500
+             Message-ID: <20010307023655.J24024@pimlott.ne.mediaone.net>
+     Branch: perl
+	   ! pp_sys.c
+____________________________________________________________________________
+[  9070] By: jhi                                   on 2001/03/07  15:04:58
+        Log: Clarify the description differentiating for and while; inspired by
+             
+             Subject: [ID 20010306.002] for/while difference in for definition
+             From: abela@geneanet.org
+             Date: Tue,  6 Mar 2001 09:40:36 +0100 (CET)
+             Message-Id: <20010306084036.7BFD0D17F@little-roots.geneanet.org>
+     Branch: perl
+	   ! pod/perlsyn.pod
+____________________________________________________________________________
+[  9069] By: jhi                                   on 2001/03/07  14:55:30
+        Log: Subject: 'no *POSIX' Patch speeding up make on BS2000 
+             From: Dorner Thomas <Thomas.Dorner@start.de>
+             Date: Wed, 7 Mar 2001 14:13:31 +0100
+             Message-ID: <6727B1DACFCDD311A757009027CA8D69010A88CB@Ex02.inhouse.start.de>
+     Branch: perl
+	   ! Makefile.SH
+____________________________________________________________________________
+[  9068] By: jhi                                   on 2001/03/07  14:51:17
+        Log: Subject: Re: [ID 20010305.012] chop() against list assignment returns char chopped from el zero
+             From: Radu Greab <radu@netsoft.ro>
+             Date: Tue, 6 Mar 2001 23:04:44 +0200 (EET)
+             Message-ID: <15013.20716.201459.540421@ix.netsoft.ro>
+     Branch: perl
+	   ! pp.c t/op/chop.t
+____________________________________________________________________________
+[  9067] By: jhi                                   on 2001/03/07  14:48:57
+        Log: Subject: Re: [PATCH: 5.6.1 trial2] DynaLoading for OS/390 build option
+             From: Simon Cozens <simon@netthink.co.uk>
+             Date: Wed, 7 Mar 2001 13:59:28 +0000
+             Message-ID: <20010307135928.A21620@pembro26.pmb.ox.ac.uk>
+             
+             In order to allow MakeMaker to build correct Makefiles,
+             you need to change the order of the arguments in MM_Unix.pm
+             as well. (Unless you have C89_CCMODE set, but we're trying
+             to be POSIXly correct)
+             
+             Thanks to Merijn Broeren for tracking this down.
+     Branch: perl
+	   ! lib/ExtUtils/MM_Unix.pm
+____________________________________________________________________________
+[  9066] By: jhi                                   on 2001/03/07  14:30:02
+        Log: Floating point too messy.
+     Branch: perl
+	   ! t/camel-III/vstring.t
+____________________________________________________________________________
+[  9065] By: jhi                                   on 2001/03/07  14:17:44
+        Log: Integrate change #9064 from maintperl into mainline.
+             
+             fix memory leak in pack("Bb",...)
+     Branch: perl
+	  !> perl.c
+____________________________________________________________________________
+[  9064] By: gsar                                  on 2001/03/07  06:29:24
+        Log: fix memory leak in pack("Bb",...)
+     Branch: maint-5.6/perl
+	   ! perl.c
+____________________________________________________________________________
+[  9063] By: jhi                                   on 2001/03/07  00:55:04
+        Log: Major utf8 test reorganisation and rewrite.
+             Hopefully no tests were lost in the shuffle.
+             (The beginning of pragma/utf8 was lost intentionally,
+             the tests were rather bogus and incomplete.)
+     Branch: perl
+	   ! t/lib/charnames.t t/op/pack.t t/op/pat.t t/op/split.t
+	   ! t/pragma/utf8.t
+____________________________________________________________________________
+[  9062] By: jhi                                   on 2001/03/07  00:41:16
+        Log: More tweakage on the Unicode character class descriptions.
+     Branch: perl
+	   ! lib/unicode/mktables.PL pod/perlretut.pod
+____________________________________________________________________________
+[  9061] By: jhi                                   on 2001/03/06  23:12:38
+        Log: The perlretut was still talking about the old \p and \P
+             definitions.
+     Branch: perl
+	   ! pod/perlretut.pod
+____________________________________________________________________________
+[  9060] By: nick                                  on 2001/03/06  20:52:37
+        Log: Integrate mainline (make test works again now).
+     Branch: perlio
+	  !> op.c op.h regcomp.c t/camel-III/vstring.t t/op/pat.t
+____________________________________________________________________________
+[  9059] By: jhi                                   on 2001/03/06  15:36:14
+        Log: 0.999... does equal 1.0, doesn't it?
+     Branch: perl
+	   ! t/camel-III/vstring.t
+____________________________________________________________________________
+[  9058] By: jhi                                   on 2001/03/06  15:34:00
+        Log: Make /x{abcd}/ to work without use utf8.
+     Branch: perl
+	   ! regcomp.c
+____________________________________________________________________________
+[  9057] By: jhi                                   on 2001/03/06  03:02:36
+        Log: Easier to outcomment all the three reset() tests for now.
+     Branch: perl
+	   ! t/op/pat.t
+____________________________________________________________________________
+[  9056] By: jhi                                   on 2001/03/06  02:30:02
+        Log: Comment out the deadly reset; until the 20010301.005
+             is finally fixed.
+     Branch: perl
+	   ! t/op/pat.t
+____________________________________________________________________________
+[  9055] By: jhi                                   on 2001/03/06  02:21:26
+        Log: Integrate the change #9054 from mainline:
+             retract the PMOP cleanup patch pending further investigation.
+     Branch: maint-5.6/perl
+	  !> op.c op.h
+____________________________________________________________________________
+[  9054] By: jhi                                   on 2001/03/06  02:19:57
+        Log: Retract the PMOP cleanup patch: I'm seeing strange core dumps
+             with oddly familiar stack traces.
+     Branch: perl
+	   ! op.c op.h
+____________________________________________________________________________
+[  9053] By: jhi                                   on 2001/03/05  23:20:28
+        Log: Integrate perlio.
+     Branch: perl
+	  +> ext/Encode/Encode/koi8-r.ucm
+	  !> MANIFEST ext/Encode/Encode.pm ext/Encode/Makefile.PL
+	  !> makedef.pl
+____________________________________________________________________________
+[  9052] By: nick                                  on 2001/03/05  23:08:17
+        Log: skipping USE_PERLIO symbols correction.
+     Branch: perlio
+	   ! makedef.pl
+____________________________________________________________________________
+[  9051] By: nick                                  on 2001/03/05  22:35:07
+        Log: More Encode alias tidying.
+     Branch: perlio
+	   + ext/Encode/Encode/koi8-r.ucm
+	   ! MANIFEST ext/Encode/Encode.pm ext/Encode/Makefile.PL
+____________________________________________________________________________
+[  9050] By: jhi                                   on 2001/03/05  21:44:29
+        Log: Integrate changes #9033 and #9044 from mainline into maintperl,
+             Sarathy's fix for ID 20010301.005.
+     Branch: maint-5.6/perl
+	  !> op.c op.h
+____________________________________________________________________________
+[  9049] By: nick                                  on 2001/03/05  19:54:04
+        Log: Integrate Jarkko's Encode.pm tweak.
+     Branch: perlio
+	  !> ext/Encode/Encode.pm
+____________________________________________________________________________
+[  9048] By: nick                                  on 2001/03/05  19:47:57
+        Log: Integrate mainline (mostly - holding of on Encode.pm for a bit.)
+     Branch: perlio
+	  +> os2/os2add.sym
+	  !> lib/ExtUtils/MM_OS2.pm lib/ExtUtils/MM_Unix.pm makedef.pl op.c
+	  !> op.h os2/Changes os2/Makefile.SHs os2/OS2/REXX/Makefile.PL
+	  !> os2/OS2/REXX/REXX.pm os2/OS2/REXX/REXX.xs
+	  !> os2/OS2/REXX/t/rx_cmprt.t os2/os2.c os2/os2.sym os2/os2ish.h
+	  !> pod/perlapi.pod t/op/pat.t t/op/tr.t
+____________________________________________________________________________
+[  9047] By: jhi                                   on 2001/03/05  19:25:26
+        Log: Regen perlapi.
+     Branch: perl
+	   ! pod/perlapi.pod
+____________________________________________________________________________
+[  9046] By: jhi                                   on 2001/03/05  19:12:14
+        Log: Integrate perlio.
+     Branch: perl
+	  +> t/camel-III/vstring.t
+	  !> MANIFEST pp.h pp_sys.c t/TEST t/harness
+____________________________________________________________________________
+[  9045] By: jhi                                   on 2001/03/05  19:10:36
+        Log: The mapping of ISO Latin X to ISO 8859-Y is tricky.
+     Branch: perl
+	   ! ext/Encode/Encode.pm
+____________________________________________________________________________
+[  9044] By: jhi                                   on 2001/03/05  18:49:12
+        Log: Forgot the other half of the PMOP patch (#9033).
+     Branch: perl
+	   ! op.h
+____________________________________________________________________________
+[  9043] By: nick                                  on 2001/03/05  18:14:47
+        Log: Update MANIFEST for new test.
+     Branch: perlio
+	   ! MANIFEST
+____________________________________________________________________________
+[  9042] By: nick                                  on 2001/03/05  18:12:41
+        Log: Some tests for Camel 3rd edition features.
+             Make gethostbyaddr() test in above work.
+     Branch: perlio
+	   + t/camel-III/vstring.t
+	   ! pp.h pp_sys.c t/TEST t/harness
+____________________________________________________________________________
+[  9041] By: jhi                                   on 2001/03/05  17:41:48
+        Log: Add more ISO 8859 encoding aliases.
+     Branch: perl
+	   ! ext/Encode/Encode.pm
+____________________________________________________________________________
+[  9040] By: jhi                                   on 2001/03/05  17:30:43
+        Log: Integrate perlio.
+     Branch: perl
+	  !> MANIFEST sv.c
+____________________________________________________________________________
+[  9039] By: nick                                  on 2001/03/05  17:11:36
+        Log: Missed added file.
+     Branch: perlio
+	   ! MANIFEST
+____________________________________________________________________________
+[  9038] By: jhi                                   on 2001/03/05  15:58:01
+        Log: Add tr tests for EBCDIC, from Karsten Sperling.
+     Branch: perl
+	   ! t/op/tr.t
+____________________________________________________________________________
+[  9037] By: nick                                  on 2001/03/05  15:56:35
+        Log: Long-standing "defined but not used" SvPVbyte*() are about to get used
+             for auto-downgrade hooks in gethostbyaddr() etc. (as per Camel-III).
+             
+             Make SvPVbyte*() symetric with respect to SvPVutf8*() in that:
+             - it does a sv_utf8_downgrade (c.f. SvPVutf8's sv_utf8_upgrade())
+             - uses SvPV() rather than sv_2pv() in case it isn't a string.
+     Branch: perlio
+	   ! sv.c
+____________________________________________________________________________
+[  9036] By: jhi                                   on 2001/03/05  15:49:15
+        Log: Add matching tests currently failing in EBCDIC,
+             suggested by Karsten Sperling.
+     Branch: perl
+	   ! t/op/pat.t
+____________________________________________________________________________
+[  9035] By: jhi                                   on 2001/03/05  15:36:55
+        Log: The #9034 needs a MANIFESTation.
+     Branch: perl
+	   ! MANIFEST
+____________________________________________________________________________
+[  9034] By: jhi                                   on 2001/03/05  15:35:14
+        Log: Integrate perlio.
+     Branch: perl
+	  +> ext/Encode/Encode/Tcl.pm
+	  !> ext/Encode/Encode.pm ext/Encode/Encode.xs
+	  !> ext/Encode/Encode/ascii.ucm ext/Encode/Encode/cp1250.ucm
+	  !> ext/Encode/Encode/iso8859-1.ucm
+	  !> ext/Encode/Encode/iso8859-10.ucm
+	  !> ext/Encode/Encode/iso8859-13.ucm
+	  !> ext/Encode/Encode/iso8859-14.ucm
+	  !> ext/Encode/Encode/iso8859-15.ucm
+	  !> ext/Encode/Encode/iso8859-16.ucm
+	  !> ext/Encode/Encode/iso8859-2.ucm
+	  !> ext/Encode/Encode/iso8859-3.ucm
+	  !> ext/Encode/Encode/iso8859-4.ucm
+	  !> ext/Encode/Encode/iso8859-5.ucm
+	  !> ext/Encode/Encode/iso8859-6.ucm
+	  !> ext/Encode/Encode/iso8859-7.ucm
+	  !> ext/Encode/Encode/iso8859-8.ucm
+	  !> ext/Encode/Encode/iso8859-9.ucm ext/Encode/compile
+	  !> t/lib/encode.t
+____________________________________________________________________________
+[  9033] By: jhi                                   on 2001/03/05  15:33:18
+        Log: Subject: Re: [ID 20010301.005] corrupt memory (since @8531, in 5.6.1-T2)
+             From: Gurusamy Sarathy <gsar@ActiveState.com>
+             Date: Sun, 04 Mar 2001 22:33:15 -0800
+             Message-Id: <200103050633.f256XFo06998@smtp3.ActiveState.com>
+             
+             PMOPs need to remember their own stash so that they can
+             gracefully remove themselves from their linked list.
+     Branch: perl
+	   ! op.c
+____________________________________________________________________________
+[  9032] By: nick                                  on 2001/03/05  14:51:50
+        Log: Encode implementation "completion"
+             Implement and document define_encoding()
+             Implement and document encoding aliases including define_alias()
+             Make Encode::XS use define_encoding() rather than back-door.
+             Move run-time *.enc to separate Encode::Tcl module.
+             Make 'compile' honour <codeset_name>
+             Change canonical names of to iso-8859-* and US-ascii.
+     Branch: perlio
+	   + ext/Encode/Encode/Tcl.pm
+	   ! ext/Encode/Encode.pm ext/Encode/Encode.xs
+	   ! ext/Encode/Encode/ascii.ucm ext/Encode/Encode/cp1250.ucm
+	   ! ext/Encode/Encode/iso8859-1.ucm
+	   ! ext/Encode/Encode/iso8859-10.ucm
+	   ! ext/Encode/Encode/iso8859-13.ucm
+	   ! ext/Encode/Encode/iso8859-14.ucm
+	   ! ext/Encode/Encode/iso8859-15.ucm
+	   ! ext/Encode/Encode/iso8859-16.ucm
+	   ! ext/Encode/Encode/iso8859-2.ucm
+	   ! ext/Encode/Encode/iso8859-3.ucm
+	   ! ext/Encode/Encode/iso8859-4.ucm
+	   ! ext/Encode/Encode/iso8859-5.ucm
+	   ! ext/Encode/Encode/iso8859-6.ucm
+	   ! ext/Encode/Encode/iso8859-7.ucm
+	   ! ext/Encode/Encode/iso8859-8.ucm
+	   ! ext/Encode/Encode/iso8859-9.ucm ext/Encode/compile
+	   ! t/lib/encode.t
+____________________________________________________________________________
+[  9031] By: jhi                                   on 2001/03/05  13:51:17
+        Log: Integrate change #9030 from maintperl into mainline.
+             
+             Subject: [PATCH 5.6.1] OS/2 cleanup
+     Branch: perl
+	  +> os2/os2add.sym
+	  !> MANIFEST lib/ExtUtils/MM_OS2.pm lib/ExtUtils/MM_Unix.pm
+	  !> makedef.pl os2/Changes os2/Makefile.SHs
+	  !> os2/OS2/REXX/Makefile.PL os2/OS2/REXX/REXX.pm
+	  !> os2/OS2/REXX/REXX.xs os2/OS2/REXX/t/rx_cmprt.t os2/os2.c
+	  !> os2/os2.sym os2/os2ish.h
+____________________________________________________________________________
+[  9030] By: jhi                                   on 2001/03/05  13:46:49
+        Log: Subject: [PATCH 5.6.1] OS/2 cleanup
+             From: Ilya Zakharevich <ilya@math.ohio-state.edu>
+             Date: Mon, 5 Mar 2001 02:29:44 -0500
+             Message-ID: <20010305022944.A10117@math.ohio-state.edu>
+     Branch: maint-5.6/perl
+	   + os2/os2add.sym
+	   ! MANIFEST lib/ExtUtils/MM_OS2.pm lib/ExtUtils/MM_Unix.pm
+	   ! makedef.pl os2/Changes os2/Makefile.SHs
+	   ! os2/OS2/REXX/Makefile.PL os2/OS2/REXX/REXX.pm
+	   ! os2/OS2/REXX/REXX.xs os2/OS2/REXX/t/rx_cmprt.t os2/os2.c
+	   ! os2/os2.sym os2/os2ish.h
+____________________________________________________________________________
+[  9029] By: jhi                                   on 2001/03/05  13:40:35
+        Log: Integrate change #9028 from maintperl into mainline.
+             
+             various nits in MM_Unix.pm found by disabling SelfLoader
+     Branch: perl
+	  !> lib/ExtUtils/MM_Unix.pm
+____________________________________________________________________________
+[  9028] By: gsar                                  on 2001/03/05  09:58:38
+        Log: various nits in MM_Unix.pm found by disabling SelfLoader
+     Branch: maint-5.6/perl
+	   ! lib/ExtUtils/MM_Unix.pm
+____________________________________________________________________________
+[  9027] By: nick                                  on 2001/03/05  08:35:22
+        Log: Integrate mainline
+     Branch: perlio
+	  !> (integrate 28 files)
+____________________________________________________________________________
+[  9026] By: jhi                                   on 2001/03/05  02:14:59
+        Log: Integrate change #9025 from mainline to maintperl,
+             retract \N{U+HHHH}.
+     Branch: maint-5.6/perl
+	  !> lib/charnames.pm pod/perldiag.pod pod/perlretut.pod
+	  !> t/lib/charnames.t toke.c
+____________________________________________________________________________
+[  9025] By: jhi                                   on 2001/03/05  02:13:09
+        Log: Retracting \N{U+HHHH}.
+     Branch: perl
+	   ! lib/charnames.pm pod/perldiag.pod pod/perlretut.pod
+	   ! t/lib/charnames.t toke.c
+____________________________________________________________________________
+[  9024] By: jhi                                   on 2001/03/04  20:51:07
+        Log: Subject: Re: Smoking patch 8898 for perl v5.7.0 on aix 4.3.0.0
+             From: "H.Merijn Brand" <h.m.brand@hccnet.nl> 
+             Date: Fri, 02 Mar 2001 18:25:26 +0100
+             Message-Id: <20010302182416.1BF4.H.M.BRAND@hccnet.nl>
+             
+             Casting problem.
+     Branch: perl
+	   ! ext/Storable/Storable.xs
+____________________________________________________________________________
+[  9023] By: jhi                                   on 2001/03/04  19:46:45
+        Log: Subject: [ID 20010303.009] SOCKS5 work around breaks other sockets
+             From: nick@talking.bollo.cx
+             Date: Sat, 03 Mar 2001 21:41:33 +0000
+             Message-Id: <E14ZJmP-0000mE-00@Bagpuss.unfortu.net>
+             
+             Revert the workaround.
+     Branch: perl
+	   ! perlio.c
+____________________________________________________________________________
+[  9022] By: jhi                                   on 2001/03/04  19:36:28
+        Log: Update Changes.
+     Branch: perl
+	   ! Changes patchlevel.h
+____________________________________________________________________________
 [  9021] By: jhi                                   on 2001/03/04  18:41:00
         Log: makedef.pl updates suggested by Chris Nandor; double-checking
              the changes in AIX, OS/2 Win32 would be nice.
diff --git a/Makefile.SH b/Makefile.SH
index 43a1cc37b3..1799253fee 100644
--- a/Makefile.SH
+++ b/Makefile.SH
@@ -132,6 +132,15 @@ case "$osname" in
 	;;
 esac
 
+# Handle the usage of different yaccs in posix-bc (During Configure we
+# us yacc for perly.y and byacc for a2p.y.  The makefiles must use the
+# same configuration for run_byacc!):
+case "$osname" in
+    posix-bc)
+	byacc=$yacc
+	;;
+esac
+
 echo "Extracting Makefile (with variable substitutions)"
 $spitshell >Makefile <<!GROK!THIS!
 # Makefile.SH
diff --git a/lib/ExtUtils/MM_Unix.pm b/lib/ExtUtils/MM_Unix.pm
index e043b3cb3f..0f441d0e82 100644
--- a/lib/ExtUtils/MM_Unix.pm
+++ b/lib/ExtUtils/MM_Unix.pm
@@ -1097,8 +1097,8 @@ $(INST_DYNAMIC): $(OBJECT) $(MYEXTLIB) $(BOOTSTRAP) $(INST_ARCHAUTODIR)/.exists
     push(@m,'	$(RM_F) $@
 ');
 
-    push(@m,'	LD_RUN_PATH="$(LD_RUN_PATH)" $(LD) -o $@ '.$ldrun.' $(LDDLFLAGS) '.$ldfrom.
-		' $(OTHERLDFLAGS) $(MYEXTLIB) $(PERL_ARCHIVE) $(LDLOADLIBS) $(PERL_ARCHIVE_AFTER) $(EXPORT_LIST)');
+    push(@m,'	LD_RUN_PATH="$(LD_RUN_PATH)" $(LD) '.$ldrun.' $(LDDLFLAGS) '.$ldfrom.
+		' $(OTHERLDFLAGS) -o $@ $(MYEXTLIB) $(PERL_ARCHIVE) $(LDLOADLIBS) $(PERL_ARCHIVE_AFTER) $(EXPORT_LIST)');
     push @m, '
 	$(CHMOD) $(PERM_RWX) $@
 ';
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index bb253055d2..dbc2ed1842 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -31,7 +31,8 @@ mkdir "To", 0755;
     ['IsSpacePerl',
                  '$cat  =~ /^Z/ ||
                   $code =~ /^(0009|000A|000C|000D)$/',		''],
-    ['IsBlank',  '$cat  =~ /^Z[^lp]$/ ||  $code eq "0009"',	''],
+    ['IsBlank',  '$code =~ /^(0020|0009)$/ ||
+		  $cat  =~ /^Z[^lp]$/',	''],
     ['IsDigit',  '$cat =~ /^Nd$/',	''],
     ['IsUpper',  '$cat =~ /^L[ut]$/',	''],
     ['IsLower',  '$cat =~ /^Ll$/',	''],
diff --git a/patchlevel.h b/patchlevel.h
index 1037fa580b..9c94a042e2 100644
--- a/patchlevel.h
+++ b/patchlevel.h
@@ -70,7 +70,7 @@
 #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT)
 static	char	*local_patches[] = {
         NULL
-	,"DEVEL9021"
+	,"DEVEL9072"
 	,NULL
 };
 
diff --git a/perl.c b/perl.c
index b1d70b17c8..03b2ec1118 100644
--- a/perl.c
+++ b/perl.c
@@ -745,6 +745,7 @@ perl_destruct(pTHXx)
     Safefree(PL_op_mask);
     Safefree(PL_psig_ptr);
     Safefree(PL_psig_name);
+    Safefree(PL_bitcount);
     Safefree(PL_psig_pend);
     nuke_stacks();
     PL_hints = 0;		/* Reset hints. Should hints be per-interpreter ? */
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 8f2ecde031..9e6634a5c3 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -242,14 +242,15 @@ operators, like C<-f>, C<-M>, etc.  See L<perlfunc>.
 If any list operator (print(), etc.) or any unary operator (chdir(), etc.)
 is followed by a left parenthesis as the next token, the operator and
 arguments within parentheses are taken to be of highest precedence,
-just like a normal function call.  Examples:
+just like a normal function call.  For example,
+because named unary operators are higher precedence than ||:
 
     chdir $foo    || die;	# (chdir $foo) || die
     chdir($foo)   || die;	# (chdir $foo) || die
     chdir ($foo)  || die;	# (chdir $foo) || die
     chdir +($foo) || die;	# (chdir $foo) || die
 
-but, because * is higher precedence than ||:
+but, because * is higher precedence than named operators:
 
     chdir $foo * 20;	# chdir ($foo * 20)
     chdir($foo) * 20;	# (chdir $foo) * 20
diff --git a/pod/perlretut.pod b/pod/perlretut.pod
index a77b87e125..fa6479c0c4 100644
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -1720,29 +1720,32 @@ characters,
     $x =~ /^\p{IsLower}/;   # doesn't match, lowercase char class
     $x =~ /^\P{IsLower}/;   # matches, char class sans lowercase
 
-If a C<name> is just one letter, the braces can be dropped.  For
-instance, C<\pM> is the character class of Unicode 'marks'.  Here is
-the association between some Perl named classes and the traditional
-Unicode classes:
+Here is the association between some Perl named classes and the
+traditional Unicode classes:
 
-    Perl class name  Unicode class name
+    Perl class name  Unicode class name or regular expression
 
-    IsAlpha          Lu, Ll, or Lo
-    IsAlnum          Lu, Ll, Lo, or Nd
-    IsASCII          $code le 127
-    IsCntrl          C
+    IsAlpha          /^[LM]/
+    IsAlnum          /^[LMN]/
+    IsASCII          $code <= 127
+    IsCntrl          /^C/
+    IsBlank          $code =~ /^(0020|0009)$/ || /^Z[^lp]/
     IsDigit          Nd
-    IsGraph          [^C] and $code ne "0020"
+    IsGraph          /^([LMNPS]|Co)/
     IsLower          Ll
-    IsPrint          [^C]
-    IsPunct          P
-    IsSpace          Z, or ($code lt "0020" and chr(hex $code) is a \s)
-    IsUpper          Lu
-    IsWord           Lu, Ll, Lo, Nd or $code eq "005F"
+    IsPrint          /^([LMNPS]|Co|Zs)/
+    IsPunct          /^P/
+    IsSpace          /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/
+    IsSpacePerl      /^Z/ || ($code =~ /^(0009|000A|000C|000D)$/
+    IsUpper          /^L[ut]/
+    IsWord           /^[LMN]/ || $code eq "005F"
     IsXDigit         $code =~ /^00(3[0-9]|[46][1-6])$/
 
-For a full list of Perl class names, consult the mktables.PL program
-in the lib/perl5/5.6.0/unicode directory.
+You can also use the official Unicode class names with the C<\p> and
+C<\P>, like C<\p{L}> for Unicode 'letters', or C<\p{Lu}> for uppercase
+letters, or C<\P{Nd}> for non-digits.  If a C<name> is just one
+letter, the braces can be dropped.  For instance, C<\pM> is the
+character class of Unicode 'marks'.
 
 C<\X> is an abbreviation for a character class sequence that includes
 the Unicode 'combining character sequences'.  A 'combining character
diff --git a/pod/perlsyn.pod b/pod/perlsyn.pod
index e6b420e5db..aad4efd2f7 100644
--- a/pod/perlsyn.pod
+++ b/pod/perlsyn.pod
@@ -263,7 +263,7 @@ available.   Replace any occurrence of C<if BLOCK> by C<if (do BLOCK)>.
 
 =head2 For Loops
 
-Perl's C-style C<for> loop works exactly like the corresponding C<while> loop;
+Perl's C-style C<for> loop works like the corresponding C<while> loop;
 that means that this:
 
     for ($i = 1; $i < 10; $i++) {
@@ -279,8 +279,10 @@ is the same as this:
 	$i++;
     }
 
-(There is one minor difference: The first form implies a lexical scope
-for variables declared with C<my> in the initialization expression.)
+There is one minor difference: if variables are declared with C<my>
+in the initialization section of the C<for>, the lexical scope of
+those variables is exactly the C<for> loop (the body of the loop
+and the control sections).
 
 Besides the normal array index looping, C<for> can lend itself
 to many other interesting applications.  Here's one that avoids the
diff --git a/pp.c b/pp.c
index 1bbb1086ad..4c21f1b5b4 100644
--- a/pp.c
+++ b/pp.c
@@ -745,9 +745,10 @@ PP(pp_schop)
 
 PP(pp_chop)
 {
-    dSP; dMARK; dTARGET;
-    while (SP > MARK)
-	do_chop(TARG, POPs);
+    dSP; dMARK; dTARGET; dORIGMARK;
+    while (MARK < SP)
+	do_chop(TARG, *++MARK);
+    SP = ORIGMARK;
     PUSHTARG;
     RETURN;
 }
diff --git a/pp_sys.c b/pp_sys.c
index c6e407b340..ab430367fa 100644
--- a/pp_sys.c
+++ b/pp_sys.c
@@ -4622,7 +4622,7 @@ PP(pp_gnetent)
 
     if (which == OP_GNBYNAME)
 #ifdef HAS_GETNETBYNAME
-	nent = PerlSock_getnetbyname(POPpx);
+	nent = PerlSock_getnetbyname(POPpbytex);
 #else
         DIE(aTHX_ PL_no_sock_func, "getnetbyname");
 #endif
@@ -4710,7 +4710,7 @@ PP(pp_gprotoent)
 
     if (which == OP_GPBYNAME)
 #ifdef HAS_GETPROTOBYNAME
-	pent = PerlSock_getprotobyname(POPpx);
+	pent = PerlSock_getprotobyname(POPpbytex);
 #else
 	DIE(aTHX_ PL_no_sock_func, "getprotobyname");
 #endif
@@ -4793,8 +4793,8 @@ PP(pp_gservent)
 
     if (which == OP_GSBYNAME) {
 #ifdef HAS_GETSERVBYNAME
-	char *proto = POPpx;
-	char *name = POPpx;
+	char *proto = POPpbytex;
+	char *name = POPpbytex;
 
 	if (proto && !*proto)
 	    proto = Nullch;
@@ -4806,7 +4806,7 @@ PP(pp_gservent)
     }
     else if (which == OP_GSBYPORT) {
 #ifdef HAS_GETSERVBYPORT
-	char *proto = POPpx;
+	char *proto = POPpbytex;
 	unsigned short port = POPu;
 
 #ifdef HAS_HTONS
@@ -5040,7 +5040,7 @@ PP(pp_gpwent)
 
     switch (which) {
     case OP_GPWNAM:
-	pwent  = getpwnam(POPpx);
+	pwent  = getpwnam(POPpbytex);
 	break;
     case OP_GPWUID:
 	pwent = getpwuid((Uid_t)POPi);
@@ -5241,7 +5241,7 @@ PP(pp_ggrent)
     STRLEN n_a;
 
     if (which == OP_GGRNAM)
-	grent = (struct group *)getgrnam(POPpx);
+	grent = (struct group *)getgrnam(POPpbytex);
     else if (which == OP_GGRGID)
 	grent = (struct group *)getgrgid(POPi);
     else
diff --git a/t/camel-III/vstring.t b/t/camel-III/vstring.t
index a252a3b3c8..7360ae7654 100644
--- a/t/camel-III/vstring.t
+++ b/t/camel-III/vstring.t
@@ -4,7 +4,7 @@ BEGIN {
     @INC = '../lib';
 }
 use Test;
-plan test => 6;
+plan test => 5;
 # Error messages may have wide chars, say that is okay - if we can.
 eval { binmode STDOUT,":utf8" };
 
@@ -33,7 +33,7 @@ else
 
 # Chapter 28, pp671
 ok(v5.6.0 lt v5.7.0,1,"v5.6.0 lt v5.7.0 fails");
-# Some floating-point risk here ...
-my $v = ord($^V)+ord(substr($^V,1,1))/1000+ord(substr($^V,2,1))/1000000;
-$v =~ s/^5\.006999\d+/5.007/; # floating point fun
-ok($v,$],"\$^V and \$] do not match");
+
+# floating point too messy
+# my $v = ord($^V)+ord(substr($^V,1,1))/1000+ord(substr($^V,2,1))/1000000;
+# ok($v,$],"\$^V and \$] do not match");
diff --git a/t/lib/charnames.t b/t/lib/charnames.t
index 6a8a8be32d..9773a2025d 100644
--- a/t/lib/charnames.t
+++ b/t/lib/charnames.t
@@ -8,7 +8,7 @@ BEGIN {
 }
 
 $| = 1;
-print "1..15\n";
+print "1..16\n";
 
 use charnames ':full';
 
@@ -103,6 +103,18 @@ sub to_bytes {
   print "not "
       unless to_bytes("\N{DESERET SMALL LETTER ENG}") eq $encoded_deseng;
   print "ok 15\n";
+}
 
+{
+  # 20001114.001	
+
+  if (ord("�") == 0xc4) { # Try to do this only on Latin-1.
+      use charnames ':full';
+      my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
+      print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
+      print "ok 16\n";
+  } else {
+      print "ok 16 # Skip: not Latin-1\n";
+  }
 }
 
diff --git a/t/op/chop.t b/t/op/chop.t
index 9edddedaf7..1b55f11832 100755
--- a/t/op/chop.t
+++ b/t/op/chop.t
@@ -1,6 +1,6 @@
 #!./perl
 
-print "1..34\n";
+print "1..37\n";
 
 # optimized
 
@@ -104,6 +104,15 @@ $_ = "\x{1234}\x{2345}";
 chop;
 print $_ eq "\x{1234}" ? "ok 33\n" : "not ok 33\n";
 
-# TODO!  Make sure chop(LIST) returns the right value.
 my @stuff = qw(this that);
-print chop(@stuff[0,1]) eq 't' ? "ok 34 # TODO\n" : "not ok 34 # TODO\n";
+print chop(@stuff[0,1]) eq 't' ? "ok 34\n" : "not ok 34\n";
+
+# bug id 20010305.012
+@stuff = qw(ab cd ef);
+print chop(@stuff = @stuff) eq 'f' ? "ok 35\n" : "not ok 35\n";
+
+@stuff = qw(ab cd ef);
+print chop(@stuff[0, 2]) eq 'f' ? "ok 36\n" : "not ok 36\n";
+
+my %stuff = (1..4);
+print chop(@stuff{1, 3}) eq '4' ? "ok 37\n" : "not ok 37\n";
diff --git a/t/op/pack.t b/t/op/pack.t
index 67bd547c5b..4c169917ae 100755
--- a/t/op/pack.t
+++ b/t/op/pack.t
@@ -6,7 +6,7 @@ BEGIN {
     require Config; import Config;
 }
 
-print "1..159\n";
+print "1..160\n";
 
 $format = "c2 x5 C C x s d i l a6";
 # Need the expression in here to force ary[5] to be numeric.  This avoids
@@ -416,3 +416,6 @@ print 'not ' unless v1.20.300.4000 ne
                     sprintf "%vd", pack("C0U*",1,20,300,4000); 
 print "ok $test\n"; $test++;
 
+# 160
+print "not " unless join(" ", unpack("C*", chr(0x1e2))) eq "199 162";
+print "ok $test\n"; $test++;
diff --git a/t/op/pat.t b/t/op/pat.t
index 0c8810356b..711f9f08e9 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -4,7 +4,7 @@
 # the format supported by op/regexp.t.  If you want to add a test
 # that does fit that format, add it to op/re_tests, not here.
 
-print "1..245\n";
+print "1..580\n";
 
 BEGIN {
     chdir 't' if -d 't';
@@ -1228,3 +1228,310 @@ if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC
     print "ok $_ # Skip: not EBCDIC\n";
   }
 }
+
+print "not " unless "\x{ab}" =~ /\x{ab}/;
+print "ok 246\n";
+
+print "not " unless "\x{abcd}" =~ /\x{abcd}/;
+print "ok 247\n";
+
+{
+    # bug id 20001008.001
+
+    use utf8; # BUG - should not be needed, but is, otherwise core dump
+
+    my $test = 248;
+    my @x = ("stra\337e 138","stra\337e 138");
+    for (@x) {
+	s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
+	my($latin) = /^(.+)(?:\s+\d)/;
+	print $latin eq "stra\337e" ? "ok $test\n" :	# 248,249
+	    "#latin[$latin]\nnot ok $test\n";
+	$test++;
+	$latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
+	use utf8;
+	$latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
+    }
+}
+
+{
+    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+    print "ok 250\n";
+
+    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+    print "ok 251\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
+    print "ok 252\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
+    print "ok 253\n";
+
+    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+    print "ok 254\n";
+
+    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+    print "ok 255\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
+    print "ok 256\n";
+
+    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
+    print "ok 257\n";
+}
+
+{
+    # the first half of 20001028.003
+
+    my $X = chr(1448);
+    my ($Y) = $X =~ /(.*)/;
+    print "not " unless $Y eq v1448 && length($Y) == 1;
+    print "ok 258\n";
+}
+
+{
+    # 20001108.001
+
+    my $X = "Szab\x{f3},Bal\x{e1}zs";
+    my $Y = $X;
+    $Y =~ s/(B)/$1/ for 0..3;
+    print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs";
+    print "ok 259\n";
+}
+
+{
+    # the second half of 20001028.003
+
+    $X =~ s/^/chr(1488)/e;
+    print "not " unless length $X == 1 && ord($X) == 1488;
+    print "ok 260\n";
+}
+
+{
+    # 20000517.001
+
+    my $x = "\x{100}A";
+
+    $x =~ s/A/B/;
+
+    print "not " unless $x eq "\x{100}B" && length($x) == 2;
+    print "ok 261\n";
+}
+
+{
+    # bug id 20001230.002
+
+    print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c';
+    print "ok 262\n";
+
+    print "not " unless "École" =~ /^\C\C(c)/;
+    print "ok 263\n";
+}
+
+{
+    my $test = 264; # till 575
+
+    use charnames ':full';
+
+    # This is far from complete testing, there are dozens of character
+    # classes in Unicode.  The mixing of literals and \N{...} is
+    # intentional so that in non-Latin-1 places we test the native
+    # characters, not the Unicode code points.
+
+    my %s = (
+	     "a" 				=> 'Ll',
+	     "\N{CYRILLIC SMALL LETTER A}"	=> 'Ll',
+	     "A" 				=> 'Lu',
+	     "\N{GREEK CAPITAL LETTER ALPHA}"	=> 'Lu',
+	     "\N{HIRAGANA LETTER SMALL A}"	=> 'Lo',
+	     "\N{COMBINING GRAVE ACCENT}"	=> 'Mn',
+	     "0"				=> 'Nd',
+	     "\N{ARABIC-INDIC DIGIT ZERO}"	=> 'Nd',
+	     "_"				=> 'N',
+	     "!"				=> 'P',
+	     " "				=> 'Zs',
+	     "\0"				=> 'Cc',
+	     );
+	     
+    for my $char (keys %s) {
+	my $class = $s{$char};
+	my $code  = sprintf("%04x", ord($char));
+	printf "# 0x$code\n";
+	print "# IsAlpha\n";
+	if ($class =~ /^[LM]/) {
+	    print "not " unless $char =~ /\p{IsAlpha}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsAlpha}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsAlpha}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsAlpha}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsAlnum\n";
+	if ($class =~ /^[LMN]/ && $char ne "_") {
+	    print "not " unless $char =~ /\p{IsAlnum}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsAlnum}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsAlnum}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsAlnum}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsASCII\n";
+	if ($code <= 127) {
+	    print "not " unless $char =~ /\p{IsASCII}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsASCII}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsASCII}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsASCII}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsCntrl\n";
+	if ($class =~ /^C/) {
+	    print "not " unless $char =~ /\p{IsCntrl}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsCntrl}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsCntrl}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsCntrl}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsBlank\n";
+	if ($class =~ /^Z[lp]/ || $char eq " ") {
+	    print "not " unless $char =~ /\p{IsBlank}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsBlank}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsBlank}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsBlank}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsDigit\n";
+	if ($class =~ /^Nd$/) {
+	    print "not " unless $char =~ /\p{IsDigit}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsDigit}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsDigit}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsDigit}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsGraph\n";
+	if ($class =~ /^([LMNPS])|Co/) {
+	    print "not " unless $char =~ /\p{IsGraph}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsGraph}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsGraph}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsGraph}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsLower\n";
+	if ($class =~ /^Ll$/) {
+	    print "not " unless $char =~ /\p{IsLower}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsLower}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsLower}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsLower}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsPrint\n";
+	if ($class =~ /^([LMNPS])|Co|Zs/) {
+	    print "not " unless $char =~ /\p{IsPrint}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsPrint}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsPrint}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsPrint}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsPunct\n";
+	if ($class =~ /^P/ || $char eq "_") {
+	    print "not " unless $char =~ /\p{IsPunct}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsPunct}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsPunct}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsPunct}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsSpace\n";
+	if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) {
+	    print "not " unless $char =~ /\p{IsSpace}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsSpace}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsSpace}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsSpace}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsUpper\n";
+	if ($class =~ /^L[ut]/) {
+	    print "not " unless $char =~ /\p{IsUpper}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsUpper}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsUpper}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsUpper}/;
+	    print "ok $test\n"; $test++;
+	}
+	print "# IsWord\n";
+	if ($class =~ /^[LMN]/ || $char eq "_") {
+	    print "not " unless $char =~ /\p{IsWord}/;
+	    print "ok $test\n"; $test++;
+	    print "not " if     $char =~ /\P{IsWord}/;
+	    print "ok $test\n"; $test++;
+	} else {
+	    print "not " if     $char =~ /\p{IsWord}/;
+	    print "ok $test\n"; $test++;
+	    print "not " unless $char =~ /\P{IsWord}/;
+	    print "ok $test\n"; $test++;
+	}
+    }
+}
+
+{
+    $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg";
+
+    if (/(.\x{300})./) {
+	print "ok 576\n";
+
+	print "not " unless $` eq "abc\x{100}" && length($`) == 4;
+	print "ok 577\n"; 
+
+	print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3;
+	print "ok 578\n"; 
+
+	print "not " unless $' eq "\x{400}defg" && length($') == 5;
+	print "ok 579\n"; 
+
+	print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2;
+	print "ok 580\n"; 
+    }
+}
diff --git a/t/op/split.t b/t/op/split.t
index ffc29be899..ce8d64d947 100755
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -1,6 +1,6 @@
 #!./perl
 
-print "1..32\n";
+print "1..44\n";
 
 $FS = ':';
 
@@ -14,7 +14,7 @@ if (join(';',$a,$b,$c) eq 'a;b;c') {print "ok 1\n";} else {print "not ok 1\n";}
 if (join("$_",@ary) eq 'aa:b:cc') {print "ok 2\n";} else {print "not ok 2\n";}
 
 $_ = "abc\n";
-@xyz = (@ary = split(//));
+my @xyz = (@ary = split(//));
 if (join(".",@ary) eq "a.b.c.\n") {print "ok 3\n";} else {print "not ok 3\n";}
 
 $_ = "a:b:c::::";
@@ -151,5 +151,89 @@ print "not " unless @ary == 3 &&
                     $ary[2] eq "\xFD\xFD"     &&
                     $ary[2] eq "\x{FD}\xFD"   &&
                     $ary[2] eq "\x{FD}\x{FD}";
-
 print "ok 32\n";
+
+
+{
+    my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
+    print "not " unless "@a" eq "1234 123 2345";
+    print "ok 33\n";
+}
+
+{
+    my $x = chr(123);
+    my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
+    print "not " unless "@a" eq "1234 2345";
+    print "ok 34\n";
+}
+
+{
+    # bug id 20000427.003 
+
+    use warnings;
+    use strict;
+
+    my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
+
+    my @charlist = split //, $sushi;
+    my $r = '';
+    foreach my $ch (@charlist) {
+	$r = $r . " " . sprintf "U+%04X", ord($ch);
+    }
+
+    print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
+    print "ok 35\n";
+}
+
+{
+    # bug id 20000426.003
+
+    my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
+
+    my ($a, $b, $c) = split(/\x40/, $s);
+    print "not "
+	unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
+    print "ok 36\n";
+
+    my ($a, $b) = split(/\x{100}/, $s);
+    print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
+    print "ok 37\n";
+
+    my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
+    print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
+    print "ok 38\n";
+
+    my ($a, $b) = split(/\x40\x{80}/, $s);
+    print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
+    print "ok 39\n";
+
+    my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
+    print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
+    print "ok 40\n";
+}
+
+{
+    # 20001205.014
+
+    my $a = "ABC\x{263A}";
+
+    my @b = split( //, $a );
+
+    print "not " unless @b == 4;
+    print "ok 41\n";
+
+    print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}";
+    print "ok 42\n";
+
+    $a =~ s/^A/Z/;
+    print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}";
+    print "ok 43\n";
+}
+
+{
+    my @a = split(/\xFE/, "\xFF\xFE\xFD");
+
+    print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD";
+    print "ok 44\n";
+}
+
diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t
index 60e6c6e102..31d119137a 100755
--- a/t/pragma/utf8.t
+++ b/t/pragma/utf8.t
@@ -10,297 +10,30 @@ BEGIN {
     }
 }
 
-print "1..109\n";
-
-my $test = 1;
-
-sub ok {
-    my ($got,$expect) = @_;
-    print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
-    print "ok $test\n";
-}
-
-sub nok {
-    my ($got,$expect) = @_;
-    print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
-    print "ok $test\n";
-}
-
-sub ok_bytes {
-    use bytes;
-    my ($got,$expect) = @_;
-    print "# expected [$expect], got [$got]\nnot " if $got ne $expect;
-    print "ok $test\n";
-}
-
-sub nok_bytes {
-    use bytes;
-    my ($got,$expect) = @_;
-    print "# expected not [$expect], got [$got]\nnot " if $got eq $expect;
-    print "ok $test\n";
-}
-
-{
-    use utf8;
-
-    $_ = ">\x{263A}<"; 
-    s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; 
-    ok $_, '>&#9786;<';
-    $test++;				# 1
-
-    $_ = ">\x{263A}<"; 
-    my $rx = "\x{80}-\x{10ffff}";
-    s/([$rx])/"&#".ord($1).";"/eg; 
-    ok $_, '>&#9786;<';
-    $test++;				# 2
-
-    $_ = ">\x{263A}<"; 
-    my $rx = "\\x{80}-\\x{10ffff}";
-    s/([$rx])/"&#".ord($1).";"/eg; 
-    ok $_, '>&#9786;<';
-    $test++;				# 3
-
-    $_ = "alpha,numeric"; 
-    m/([[:alpha:]]+)/; 
-    ok $1, 'alpha';
-    $test++;				# 4
-
-    $_ = "alphaNUMERICstring";
-    m/([[:^lower:]]+)/; 
-    ok $1, 'NUMERIC';
-    $test++;				# 5
-
-    $_ = "alphaNUMERICstring";
-    m/(\p{Ll}+)/; 
-    ok $1, 'alpha';
-    $test++;				# 6
-
-    $_ = "alphaNUMERICstring"; 
-    m/(\p{Lu}+)/; 
-    ok $1, 'NUMERIC';
-    $test++;				# 7
-
-    $_ = "alpha,numeric"; 
-    m/([\p{IsAlpha}]+)/; 
-    ok $1, 'alpha';
-    $test++;				# 8
-
-    $_ = "alphaNUMERICstring";
-    m/([^\p{IsLower}]+)/; 
-    ok $1, 'NUMERIC';
-    $test++;				# 9
-
-    $_ = "alpha123numeric456"; 
-    m/([\p{IsDigit}]+)/; 
-    ok $1, '123';
-    $test++;				# 10
-
-    $_ = "alpha123numeric456"; 
-    m/([^\p{IsDigit}]+)/; 
-    ok $1, 'alpha';
-    $test++;				# 11
-
-    $_ = ",123alpha,456numeric"; 
-    m/([\p{IsAlnum}]+)/; 
-    ok $1, '123alpha';
-    $test++;				# 12
-}
-
-{
-    # no use utf8 needed
-    $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}";
-    
-    ok length($_), 6;			# 13
-    $test++;
-
-    ($a) = m/x(.)/;
-
-    ok length($a), 1;			# 14
-    $test++;
-
-    ok length($`), 2;			# 15
-    $test++;
-    ok length($&), 2;			# 16
-    $test++;
-    ok length($'), 2;			# 17
-    $test++;
-
-    ok length($1), 1;			# 18
-    $test++;
-
-    ok length($b=$`), 2;		# 19
-    $test++;
-
-    ok length($b=$&), 2;		# 20
-    $test++;
-
-    ok length($b=$'), 2;		# 21
-    $test++;
-
-    ok length($b=$1), 1;		# 22
-    $test++;
-
-    ok $a, "\x{263A}";			# 23
-    $test++;
-
-    ok $`, "\x{263A}\x{263A}";		# 24
-    $test++;
-
-    ok $&, "x\x{263A}";			# 25
-    $test++;
-
-    ok $', "y\x{263A}";			# 26
-    $test++;
-
-    ok $1, "\x{263A}";			# 27
-    $test++;
-
-    ok_bytes $a, "\342\230\272";	# 28
-    $test++;
-
-    ok_bytes $1, "\342\230\272";	# 29
-    $test++;
-
-    ok_bytes $&, "x\342\230\272";	# 30
-    $test++;
-
-    {
-	use utf8; # required
-	$_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A);
-    }
-
-    ok length($_), 6;			# 31
-    $test++;
-
-    ($a) = m/x(.)/;
-
-    ok length($a), 1;			# 32
-    $test++;
-
-    ok length($`), 2;			# 33
-    $test++;
-
-    ok length($&), 2;			# 34
-    $test++;
-
-    ok length($'), 2;			# 35
-    $test++;
-
-    ok length($1), 1;			# 36
-    $test++;
-
-    ok length($b=$`), 2;		# 37
-    $test++;
-
-    ok length($b=$&), 2;		# 38
-    $test++;
-
-    ok length($b=$'), 2;		# 39
-    $test++;
-
-    ok length($b=$1), 1;		# 40
-    $test++;
-
-    ok $a, "\x{263A}";			# 41
-    $test++;
-
-    ok $`, "\x{263A}\x{263A}";		# 42
-    $test++;
-
-    ok $&, "x\x{263A}";			# 43
-    $test++;
-
-    ok $', "y\x{263A}";			# 44
-    $test++;
-
-    ok $1, "\x{263A}";			# 45
-    $test++;
-
-    ok_bytes $a, "\342\230\272";	# 46
-    $test++;
-
-    ok_bytes $1, "\342\230\272";	# 47
-    $test++;
-
-    ok_bytes $&, "x\342\230\272";	# 48
-    $test++;
-
-    $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272";
-
-    ok length($_), 14;			# 49
-    $test++;
-
-    ($a) = m/x(.)/;
-
-    ok length($a), 1;			# 50
-    $test++;
-
-    ok length($`), 6;			# 51
-    $test++;
-
-    ok length($&), 2;			# 52
-    $test++;
-
-    ok length($'), 6;			# 53
-    $test++;
-
-    ok length($1), 1;			# 54
-    $test++;
-
-    ok length($b=$`), 6;		# 55
-    $test++;
-
-    ok length($b=$&), 2;		# 56
-    $test++;
-
-    ok length($b=$'), 6;		# 57
-    $test++;
-
-    ok length($b=$1), 1;		# 58
-    $test++;
-
-    ok $a, "\342";			# 59
-    $test++;
-
-    ok $`, "\342\230\272\342\230\272";	# 60
-    $test++;
-
-    ok $&, "x\342";			# 61
-    $test++;
-
-    ok $', "\230\272y\342\230\272";	# 62
-    $test++;
-
-    ok $1, "\342";			# 63
-    $test++;
-}
-
-{
-    use utf8;
-    ok "\x{ab}" =~ /^\x{ab}$/, 1;
-    $test++;				# 64
-}
-
-{
-    use utf8;
-    ok_bytes chr(0x1e2), pack("C*", 0xc7, 0xa2);
-    $test++;                # 65
-}
-
-{
-    use utf8;
-    my @a = map ord, split(//, join("", map chr, (1234, 123, 2345)));
-    ok "@a", "1234 123 2345";
-    $test++;                # 66
-}
-
-{
-    use utf8;
-    my $x = chr(123);
-    my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345)));
-    ok "@a", "1234 2345";
-    $test++;                # 67
-}
+# NOTE!
+#
+# Think carefully before adding tests here.  In general this should be
+# used only for about three categories of tests:
+#
+# (1) tests that absolutely require 'use utf8', and since that in general
+#     shouldn't be needed as the utf8 is being obsoleted, this should
+#     have rather few tests.  If you want to test Unicode and regexes,
+#     you probably want to go to op/regexp or op/pat; if you want to test
+#     split, go to op/split; pack, op/pack; appending or joining,
+#     op/append or op/join, and so forth
+#
+# (2) tests that have to do with Unicode tokenizing (though it's likely
+#     that all the other Unicode tests sprinkled around the t/**/*.t are
+#     going to catch that)
+#
+# (3) complicated tests that simultaneously stress so many Unicode features
+#     that deciding into which other test script the tests should go to
+#     is hard -- maybe consider breaking up the complicated test
+#
+#
+
+use Test;
+plan tests => 15;
 
 {
     # bug id 20001009.001
@@ -308,100 +41,29 @@ sub nok_bytes {
     my ($a, $b);
 
     { use bytes; $a = "\xc3\xa4" }
-    { use utf8;  $b = "\xe4"     } # \xXX must not produce UTF-8
+    { use utf8;  $b = "\xe4"     }
 
-    print "not " if $a eq $b;
-    print "ok $test\n"; $test++;	# 68
-
-    { use utf8; print "not " if $a eq $b; }
-    print "ok $test\n"; $test++;	# 69
-}
+    my $test = 68;
 
-{
-    # bug id 20001008.001
-
-    my @x = ("stra\337e 138","stra\337e 138");
-    for (@x) {
-	s/(\d+)\s*([\w\-]+)/$1 . uc $2/e;
-	my($latin) = /^(.+)(?:\s+\d)/;
-	print $latin eq "stra\337e" ? "ok $test\n" :	# 70, 71
-	    "#latin[$latin]\nnot ok $test\n";
-	$test++;
-	$latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a
-	use utf8;
-	$latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a
-    }
-}
-
-{
-    # bug id 20000427.003 
-
-    use utf8;
-    use warnings;
-    use strict;
-
-    my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}";
-
-    my @charlist = split //, $sushi;
-    my $r = '';
-    foreach my $ch (@charlist) {
-	$r = $r . " " . sprintf "U+%04X", ord($ch);
-    }
+    ok($a ne $b);
 
-    print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B";
-    print "ok $test\n";			# 72
-    $test++;
+    { use utf8; ok($a ne $b) }
 }
 
-{
-    # bug id 20000426.003
-
-    use utf8;
-
-    my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20";
-
-    my ($a, $b, $c) = split(/\x40/, $s);
-    print "not "
-	unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a;
-    print "ok $test\n";
-    $test++;				# 73
-
-    my ($a, $b) = split(/\x{100}/, $s);
-    print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20";
-    print "ok $test\n";
-    $test++;				# 74
-
-    my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s);
-    print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20";
-    print "ok $test\n";
-    $test++;				# 75
-
-    my ($a, $b) = split(/\x40\x{80}/, $s);
-    print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20";
-    print "ok $test\n";
-    $test++;				# 76
-
-    my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s);
-    print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20";
-    print "ok $test\n";
-    $test++;				# 77
-}
 
 {
     # bug id 20000730.004
 
-    use utf8;
-
     my $smiley = "\x{263a}";
 
-    for my $s ("\x{263a}",                     # 78
-	       $smiley,                        # 79
+    for my $s ("\x{263a}",
+	       $smiley,
 		
-	       "" . $smiley,                   # 80
-	       "" . "\x{263a}",                # 81
+	       "" . $smiley,
+	       "" . "\x{263a}",
 
-	       $smiley    . "",                # 82
-	       "\x{263a}" . "",                # 83
+	       $smiley    . "",
+	       "\x{263a}" . "",
 	       ) {
 	my $length_chars = length($s);
 	my $length_bytes;
@@ -410,21 +72,18 @@ sub nok_bytes {
 	my $regex_chars = @regex_chars;
 	my @split_chars = split //, $s;
 	my $split_chars = @split_chars;
-	print "not "
-	    unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
-		   "1/1/1/3";
-	print "ok $test\n";
-	$test++;
+	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+	   "1/1/1/3");
     }
 
-    for my $s ("\x{263a}" . "\x{263a}",        # 84
-	       $smiley    . $smiley,           # 85
+    for my $s ("\x{263a}" . "\x{263a}",
+	       $smiley    . $smiley,
 
-	       "\x{263a}\x{263a}",             # 86
-	       "$smiley$smiley",               # 87
+	       "\x{263a}\x{263a}",
+	       "$smiley$smiley",
 	       
-	       "\x{263a}" x 2,                 # 88
-	       $smiley    x 2,                 # 89
+	       "\x{263a}" x 2,
+	       $smiley    x 2,
 	       ) {
 	my $length_chars = length($s);
 	my $length_bytes;
@@ -433,160 +92,17 @@ sub nok_bytes {
 	my $regex_chars = @regex_chars;
 	my @split_chars = split //, $s;
 	my $split_chars = @split_chars;
-	print "not "
-	    unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq
-		   "2/2/2/6";
-	print "ok $test\n";
-	$test++;
+	ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq
+	   "2/2/2/6");
     }
 }
 
-{
-    use utf8;
-
-    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
-    print "ok $test\n";
-    $test++;					# 90
-
-    print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
-    print "ok $test\n";
-    $test++;					# 91
-
-    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}";
-    print "ok $test\n";
-    $test++;					# 92
-
-    print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4";
-    print "ok $test\n";
-    $test++;					# 93
-
-    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
-    print "ok $test\n";
-    $test++;					# 94
-
-    print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
-    print "ok $test\n";
-    $test++;					# 95
-
-    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}";
-    print "ok $test\n";
-    $test++;					# 96
-
-    print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4";
-    print "ok $test\n";
-    $test++;					# 97
-}
 
 {
-    # the first half of 20001028.003
-
-    my $X = chr(1448);
-    my ($Y) = $X =~ /(.*)/;
-    print "not " unless $Y eq v1448 && length($Y) == 1;
-    print "ok $test\n";
-    $test++;					# 98
-}
-
-{
-    # 20001108.001
-
-    use utf8;
-    my $X = "Szab\x{f3},Bal\x{e1}zs";
-    my $Y = $X;
-    $Y =~ s/(B)/$1/ for 0..3;
-    print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs";
-    print "ok $test\n";
-    $test++;					# 99
-}
-
-{
-    # 20001114.001	
-
-    use utf8;
-    use charnames ':full';
-    my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}";
-    print "not " unless $text eq "\xc4" && ord($text) == 0xc4;
-    print "ok $test\n";
-    $test++;                                    # 100
-}
-
-{
-    # 20001205.014
-
-    use utf8;
-
-    my $a = "ABC\x{263A}";
-
-    my @b = split( //, $a );
-
-    print "not " unless @b == 4;
-    print "ok $test\n";
-    $test++;                                    # 101
-
-    print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}";
-    print "ok $test\n";
-    $test++;                                    # 102
-
-    $a =~ s/^A/Z/;
-    print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}";
-    print "ok $test\n";
-    $test++;                                    # 103
-}
-
-{
-    # the second half of 20001028.003
-
-    use utf8;
-    $X =~ s/^/chr(1488)/e;
-    print "not " unless length $X == 1 && ord($X) == 1488;
-    print "ok $test\n";
-    $test++;					# 104
-}
-
-{
-    # 20000517.001
-
-    my $x = "\x{100}A";
-
-    $x =~ s/A/B/;
-
-    print "not " unless $x eq "\x{100}B" && length($x) == 2;
-    print "ok $test\n";
-    $test++;					# 105
-}
-
-{
-    use utf8;
-
-    my @a = split(/\xFE/, "\xFF\xFE\xFD");
-
-    print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD";
-    print "ok $test\n";
-    $test++;					# 106
-}
-
-{
-    use utf8;
-
     my $w = 0;
     local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ };
     my $x = eval q/"\\/ . "\x{100}" . q/"/;;
    
-    print "not " unless $w == 0 && $x eq "\x{100}";
-    print "ok $test\n";
-    $test++;					# 107
+    ok($w == 0 && $x eq "\x{100}");
 }
 
-{
-    # bug id 20001230.002
-
-    use utf8;
-
-    print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c';
-    print "ok $test\n";
-    $test++;					# 108
-
-    print "not " unless "École" =~ /^\C\C(c)/;
-    print "ok $test\n";
-    $test++;					# 109
-}
author	Nick Ing-Simmons <nik@tiuk.ti.com>	2001-03-07 21:39:29 +0000
committer	Nick Ing-Simmons <nik@tiuk.ti.com>	2001-03-07 21:39:29 +0000
commit	f127762ae8ed5af83de18a858019907e3c7f32ea (patch)
tree	1874457638eaca8c61c53a759ef3ebfddc63031a
parent	4fd193acd0d951124e02044838daf1b0316a9058 (diff)
parent	4bad07d97843302fe0c8fcda3be736e92b097422 (diff)
download	perl-f127762ae8ed5af83de18a858019907e3c7f32ea.tar.gz