diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-07 21:39:29 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-03-07 21:39:29 +0000 |
commit | f127762ae8ed5af83de18a858019907e3c7f32ea (patch) | |
tree | 1874457638eaca8c61c53a759ef3ebfddc63031a | |
parent | 4fd193acd0d951124e02044838daf1b0316a9058 (diff) | |
parent | 4bad07d97843302fe0c8fcda3be736e92b097422 (diff) | |
download | perl-f127762ae8ed5af83de18a858019907e3c7f32ea.tar.gz |
Integrate mainline.
p4raw-id: //depot/perlio@9074
-rw-r--r-- | Changes | 386 | ||||
-rw-r--r-- | Makefile.SH | 9 | ||||
-rw-r--r-- | lib/ExtUtils/MM_Unix.pm | 4 | ||||
-rwxr-xr-x | lib/unicode/mktables.PL | 3 | ||||
-rw-r--r-- | patchlevel.h | 2 | ||||
-rw-r--r-- | perl.c | 1 | ||||
-rw-r--r-- | pod/perlop.pod | 5 | ||||
-rw-r--r-- | pod/perlretut.pod | 37 | ||||
-rw-r--r-- | pod/perlsyn.pod | 8 | ||||
-rw-r--r-- | pp.c | 7 | ||||
-rw-r--r-- | pp_sys.c | 14 | ||||
-rw-r--r-- | t/camel-III/vstring.t | 10 | ||||
-rw-r--r-- | t/lib/charnames.t | 14 | ||||
-rwxr-xr-x | t/op/chop.t | 15 | ||||
-rwxr-xr-x | t/op/pack.t | 5 | ||||
-rwxr-xr-x | t/op/pat.t | 309 | ||||
-rwxr-xr-x | t/op/split.t | 90 | ||||
-rwxr-xr-x | t/pragma/utf8.t | 574 |
18 files changed, 914 insertions, 579 deletions
@@ -32,6 +32,392 @@ Version v5.7.1 Development release working toward v5.8 -------------- ____________________________________________________________________________ +[ 9072] By: jhi on 2001/03/07 16:32:30 + Log: Subject: [ID 20010306.004] || != named unary operator + From: abela@geneanet.org + Date: Tue, 6 Mar 2001 15:57:57 +0100 (CET) + Message-Id: <20010306145757.0CB03D183@little-roots.geneanet.org> + Branch: perl + ! pod/perlop.pod +____________________________________________________________________________ +[ 9071] By: jhi on 2001/03/07 15:14:11 + Log: Subject: downgrading to bytes for common functions + From: andrew@pimlott.ne.mediaone.net (Andrew Pimlott) + Date: Wed, 7 Mar 2001 02:36:55 -0500 + Message-ID: <20010307023655.J24024@pimlott.ne.mediaone.net> + Branch: perl + ! pp_sys.c +____________________________________________________________________________ +[ 9070] By: jhi on 2001/03/07 15:04:58 + Log: Clarify the description differentiating for and while; inspired by + + Subject: [ID 20010306.002] for/while difference in for definition + From: abela@geneanet.org + Date: Tue, 6 Mar 2001 09:40:36 +0100 (CET) + Message-Id: <20010306084036.7BFD0D17F@little-roots.geneanet.org> + Branch: perl + ! pod/perlsyn.pod +____________________________________________________________________________ +[ 9069] By: jhi on 2001/03/07 14:55:30 + Log: Subject: 'no *POSIX' Patch speeding up make on BS2000 + From: Dorner Thomas <Thomas.Dorner@start.de> + Date: Wed, 7 Mar 2001 14:13:31 +0100 + Message-ID: <6727B1DACFCDD311A757009027CA8D69010A88CB@Ex02.inhouse.start.de> + Branch: perl + ! Makefile.SH +____________________________________________________________________________ +[ 9068] By: jhi on 2001/03/07 14:51:17 + Log: Subject: Re: [ID 20010305.012] chop() against list assignment returns char chopped from el zero + From: Radu Greab <radu@netsoft.ro> + Date: Tue, 6 Mar 2001 23:04:44 +0200 (EET) + Message-ID: <15013.20716.201459.540421@ix.netsoft.ro> + Branch: perl + ! pp.c t/op/chop.t +____________________________________________________________________________ +[ 9067] By: jhi on 2001/03/07 14:48:57 + Log: Subject: Re: [PATCH: 5.6.1 trial2] DynaLoading for OS/390 build option + From: Simon Cozens <simon@netthink.co.uk> + Date: Wed, 7 Mar 2001 13:59:28 +0000 + Message-ID: <20010307135928.A21620@pembro26.pmb.ox.ac.uk> + + In order to allow MakeMaker to build correct Makefiles, + you need to change the order of the arguments in MM_Unix.pm + as well. (Unless you have C89_CCMODE set, but we're trying + to be POSIXly correct) + + Thanks to Merijn Broeren for tracking this down. + Branch: perl + ! lib/ExtUtils/MM_Unix.pm +____________________________________________________________________________ +[ 9066] By: jhi on 2001/03/07 14:30:02 + Log: Floating point too messy. + Branch: perl + ! t/camel-III/vstring.t +____________________________________________________________________________ +[ 9065] By: jhi on 2001/03/07 14:17:44 + Log: Integrate change #9064 from maintperl into mainline. + + fix memory leak in pack("Bb",...) + Branch: perl + !> perl.c +____________________________________________________________________________ +[ 9064] By: gsar on 2001/03/07 06:29:24 + Log: fix memory leak in pack("Bb",...) + Branch: maint-5.6/perl + ! perl.c +____________________________________________________________________________ +[ 9063] By: jhi on 2001/03/07 00:55:04 + Log: Major utf8 test reorganisation and rewrite. + Hopefully no tests were lost in the shuffle. + (The beginning of pragma/utf8 was lost intentionally, + the tests were rather bogus and incomplete.) + Branch: perl + ! t/lib/charnames.t t/op/pack.t t/op/pat.t t/op/split.t + ! t/pragma/utf8.t +____________________________________________________________________________ +[ 9062] By: jhi on 2001/03/07 00:41:16 + Log: More tweakage on the Unicode character class descriptions. + Branch: perl + ! lib/unicode/mktables.PL pod/perlretut.pod +____________________________________________________________________________ +[ 9061] By: jhi on 2001/03/06 23:12:38 + Log: The perlretut was still talking about the old \p and \P + definitions. + Branch: perl + ! pod/perlretut.pod +____________________________________________________________________________ +[ 9060] By: nick on 2001/03/06 20:52:37 + Log: Integrate mainline (make test works again now). + Branch: perlio + !> op.c op.h regcomp.c t/camel-III/vstring.t t/op/pat.t +____________________________________________________________________________ +[ 9059] By: jhi on 2001/03/06 15:36:14 + Log: 0.999... does equal 1.0, doesn't it? + Branch: perl + ! t/camel-III/vstring.t +____________________________________________________________________________ +[ 9058] By: jhi on 2001/03/06 15:34:00 + Log: Make /x{abcd}/ to work without use utf8. + Branch: perl + ! regcomp.c +____________________________________________________________________________ +[ 9057] By: jhi on 2001/03/06 03:02:36 + Log: Easier to outcomment all the three reset() tests for now. + Branch: perl + ! t/op/pat.t +____________________________________________________________________________ +[ 9056] By: jhi on 2001/03/06 02:30:02 + Log: Comment out the deadly reset; until the 20010301.005 + is finally fixed. + Branch: perl + ! t/op/pat.t +____________________________________________________________________________ +[ 9055] By: jhi on 2001/03/06 02:21:26 + Log: Integrate the change #9054 from mainline: + retract the PMOP cleanup patch pending further investigation. + Branch: maint-5.6/perl + !> op.c op.h +____________________________________________________________________________ +[ 9054] By: jhi on 2001/03/06 02:19:57 + Log: Retract the PMOP cleanup patch: I'm seeing strange core dumps + with oddly familiar stack traces. + Branch: perl + ! op.c op.h +____________________________________________________________________________ +[ 9053] By: jhi on 2001/03/05 23:20:28 + Log: Integrate perlio. + Branch: perl + +> ext/Encode/Encode/koi8-r.ucm + !> MANIFEST ext/Encode/Encode.pm ext/Encode/Makefile.PL + !> makedef.pl +____________________________________________________________________________ +[ 9052] By: nick on 2001/03/05 23:08:17 + Log: skipping USE_PERLIO symbols correction. + Branch: perlio + ! makedef.pl +____________________________________________________________________________ +[ 9051] By: nick on 2001/03/05 22:35:07 + Log: More Encode alias tidying. + Branch: perlio + + ext/Encode/Encode/koi8-r.ucm + ! MANIFEST ext/Encode/Encode.pm ext/Encode/Makefile.PL +____________________________________________________________________________ +[ 9050] By: jhi on 2001/03/05 21:44:29 + Log: Integrate changes #9033 and #9044 from mainline into maintperl, + Sarathy's fix for ID 20010301.005. + Branch: maint-5.6/perl + !> op.c op.h +____________________________________________________________________________ +[ 9049] By: nick on 2001/03/05 19:54:04 + Log: Integrate Jarkko's Encode.pm tweak. + Branch: perlio + !> ext/Encode/Encode.pm +____________________________________________________________________________ +[ 9048] By: nick on 2001/03/05 19:47:57 + Log: Integrate mainline (mostly - holding of on Encode.pm for a bit.) + Branch: perlio + +> os2/os2add.sym + !> lib/ExtUtils/MM_OS2.pm lib/ExtUtils/MM_Unix.pm makedef.pl op.c + !> op.h os2/Changes os2/Makefile.SHs os2/OS2/REXX/Makefile.PL + !> os2/OS2/REXX/REXX.pm os2/OS2/REXX/REXX.xs + !> os2/OS2/REXX/t/rx_cmprt.t os2/os2.c os2/os2.sym os2/os2ish.h + !> pod/perlapi.pod t/op/pat.t t/op/tr.t +____________________________________________________________________________ +[ 9047] By: jhi on 2001/03/05 19:25:26 + Log: Regen perlapi. + Branch: perl + ! pod/perlapi.pod +____________________________________________________________________________ +[ 9046] By: jhi on 2001/03/05 19:12:14 + Log: Integrate perlio. + Branch: perl + +> t/camel-III/vstring.t + !> MANIFEST pp.h pp_sys.c t/TEST t/harness +____________________________________________________________________________ +[ 9045] By: jhi on 2001/03/05 19:10:36 + Log: The mapping of ISO Latin X to ISO 8859-Y is tricky. + Branch: perl + ! ext/Encode/Encode.pm +____________________________________________________________________________ +[ 9044] By: jhi on 2001/03/05 18:49:12 + Log: Forgot the other half of the PMOP patch (#9033). + Branch: perl + ! op.h +____________________________________________________________________________ +[ 9043] By: nick on 2001/03/05 18:14:47 + Log: Update MANIFEST for new test. + Branch: perlio + ! MANIFEST +____________________________________________________________________________ +[ 9042] By: nick on 2001/03/05 18:12:41 + Log: Some tests for Camel 3rd edition features. + Make gethostbyaddr() test in above work. + Branch: perlio + + t/camel-III/vstring.t + ! pp.h pp_sys.c t/TEST t/harness +____________________________________________________________________________ +[ 9041] By: jhi on 2001/03/05 17:41:48 + Log: Add more ISO 8859 encoding aliases. + Branch: perl + ! ext/Encode/Encode.pm +____________________________________________________________________________ +[ 9040] By: jhi on 2001/03/05 17:30:43 + Log: Integrate perlio. + Branch: perl + !> MANIFEST sv.c +____________________________________________________________________________ +[ 9039] By: nick on 2001/03/05 17:11:36 + Log: Missed added file. + Branch: perlio + ! MANIFEST +____________________________________________________________________________ +[ 9038] By: jhi on 2001/03/05 15:58:01 + Log: Add tr tests for EBCDIC, from Karsten Sperling. + Branch: perl + ! t/op/tr.t +____________________________________________________________________________ +[ 9037] By: nick on 2001/03/05 15:56:35 + Log: Long-standing "defined but not used" SvPVbyte*() are about to get used + for auto-downgrade hooks in gethostbyaddr() etc. (as per Camel-III). + + Make SvPVbyte*() symetric with respect to SvPVutf8*() in that: + - it does a sv_utf8_downgrade (c.f. SvPVutf8's sv_utf8_upgrade()) + - uses SvPV() rather than sv_2pv() in case it isn't a string. + Branch: perlio + ! sv.c +____________________________________________________________________________ +[ 9036] By: jhi on 2001/03/05 15:49:15 + Log: Add matching tests currently failing in EBCDIC, + suggested by Karsten Sperling. + Branch: perl + ! t/op/pat.t +____________________________________________________________________________ +[ 9035] By: jhi on 2001/03/05 15:36:55 + Log: The #9034 needs a MANIFESTation. + Branch: perl + ! MANIFEST +____________________________________________________________________________ +[ 9034] By: jhi on 2001/03/05 15:35:14 + Log: Integrate perlio. + Branch: perl + +> ext/Encode/Encode/Tcl.pm + !> ext/Encode/Encode.pm ext/Encode/Encode.xs + !> ext/Encode/Encode/ascii.ucm ext/Encode/Encode/cp1250.ucm + !> ext/Encode/Encode/iso8859-1.ucm + !> ext/Encode/Encode/iso8859-10.ucm + !> ext/Encode/Encode/iso8859-13.ucm + !> ext/Encode/Encode/iso8859-14.ucm + !> ext/Encode/Encode/iso8859-15.ucm + !> ext/Encode/Encode/iso8859-16.ucm + !> ext/Encode/Encode/iso8859-2.ucm + !> ext/Encode/Encode/iso8859-3.ucm + !> ext/Encode/Encode/iso8859-4.ucm + !> ext/Encode/Encode/iso8859-5.ucm + !> ext/Encode/Encode/iso8859-6.ucm + !> ext/Encode/Encode/iso8859-7.ucm + !> ext/Encode/Encode/iso8859-8.ucm + !> ext/Encode/Encode/iso8859-9.ucm ext/Encode/compile + !> t/lib/encode.t +____________________________________________________________________________ +[ 9033] By: jhi on 2001/03/05 15:33:18 + Log: Subject: Re: [ID 20010301.005] corrupt memory (since @8531, in 5.6.1-T2) + From: Gurusamy Sarathy <gsar@ActiveState.com> + Date: Sun, 04 Mar 2001 22:33:15 -0800 + Message-Id: <200103050633.f256XFo06998@smtp3.ActiveState.com> + + PMOPs need to remember their own stash so that they can + gracefully remove themselves from their linked list. + Branch: perl + ! op.c +____________________________________________________________________________ +[ 9032] By: nick on 2001/03/05 14:51:50 + Log: Encode implementation "completion" + Implement and document define_encoding() + Implement and document encoding aliases including define_alias() + Make Encode::XS use define_encoding() rather than back-door. + Move run-time *.enc to separate Encode::Tcl module. + Make 'compile' honour <codeset_name> + Change canonical names of to iso-8859-* and US-ascii. + Branch: perlio + + ext/Encode/Encode/Tcl.pm + ! ext/Encode/Encode.pm ext/Encode/Encode.xs + ! ext/Encode/Encode/ascii.ucm ext/Encode/Encode/cp1250.ucm + ! ext/Encode/Encode/iso8859-1.ucm + ! ext/Encode/Encode/iso8859-10.ucm + ! ext/Encode/Encode/iso8859-13.ucm + ! ext/Encode/Encode/iso8859-14.ucm + ! ext/Encode/Encode/iso8859-15.ucm + ! ext/Encode/Encode/iso8859-16.ucm + ! ext/Encode/Encode/iso8859-2.ucm + ! ext/Encode/Encode/iso8859-3.ucm + ! ext/Encode/Encode/iso8859-4.ucm + ! ext/Encode/Encode/iso8859-5.ucm + ! ext/Encode/Encode/iso8859-6.ucm + ! ext/Encode/Encode/iso8859-7.ucm + ! ext/Encode/Encode/iso8859-8.ucm + ! ext/Encode/Encode/iso8859-9.ucm ext/Encode/compile + ! t/lib/encode.t +____________________________________________________________________________ +[ 9031] By: jhi on 2001/03/05 13:51:17 + Log: Integrate change #9030 from maintperl into mainline. + + Subject: [PATCH 5.6.1] OS/2 cleanup + Branch: perl + +> os2/os2add.sym + !> MANIFEST lib/ExtUtils/MM_OS2.pm lib/ExtUtils/MM_Unix.pm + !> makedef.pl os2/Changes os2/Makefile.SHs + !> os2/OS2/REXX/Makefile.PL os2/OS2/REXX/REXX.pm + !> os2/OS2/REXX/REXX.xs os2/OS2/REXX/t/rx_cmprt.t os2/os2.c + !> os2/os2.sym os2/os2ish.h +____________________________________________________________________________ +[ 9030] By: jhi on 2001/03/05 13:46:49 + Log: Subject: [PATCH 5.6.1] OS/2 cleanup + From: Ilya Zakharevich <ilya@math.ohio-state.edu> + Date: Mon, 5 Mar 2001 02:29:44 -0500 + Message-ID: <20010305022944.A10117@math.ohio-state.edu> + Branch: maint-5.6/perl + + os2/os2add.sym + ! MANIFEST lib/ExtUtils/MM_OS2.pm lib/ExtUtils/MM_Unix.pm + ! makedef.pl os2/Changes os2/Makefile.SHs + ! os2/OS2/REXX/Makefile.PL os2/OS2/REXX/REXX.pm + ! os2/OS2/REXX/REXX.xs os2/OS2/REXX/t/rx_cmprt.t os2/os2.c + ! os2/os2.sym os2/os2ish.h +____________________________________________________________________________ +[ 9029] By: jhi on 2001/03/05 13:40:35 + Log: Integrate change #9028 from maintperl into mainline. + + various nits in MM_Unix.pm found by disabling SelfLoader + Branch: perl + !> lib/ExtUtils/MM_Unix.pm +____________________________________________________________________________ +[ 9028] By: gsar on 2001/03/05 09:58:38 + Log: various nits in MM_Unix.pm found by disabling SelfLoader + Branch: maint-5.6/perl + ! lib/ExtUtils/MM_Unix.pm +____________________________________________________________________________ +[ 9027] By: nick on 2001/03/05 08:35:22 + Log: Integrate mainline + Branch: perlio + !> (integrate 28 files) +____________________________________________________________________________ +[ 9026] By: jhi on 2001/03/05 02:14:59 + Log: Integrate change #9025 from mainline to maintperl, + retract \N{U+HHHH}. + Branch: maint-5.6/perl + !> lib/charnames.pm pod/perldiag.pod pod/perlretut.pod + !> t/lib/charnames.t toke.c +____________________________________________________________________________ +[ 9025] By: jhi on 2001/03/05 02:13:09 + Log: Retracting \N{U+HHHH}. + Branch: perl + ! lib/charnames.pm pod/perldiag.pod pod/perlretut.pod + ! t/lib/charnames.t toke.c +____________________________________________________________________________ +[ 9024] By: jhi on 2001/03/04 20:51:07 + Log: Subject: Re: Smoking patch 8898 for perl v5.7.0 on aix 4.3.0.0 + From: "H.Merijn Brand" <h.m.brand@hccnet.nl> + Date: Fri, 02 Mar 2001 18:25:26 +0100 + Message-Id: <20010302182416.1BF4.H.M.BRAND@hccnet.nl> + + Casting problem. + Branch: perl + ! ext/Storable/Storable.xs +____________________________________________________________________________ +[ 9023] By: jhi on 2001/03/04 19:46:45 + Log: Subject: [ID 20010303.009] SOCKS5 work around breaks other sockets + From: nick@talking.bollo.cx + Date: Sat, 03 Mar 2001 21:41:33 +0000 + Message-Id: <E14ZJmP-0000mE-00@Bagpuss.unfortu.net> + + Revert the workaround. + Branch: perl + ! perlio.c +____________________________________________________________________________ +[ 9022] By: jhi on 2001/03/04 19:36:28 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ [ 9021] By: jhi on 2001/03/04 18:41:00 Log: makedef.pl updates suggested by Chris Nandor; double-checking the changes in AIX, OS/2 Win32 would be nice. diff --git a/Makefile.SH b/Makefile.SH index 43a1cc37b3..1799253fee 100644 --- a/Makefile.SH +++ b/Makefile.SH @@ -132,6 +132,15 @@ case "$osname" in ;; esac +# Handle the usage of different yaccs in posix-bc (During Configure we +# us yacc for perly.y and byacc for a2p.y. The makefiles must use the +# same configuration for run_byacc!): +case "$osname" in + posix-bc) + byacc=$yacc + ;; +esac + echo "Extracting Makefile (with variable substitutions)" $spitshell >Makefile <<!GROK!THIS! # Makefile.SH diff --git a/lib/ExtUtils/MM_Unix.pm b/lib/ExtUtils/MM_Unix.pm index e043b3cb3f..0f441d0e82 100644 --- a/lib/ExtUtils/MM_Unix.pm +++ b/lib/ExtUtils/MM_Unix.pm @@ -1097,8 +1097,8 @@ $(INST_DYNAMIC): $(OBJECT) $(MYEXTLIB) $(BOOTSTRAP) $(INST_ARCHAUTODIR)/.exists push(@m,' $(RM_F) $@ '); - push(@m,' LD_RUN_PATH="$(LD_RUN_PATH)" $(LD) -o $@ '.$ldrun.' $(LDDLFLAGS) '.$ldfrom. - ' $(OTHERLDFLAGS) $(MYEXTLIB) $(PERL_ARCHIVE) $(LDLOADLIBS) $(PERL_ARCHIVE_AFTER) $(EXPORT_LIST)'); + push(@m,' LD_RUN_PATH="$(LD_RUN_PATH)" $(LD) '.$ldrun.' $(LDDLFLAGS) '.$ldfrom. + ' $(OTHERLDFLAGS) -o $@ $(MYEXTLIB) $(PERL_ARCHIVE) $(LDLOADLIBS) $(PERL_ARCHIVE_AFTER) $(EXPORT_LIST)'); push @m, ' $(CHMOD) $(PERM_RWX) $@ '; diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL index bb253055d2..dbc2ed1842 100755 --- a/lib/unicode/mktables.PL +++ b/lib/unicode/mktables.PL @@ -31,7 +31,8 @@ mkdir "To", 0755; ['IsSpacePerl', '$cat =~ /^Z/ || $code =~ /^(0009|000A|000C|000D)$/', ''], - ['IsBlank', '$cat =~ /^Z[^lp]$/ || $code eq "0009"', ''], + ['IsBlank', '$code =~ /^(0020|0009)$/ || + $cat =~ /^Z[^lp]$/', ''], ['IsDigit', '$cat =~ /^Nd$/', ''], ['IsUpper', '$cat =~ /^L[ut]$/', ''], ['IsLower', '$cat =~ /^Ll$/', ''], diff --git a/patchlevel.h b/patchlevel.h index 1037fa580b..9c94a042e2 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -70,7 +70,7 @@ #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT) static char *local_patches[] = { NULL - ,"DEVEL9021" + ,"DEVEL9072" ,NULL }; @@ -745,6 +745,7 @@ perl_destruct(pTHXx) Safefree(PL_op_mask); Safefree(PL_psig_ptr); Safefree(PL_psig_name); + Safefree(PL_bitcount); Safefree(PL_psig_pend); nuke_stacks(); PL_hints = 0; /* Reset hints. Should hints be per-interpreter ? */ diff --git a/pod/perlop.pod b/pod/perlop.pod index 8f2ecde031..9e6634a5c3 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -242,14 +242,15 @@ operators, like C<-f>, C<-M>, etc. See L<perlfunc>. If any list operator (print(), etc.) or any unary operator (chdir(), etc.) is followed by a left parenthesis as the next token, the operator and arguments within parentheses are taken to be of highest precedence, -just like a normal function call. Examples: +just like a normal function call. For example, +because named unary operators are higher precedence than ||: chdir $foo || die; # (chdir $foo) || die chdir($foo) || die; # (chdir $foo) || die chdir ($foo) || die; # (chdir $foo) || die chdir +($foo) || die; # (chdir $foo) || die -but, because * is higher precedence than ||: +but, because * is higher precedence than named operators: chdir $foo * 20; # chdir ($foo * 20) chdir($foo) * 20; # (chdir $foo) * 20 diff --git a/pod/perlretut.pod b/pod/perlretut.pod index a77b87e125..fa6479c0c4 100644 --- a/pod/perlretut.pod +++ b/pod/perlretut.pod @@ -1720,29 +1720,32 @@ characters, $x =~ /^\p{IsLower}/; # doesn't match, lowercase char class $x =~ /^\P{IsLower}/; # matches, char class sans lowercase -If a C<name> is just one letter, the braces can be dropped. For -instance, C<\pM> is the character class of Unicode 'marks'. Here is -the association between some Perl named classes and the traditional -Unicode classes: +Here is the association between some Perl named classes and the +traditional Unicode classes: - Perl class name Unicode class name + Perl class name Unicode class name or regular expression - IsAlpha Lu, Ll, or Lo - IsAlnum Lu, Ll, Lo, or Nd - IsASCII $code le 127 - IsCntrl C + IsAlpha /^[LM]/ + IsAlnum /^[LMN]/ + IsASCII $code <= 127 + IsCntrl /^C/ + IsBlank $code =~ /^(0020|0009)$/ || /^Z[^lp]/ IsDigit Nd - IsGraph [^C] and $code ne "0020" + IsGraph /^([LMNPS]|Co)/ IsLower Ll - IsPrint [^C] - IsPunct P - IsSpace Z, or ($code lt "0020" and chr(hex $code) is a \s) - IsUpper Lu - IsWord Lu, Ll, Lo, Nd or $code eq "005F" + IsPrint /^([LMNPS]|Co|Zs)/ + IsPunct /^P/ + IsSpace /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/ + IsSpacePerl /^Z/ || ($code =~ /^(0009|000A|000C|000D)$/ + IsUpper /^L[ut]/ + IsWord /^[LMN]/ || $code eq "005F" IsXDigit $code =~ /^00(3[0-9]|[46][1-6])$/ -For a full list of Perl class names, consult the mktables.PL program -in the lib/perl5/5.6.0/unicode directory. +You can also use the official Unicode class names with the C<\p> and +C<\P>, like C<\p{L}> for Unicode 'letters', or C<\p{Lu}> for uppercase +letters, or C<\P{Nd}> for non-digits. If a C<name> is just one +letter, the braces can be dropped. For instance, C<\pM> is the +character class of Unicode 'marks'. C<\X> is an abbreviation for a character class sequence that includes the Unicode 'combining character sequences'. A 'combining character diff --git a/pod/perlsyn.pod b/pod/perlsyn.pod index e6b420e5db..aad4efd2f7 100644 --- a/pod/perlsyn.pod +++ b/pod/perlsyn.pod @@ -263,7 +263,7 @@ available. Replace any occurrence of C<if BLOCK> by C<if (do BLOCK)>. =head2 For Loops -Perl's C-style C<for> loop works exactly like the corresponding C<while> loop; +Perl's C-style C<for> loop works like the corresponding C<while> loop; that means that this: for ($i = 1; $i < 10; $i++) { @@ -279,8 +279,10 @@ is the same as this: $i++; } -(There is one minor difference: The first form implies a lexical scope -for variables declared with C<my> in the initialization expression.) +There is one minor difference: if variables are declared with C<my> +in the initialization section of the C<for>, the lexical scope of +those variables is exactly the C<for> loop (the body of the loop +and the control sections). Besides the normal array index looping, C<for> can lend itself to many other interesting applications. Here's one that avoids the @@ -745,9 +745,10 @@ PP(pp_schop) PP(pp_chop) { - dSP; dMARK; dTARGET; - while (SP > MARK) - do_chop(TARG, POPs); + dSP; dMARK; dTARGET; dORIGMARK; + while (MARK < SP) + do_chop(TARG, *++MARK); + SP = ORIGMARK; PUSHTARG; RETURN; } @@ -4622,7 +4622,7 @@ PP(pp_gnetent) if (which == OP_GNBYNAME) #ifdef HAS_GETNETBYNAME - nent = PerlSock_getnetbyname(POPpx); + nent = PerlSock_getnetbyname(POPpbytex); #else DIE(aTHX_ PL_no_sock_func, "getnetbyname"); #endif @@ -4710,7 +4710,7 @@ PP(pp_gprotoent) if (which == OP_GPBYNAME) #ifdef HAS_GETPROTOBYNAME - pent = PerlSock_getprotobyname(POPpx); + pent = PerlSock_getprotobyname(POPpbytex); #else DIE(aTHX_ PL_no_sock_func, "getprotobyname"); #endif @@ -4793,8 +4793,8 @@ PP(pp_gservent) if (which == OP_GSBYNAME) { #ifdef HAS_GETSERVBYNAME - char *proto = POPpx; - char *name = POPpx; + char *proto = POPpbytex; + char *name = POPpbytex; if (proto && !*proto) proto = Nullch; @@ -4806,7 +4806,7 @@ PP(pp_gservent) } else if (which == OP_GSBYPORT) { #ifdef HAS_GETSERVBYPORT - char *proto = POPpx; + char *proto = POPpbytex; unsigned short port = POPu; #ifdef HAS_HTONS @@ -5040,7 +5040,7 @@ PP(pp_gpwent) switch (which) { case OP_GPWNAM: - pwent = getpwnam(POPpx); + pwent = getpwnam(POPpbytex); break; case OP_GPWUID: pwent = getpwuid((Uid_t)POPi); @@ -5241,7 +5241,7 @@ PP(pp_ggrent) STRLEN n_a; if (which == OP_GGRNAM) - grent = (struct group *)getgrnam(POPpx); + grent = (struct group *)getgrnam(POPpbytex); else if (which == OP_GGRGID) grent = (struct group *)getgrgid(POPi); else diff --git a/t/camel-III/vstring.t b/t/camel-III/vstring.t index a252a3b3c8..7360ae7654 100644 --- a/t/camel-III/vstring.t +++ b/t/camel-III/vstring.t @@ -4,7 +4,7 @@ BEGIN { @INC = '../lib'; } use Test; -plan test => 6; +plan test => 5; # Error messages may have wide chars, say that is okay - if we can. eval { binmode STDOUT,":utf8" }; @@ -33,7 +33,7 @@ else # Chapter 28, pp671 ok(v5.6.0 lt v5.7.0,1,"v5.6.0 lt v5.7.0 fails"); -# Some floating-point risk here ... -my $v = ord($^V)+ord(substr($^V,1,1))/1000+ord(substr($^V,2,1))/1000000; -$v =~ s/^5\.006999\d+/5.007/; # floating point fun -ok($v,$],"\$^V and \$] do not match"); + +# floating point too messy +# my $v = ord($^V)+ord(substr($^V,1,1))/1000+ord(substr($^V,2,1))/1000000; +# ok($v,$],"\$^V and \$] do not match"); diff --git a/t/lib/charnames.t b/t/lib/charnames.t index 6a8a8be32d..9773a2025d 100644 --- a/t/lib/charnames.t +++ b/t/lib/charnames.t @@ -8,7 +8,7 @@ BEGIN { } $| = 1; -print "1..15\n"; +print "1..16\n"; use charnames ':full'; @@ -103,6 +103,18 @@ sub to_bytes { print "not " unless to_bytes("\N{DESERET SMALL LETTER ENG}") eq $encoded_deseng; print "ok 15\n"; +} +{ + # 20001114.001 + + if (ord("Ä") == 0xc4) { # Try to do this only on Latin-1. + use charnames ':full'; + my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; + print "not " unless $text eq "\xc4" && ord($text) == 0xc4; + print "ok 16\n"; + } else { + print "ok 16 # Skip: not Latin-1\n"; + } } diff --git a/t/op/chop.t b/t/op/chop.t index 9edddedaf7..1b55f11832 100755 --- a/t/op/chop.t +++ b/t/op/chop.t @@ -1,6 +1,6 @@ #!./perl -print "1..34\n"; +print "1..37\n"; # optimized @@ -104,6 +104,15 @@ $_ = "\x{1234}\x{2345}"; chop; print $_ eq "\x{1234}" ? "ok 33\n" : "not ok 33\n"; -# TODO! Make sure chop(LIST) returns the right value. my @stuff = qw(this that); -print chop(@stuff[0,1]) eq 't' ? "ok 34 # TODO\n" : "not ok 34 # TODO\n"; +print chop(@stuff[0,1]) eq 't' ? "ok 34\n" : "not ok 34\n"; + +# bug id 20010305.012 +@stuff = qw(ab cd ef); +print chop(@stuff = @stuff) eq 'f' ? "ok 35\n" : "not ok 35\n"; + +@stuff = qw(ab cd ef); +print chop(@stuff[0, 2]) eq 'f' ? "ok 36\n" : "not ok 36\n"; + +my %stuff = (1..4); +print chop(@stuff{1, 3}) eq '4' ? "ok 37\n" : "not ok 37\n"; diff --git a/t/op/pack.t b/t/op/pack.t index 67bd547c5b..4c169917ae 100755 --- a/t/op/pack.t +++ b/t/op/pack.t @@ -6,7 +6,7 @@ BEGIN { require Config; import Config; } -print "1..159\n"; +print "1..160\n"; $format = "c2 x5 C C x s d i l a6"; # Need the expression in here to force ary[5] to be numeric. This avoids @@ -416,3 +416,6 @@ print 'not ' unless v1.20.300.4000 ne sprintf "%vd", pack("C0U*",1,20,300,4000); print "ok $test\n"; $test++; +# 160 +print "not " unless join(" ", unpack("C*", chr(0x1e2))) eq "199 162"; +print "ok $test\n"; $test++; diff --git a/t/op/pat.t b/t/op/pat.t index 0c8810356b..711f9f08e9 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -4,7 +4,7 @@ # the format supported by op/regexp.t. If you want to add a test # that does fit that format, add it to op/re_tests, not here. -print "1..245\n"; +print "1..580\n"; BEGIN { chdir 't' if -d 't'; @@ -1228,3 +1228,310 @@ if (ord('i') == 0x89 && ord('J') == 0xd1) { # EBCDIC print "ok $_ # Skip: not EBCDIC\n"; } } + +print "not " unless "\x{ab}" =~ /\x{ab}/; +print "ok 246\n"; + +print "not " unless "\x{abcd}" =~ /\x{abcd}/; +print "ok 247\n"; + +{ + # bug id 20001008.001 + + use utf8; # BUG - should not be needed, but is, otherwise core dump + + my $test = 248; + my @x = ("stra\337e 138","stra\337e 138"); + for (@x) { + s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; + my($latin) = /^(.+)(?:\s+\d)/; + print $latin eq "stra\337e" ? "ok $test\n" : # 248,249 + "#latin[$latin]\nnot ok $test\n"; + $test++; + $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a + use utf8; + $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a + } +} + +{ + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok 250\n"; + + print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok 251\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; + print "ok 252\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; + print "ok 253\n"; + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok 254\n"; + + print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok 255\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; + print "ok 256\n"; + + print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; + print "ok 257\n"; +} + +{ + # the first half of 20001028.003 + + my $X = chr(1448); + my ($Y) = $X =~ /(.*)/; + print "not " unless $Y eq v1448 && length($Y) == 1; + print "ok 258\n"; +} + +{ + # 20001108.001 + + my $X = "Szab\x{f3},Bal\x{e1}zs"; + my $Y = $X; + $Y =~ s/(B)/$1/ for 0..3; + print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs"; + print "ok 259\n"; +} + +{ + # the second half of 20001028.003 + + $X =~ s/^/chr(1488)/e; + print "not " unless length $X == 1 && ord($X) == 1488; + print "ok 260\n"; +} + +{ + # 20000517.001 + + my $x = "\x{100}A"; + + $x =~ s/A/B/; + + print "not " unless $x eq "\x{100}B" && length($x) == 2; + print "ok 261\n"; +} + +{ + # bug id 20001230.002 + + print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; + print "ok 262\n"; + + print "not " unless "École" =~ /^\C\C(c)/; + print "ok 263\n"; +} + +{ + my $test = 264; # till 575 + + use charnames ':full'; + + # This is far from complete testing, there are dozens of character + # classes in Unicode. The mixing of literals and \N{...} is + # intentional so that in non-Latin-1 places we test the native + # characters, not the Unicode code points. + + my %s = ( + "a" => 'Ll', + "\N{CYRILLIC SMALL LETTER A}" => 'Ll', + "A" => 'Lu', + "\N{GREEK CAPITAL LETTER ALPHA}" => 'Lu', + "\N{HIRAGANA LETTER SMALL A}" => 'Lo', + "\N{COMBINING GRAVE ACCENT}" => 'Mn', + "0" => 'Nd', + "\N{ARABIC-INDIC DIGIT ZERO}" => 'Nd', + "_" => 'N', + "!" => 'P', + " " => 'Zs', + "\0" => 'Cc', + ); + + for my $char (keys %s) { + my $class = $s{$char}; + my $code = sprintf("%04x", ord($char)); + printf "# 0x$code\n"; + print "# IsAlpha\n"; + if ($class =~ /^[LM]/) { + print "not " unless $char =~ /\p{IsAlpha}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsAlpha}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsAlpha}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsAlpha}/; + print "ok $test\n"; $test++; + } + print "# IsAlnum\n"; + if ($class =~ /^[LMN]/ && $char ne "_") { + print "not " unless $char =~ /\p{IsAlnum}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsAlnum}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsAlnum}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsAlnum}/; + print "ok $test\n"; $test++; + } + print "# IsASCII\n"; + if ($code <= 127) { + print "not " unless $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsASCII}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsASCII}/; + print "ok $test\n"; $test++; + } + print "# IsCntrl\n"; + if ($class =~ /^C/) { + print "not " unless $char =~ /\p{IsCntrl}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsCntrl}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsCntrl}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsCntrl}/; + print "ok $test\n"; $test++; + } + print "# IsBlank\n"; + if ($class =~ /^Z[lp]/ || $char eq " ") { + print "not " unless $char =~ /\p{IsBlank}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsBlank}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsBlank}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsBlank}/; + print "ok $test\n"; $test++; + } + print "# IsDigit\n"; + if ($class =~ /^Nd$/) { + print "not " unless $char =~ /\p{IsDigit}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsDigit}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsDigit}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsDigit}/; + print "ok $test\n"; $test++; + } + print "# IsGraph\n"; + if ($class =~ /^([LMNPS])|Co/) { + print "not " unless $char =~ /\p{IsGraph}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsGraph}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsGraph}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsGraph}/; + print "ok $test\n"; $test++; + } + print "# IsLower\n"; + if ($class =~ /^Ll$/) { + print "not " unless $char =~ /\p{IsLower}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsLower}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsLower}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsLower}/; + print "ok $test\n"; $test++; + } + print "# IsPrint\n"; + if ($class =~ /^([LMNPS])|Co|Zs/) { + print "not " unless $char =~ /\p{IsPrint}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsPrint}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsPrint}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsPrint}/; + print "ok $test\n"; $test++; + } + print "# IsPunct\n"; + if ($class =~ /^P/ || $char eq "_") { + print "not " unless $char =~ /\p{IsPunct}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsPunct}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsPunct}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsPunct}/; + print "ok $test\n"; $test++; + } + print "# IsSpace\n"; + if ($class =~ /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)) { + print "not " unless $char =~ /\p{IsSpace}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsSpace}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsSpace}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsSpace}/; + print "ok $test\n"; $test++; + } + print "# IsUpper\n"; + if ($class =~ /^L[ut]/) { + print "not " unless $char =~ /\p{IsUpper}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsUpper}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsUpper}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsUpper}/; + print "ok $test\n"; $test++; + } + print "# IsWord\n"; + if ($class =~ /^[LMN]/ || $char eq "_") { + print "not " unless $char =~ /\p{IsWord}/; + print "ok $test\n"; $test++; + print "not " if $char =~ /\P{IsWord}/; + print "ok $test\n"; $test++; + } else { + print "not " if $char =~ /\p{IsWord}/; + print "ok $test\n"; $test++; + print "not " unless $char =~ /\P{IsWord}/; + print "ok $test\n"; $test++; + } + } +} + +{ + $_ = "abc\x{100}\x{200}\x{300}\x{380}\x{400}defg"; + + if (/(.\x{300})./) { + print "ok 576\n"; + + print "not " unless $` eq "abc\x{100}" && length($`) == 4; + print "ok 577\n"; + + print "not " unless $& eq "\x{200}\x{300}\x{380}" && length($&) == 3; + print "ok 578\n"; + + print "not " unless $' eq "\x{400}defg" && length($') == 5; + print "ok 579\n"; + + print "not " unless $1 eq "\x{200}\x{300}" && length($1) == 2; + print "ok 580\n"; + } +} diff --git a/t/op/split.t b/t/op/split.t index ffc29be899..ce8d64d947 100755 --- a/t/op/split.t +++ b/t/op/split.t @@ -1,6 +1,6 @@ #!./perl -print "1..32\n"; +print "1..44\n"; $FS = ':'; @@ -14,7 +14,7 @@ if (join(';',$a,$b,$c) eq 'a;b;c') {print "ok 1\n";} else {print "not ok 1\n";} if (join("$_",@ary) eq 'aa:b:cc') {print "ok 2\n";} else {print "not ok 2\n";} $_ = "abc\n"; -@xyz = (@ary = split(//)); +my @xyz = (@ary = split(//)); if (join(".",@ary) eq "a.b.c.\n") {print "ok 3\n";} else {print "not ok 3\n";} $_ = "a:b:c::::"; @@ -151,5 +151,89 @@ print "not " unless @ary == 3 && $ary[2] eq "\xFD\xFD" && $ary[2] eq "\x{FD}\xFD" && $ary[2] eq "\x{FD}\x{FD}"; - print "ok 32\n"; + + +{ + my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); + print "not " unless "@a" eq "1234 123 2345"; + print "ok 33\n"; +} + +{ + my $x = chr(123); + my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); + print "not " unless "@a" eq "1234 2345"; + print "ok 34\n"; +} + +{ + # bug id 20000427.003 + + use warnings; + use strict; + + my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; + + my @charlist = split //, $sushi; + my $r = ''; + foreach my $ch (@charlist) { + $r = $r . " " . sprintf "U+%04X", ord($ch); + } + + print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; + print "ok 35\n"; +} + +{ + # bug id 20000426.003 + + my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; + + my ($a, $b, $c) = split(/\x40/, $s); + print "not " + unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; + print "ok 36\n"; + + my ($a, $b) = split(/\x{100}/, $s); + print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; + print "ok 37\n"; + + my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); + print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; + print "ok 38\n"; + + my ($a, $b) = split(/\x40\x{80}/, $s); + print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; + print "ok 39\n"; + + my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); + print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; + print "ok 40\n"; +} + +{ + # 20001205.014 + + my $a = "ABC\x{263A}"; + + my @b = split( //, $a ); + + print "not " unless @b == 4; + print "ok 41\n"; + + print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}"; + print "ok 42\n"; + + $a =~ s/^A/Z/; + print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}"; + print "ok 43\n"; +} + +{ + my @a = split(/\xFE/, "\xFF\xFE\xFD"); + + print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD"; + print "ok 44\n"; +} + diff --git a/t/pragma/utf8.t b/t/pragma/utf8.t index 60e6c6e102..31d119137a 100755 --- a/t/pragma/utf8.t +++ b/t/pragma/utf8.t @@ -10,297 +10,30 @@ BEGIN { } } -print "1..109\n"; - -my $test = 1; - -sub ok { - my ($got,$expect) = @_; - print "# expected [$expect], got [$got]\nnot " if $got ne $expect; - print "ok $test\n"; -} - -sub nok { - my ($got,$expect) = @_; - print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; - print "ok $test\n"; -} - -sub ok_bytes { - use bytes; - my ($got,$expect) = @_; - print "# expected [$expect], got [$got]\nnot " if $got ne $expect; - print "ok $test\n"; -} - -sub nok_bytes { - use bytes; - my ($got,$expect) = @_; - print "# expected not [$expect], got [$got]\nnot " if $got eq $expect; - print "ok $test\n"; -} - -{ - use utf8; - - $_ = ">\x{263A}<"; - s/([\x{80}-\x{10ffff}])/"&#".ord($1).";"/eg; - ok $_, '>☺<'; - $test++; # 1 - - $_ = ">\x{263A}<"; - my $rx = "\x{80}-\x{10ffff}"; - s/([$rx])/"&#".ord($1).";"/eg; - ok $_, '>☺<'; - $test++; # 2 - - $_ = ">\x{263A}<"; - my $rx = "\\x{80}-\\x{10ffff}"; - s/([$rx])/"&#".ord($1).";"/eg; - ok $_, '>☺<'; - $test++; # 3 - - $_ = "alpha,numeric"; - m/([[:alpha:]]+)/; - ok $1, 'alpha'; - $test++; # 4 - - $_ = "alphaNUMERICstring"; - m/([[:^lower:]]+)/; - ok $1, 'NUMERIC'; - $test++; # 5 - - $_ = "alphaNUMERICstring"; - m/(\p{Ll}+)/; - ok $1, 'alpha'; - $test++; # 6 - - $_ = "alphaNUMERICstring"; - m/(\p{Lu}+)/; - ok $1, 'NUMERIC'; - $test++; # 7 - - $_ = "alpha,numeric"; - m/([\p{IsAlpha}]+)/; - ok $1, 'alpha'; - $test++; # 8 - - $_ = "alphaNUMERICstring"; - m/([^\p{IsLower}]+)/; - ok $1, 'NUMERIC'; - $test++; # 9 - - $_ = "alpha123numeric456"; - m/([\p{IsDigit}]+)/; - ok $1, '123'; - $test++; # 10 - - $_ = "alpha123numeric456"; - m/([^\p{IsDigit}]+)/; - ok $1, 'alpha'; - $test++; # 11 - - $_ = ",123alpha,456numeric"; - m/([\p{IsAlnum}]+)/; - ok $1, '123alpha'; - $test++; # 12 -} - -{ - # no use utf8 needed - $_ = "\x{263A}\x{263A}x\x{263A}y\x{263A}"; - - ok length($_), 6; # 13 - $test++; - - ($a) = m/x(.)/; - - ok length($a), 1; # 14 - $test++; - - ok length($`), 2; # 15 - $test++; - ok length($&), 2; # 16 - $test++; - ok length($'), 2; # 17 - $test++; - - ok length($1), 1; # 18 - $test++; - - ok length($b=$`), 2; # 19 - $test++; - - ok length($b=$&), 2; # 20 - $test++; - - ok length($b=$'), 2; # 21 - $test++; - - ok length($b=$1), 1; # 22 - $test++; - - ok $a, "\x{263A}"; # 23 - $test++; - - ok $`, "\x{263A}\x{263A}"; # 24 - $test++; - - ok $&, "x\x{263A}"; # 25 - $test++; - - ok $', "y\x{263A}"; # 26 - $test++; - - ok $1, "\x{263A}"; # 27 - $test++; - - ok_bytes $a, "\342\230\272"; # 28 - $test++; - - ok_bytes $1, "\342\230\272"; # 29 - $test++; - - ok_bytes $&, "x\342\230\272"; # 30 - $test++; - - { - use utf8; # required - $_ = chr(0x263A) . chr(0x263A) . 'x' . chr(0x263A) . 'y' . chr(0x263A); - } - - ok length($_), 6; # 31 - $test++; - - ($a) = m/x(.)/; - - ok length($a), 1; # 32 - $test++; - - ok length($`), 2; # 33 - $test++; - - ok length($&), 2; # 34 - $test++; - - ok length($'), 2; # 35 - $test++; - - ok length($1), 1; # 36 - $test++; - - ok length($b=$`), 2; # 37 - $test++; - - ok length($b=$&), 2; # 38 - $test++; - - ok length($b=$'), 2; # 39 - $test++; - - ok length($b=$1), 1; # 40 - $test++; - - ok $a, "\x{263A}"; # 41 - $test++; - - ok $`, "\x{263A}\x{263A}"; # 42 - $test++; - - ok $&, "x\x{263A}"; # 43 - $test++; - - ok $', "y\x{263A}"; # 44 - $test++; - - ok $1, "\x{263A}"; # 45 - $test++; - - ok_bytes $a, "\342\230\272"; # 46 - $test++; - - ok_bytes $1, "\342\230\272"; # 47 - $test++; - - ok_bytes $&, "x\342\230\272"; # 48 - $test++; - - $_ = "\342\230\272\342\230\272x\342\230\272y\342\230\272"; - - ok length($_), 14; # 49 - $test++; - - ($a) = m/x(.)/; - - ok length($a), 1; # 50 - $test++; - - ok length($`), 6; # 51 - $test++; - - ok length($&), 2; # 52 - $test++; - - ok length($'), 6; # 53 - $test++; - - ok length($1), 1; # 54 - $test++; - - ok length($b=$`), 6; # 55 - $test++; - - ok length($b=$&), 2; # 56 - $test++; - - ok length($b=$'), 6; # 57 - $test++; - - ok length($b=$1), 1; # 58 - $test++; - - ok $a, "\342"; # 59 - $test++; - - ok $`, "\342\230\272\342\230\272"; # 60 - $test++; - - ok $&, "x\342"; # 61 - $test++; - - ok $', "\230\272y\342\230\272"; # 62 - $test++; - - ok $1, "\342"; # 63 - $test++; -} - -{ - use utf8; - ok "\x{ab}" =~ /^\x{ab}$/, 1; - $test++; # 64 -} - -{ - use utf8; - ok_bytes chr(0x1e2), pack("C*", 0xc7, 0xa2); - $test++; # 65 -} - -{ - use utf8; - my @a = map ord, split(//, join("", map chr, (1234, 123, 2345))); - ok "@a", "1234 123 2345"; - $test++; # 66 -} - -{ - use utf8; - my $x = chr(123); - my @a = map ord, split(/$x/, join("", map chr, (1234, 123, 2345))); - ok "@a", "1234 2345"; - $test++; # 67 -} +# NOTE! +# +# Think carefully before adding tests here. In general this should be +# used only for about three categories of tests: +# +# (1) tests that absolutely require 'use utf8', and since that in general +# shouldn't be needed as the utf8 is being obsoleted, this should +# have rather few tests. If you want to test Unicode and regexes, +# you probably want to go to op/regexp or op/pat; if you want to test +# split, go to op/split; pack, op/pack; appending or joining, +# op/append or op/join, and so forth +# +# (2) tests that have to do with Unicode tokenizing (though it's likely +# that all the other Unicode tests sprinkled around the t/**/*.t are +# going to catch that) +# +# (3) complicated tests that simultaneously stress so many Unicode features +# that deciding into which other test script the tests should go to +# is hard -- maybe consider breaking up the complicated test +# +# + +use Test; +plan tests => 15; { # bug id 20001009.001 @@ -308,100 +41,29 @@ sub nok_bytes { my ($a, $b); { use bytes; $a = "\xc3\xa4" } - { use utf8; $b = "\xe4" } # \xXX must not produce UTF-8 + { use utf8; $b = "\xe4" } - print "not " if $a eq $b; - print "ok $test\n"; $test++; # 68 - - { use utf8; print "not " if $a eq $b; } - print "ok $test\n"; $test++; # 69 -} + my $test = 68; -{ - # bug id 20001008.001 - - my @x = ("stra\337e 138","stra\337e 138"); - for (@x) { - s/(\d+)\s*([\w\-]+)/$1 . uc $2/e; - my($latin) = /^(.+)(?:\s+\d)/; - print $latin eq "stra\337e" ? "ok $test\n" : # 70, 71 - "#latin[$latin]\nnot ok $test\n"; - $test++; - $latin =~ s/stra\337e/straße/; # \303\237 after the 2nd a - use utf8; - $latin =~ s!(s)tr(?:aß|s+e)!$1tr.!; # \303\237 after the a - } -} - -{ - # bug id 20000427.003 - - use utf8; - use warnings; - use strict; - - my $sushi = "\x{b36c}\x{5a8c}\x{ff5b}\x{5079}\x{505b}"; - - my @charlist = split //, $sushi; - my $r = ''; - foreach my $ch (@charlist) { - $r = $r . " " . sprintf "U+%04X", ord($ch); - } + ok($a ne $b); - print "not " unless $r eq " U+B36C U+5A8C U+FF5B U+5079 U+505B"; - print "ok $test\n"; # 72 - $test++; + { use utf8; ok($a ne $b) } } -{ - # bug id 20000426.003 - - use utf8; - - my $s = "\x20\x40\x{80}\x{100}\x{80}\x40\x20"; - - my ($a, $b, $c) = split(/\x40/, $s); - print "not " - unless $a eq "\x20" && $b eq "\x{80}\x{100}\x{80}" && $c eq $a; - print "ok $test\n"; - $test++; # 73 - - my ($a, $b) = split(/\x{100}/, $s); - print "not " unless $a eq "\x20\x40\x{80}" && $b eq "\x{80}\x40\x20"; - print "ok $test\n"; - $test++; # 74 - - my ($a, $b) = split(/\x{80}\x{100}\x{80}/, $s); - print "not " unless $a eq "\x20\x40" && $b eq "\x40\x20"; - print "ok $test\n"; - $test++; # 75 - - my ($a, $b) = split(/\x40\x{80}/, $s); - print "not " unless $a eq "\x20" && $b eq "\x{100}\x{80}\x40\x20"; - print "ok $test\n"; - $test++; # 76 - - my ($a, $b, $c) = split(/[\x40\x{80}]+/, $s); - print "not " unless $a eq "\x20" && $b eq "\x{100}" && $c eq "\x20"; - print "ok $test\n"; - $test++; # 77 -} { # bug id 20000730.004 - use utf8; - my $smiley = "\x{263a}"; - for my $s ("\x{263a}", # 78 - $smiley, # 79 + for my $s ("\x{263a}", + $smiley, - "" . $smiley, # 80 - "" . "\x{263a}", # 81 + "" . $smiley, + "" . "\x{263a}", - $smiley . "", # 82 - "\x{263a}" . "", # 83 + $smiley . "", + "\x{263a}" . "", ) { my $length_chars = length($s); my $length_bytes; @@ -410,21 +72,18 @@ sub nok_bytes { my $regex_chars = @regex_chars; my @split_chars = split //, $s; my $split_chars = @split_chars; - print "not " - unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq - "1/1/1/3"; - print "ok $test\n"; - $test++; + ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq + "1/1/1/3"); } - for my $s ("\x{263a}" . "\x{263a}", # 84 - $smiley . $smiley, # 85 + for my $s ("\x{263a}" . "\x{263a}", + $smiley . $smiley, - "\x{263a}\x{263a}", # 86 - "$smiley$smiley", # 87 + "\x{263a}\x{263a}", + "$smiley$smiley", - "\x{263a}" x 2, # 88 - $smiley x 2, # 89 + "\x{263a}" x 2, + $smiley x 2, ) { my $length_chars = length($s); my $length_bytes; @@ -433,160 +92,17 @@ sub nok_bytes { my $regex_chars = @regex_chars; my @split_chars = split //, $s; my $split_chars = @split_chars; - print "not " - unless "$length_chars/$regex_chars/$split_chars/$length_bytes" eq - "2/2/2/6"; - print "ok $test\n"; - $test++; + ok("$length_chars/$regex_chars/$split_chars/$length_bytes" eq + "2/2/2/6"); } } -{ - use utf8; - - print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 90 - - print "not " unless "ba\xd4c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 91 - - print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 92 - - print "not " unless "ba\x{d4}c" =~ /([a\xd4]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 93 - - print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 94 - - print "not " unless "ba\xd4c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 95 - - print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\x{d4}"; - print "ok $test\n"; - $test++; # 96 - - print "not " unless "ba\x{d4}c" =~ /([a\x{d4}]+)/ && $1 eq "a\xd4"; - print "ok $test\n"; - $test++; # 97 -} { - # the first half of 20001028.003 - - my $X = chr(1448); - my ($Y) = $X =~ /(.*)/; - print "not " unless $Y eq v1448 && length($Y) == 1; - print "ok $test\n"; - $test++; # 98 -} - -{ - # 20001108.001 - - use utf8; - my $X = "Szab\x{f3},Bal\x{e1}zs"; - my $Y = $X; - $Y =~ s/(B)/$1/ for 0..3; - print "not " unless $Y eq $X && $X eq "Szab\x{f3},Bal\x{e1}zs"; - print "ok $test\n"; - $test++; # 99 -} - -{ - # 20001114.001 - - use utf8; - use charnames ':full'; - my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; - print "not " unless $text eq "\xc4" && ord($text) == 0xc4; - print "ok $test\n"; - $test++; # 100 -} - -{ - # 20001205.014 - - use utf8; - - my $a = "ABC\x{263A}"; - - my @b = split( //, $a ); - - print "not " unless @b == 4; - print "ok $test\n"; - $test++; # 101 - - print "not " unless length($b[3]) == 1 && $b[3] eq "\x{263A}"; - print "ok $test\n"; - $test++; # 102 - - $a =~ s/^A/Z/; - print "not " unless length($a) == 4 && $a eq "ZBC\x{263A}"; - print "ok $test\n"; - $test++; # 103 -} - -{ - # the second half of 20001028.003 - - use utf8; - $X =~ s/^/chr(1488)/e; - print "not " unless length $X == 1 && ord($X) == 1488; - print "ok $test\n"; - $test++; # 104 -} - -{ - # 20000517.001 - - my $x = "\x{100}A"; - - $x =~ s/A/B/; - - print "not " unless $x eq "\x{100}B" && length($x) == 2; - print "ok $test\n"; - $test++; # 105 -} - -{ - use utf8; - - my @a = split(/\xFE/, "\xFF\xFE\xFD"); - - print "not " unless @a == 2 && $a[0] eq "\xFF" && $a[1] eq "\xFD"; - print "ok $test\n"; - $test++; # 106 -} - -{ - use utf8; - my $w = 0; local $SIG{__WARN__} = sub { print "#($_[0])\n"; $w++ }; my $x = eval q/"\\/ . "\x{100}" . q/"/;; - print "not " unless $w == 0 && $x eq "\x{100}"; - print "ok $test\n"; - $test++; # 107 + ok($w == 0 && $x eq "\x{100}"); } -{ - # bug id 20001230.002 - - use utf8; - - print "not " unless "École" =~ /^\C\C(.)/ && $1 eq 'c'; - print "ok $test\n"; - $test++; # 108 - - print "not " unless "École" =~ /^\C\C(c)/; - print "ok $test\n"; - $test++; # 109 -} |