diff options
author | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-05-29 18:41:19 +0000 |
---|---|---|
committer | Nick Ing-Simmons <nik@tiuk.ti.com> | 2001-05-29 18:41:19 +0000 |
commit | a0f7c5349d9cbdebc03bb61d0662902819c72b0d (patch) | |
tree | 6bea7aec0b362bf7f11c510133b32a0b5cb1da45 | |
parent | 00aadd7184751f37937d2ec7edb2b9d1c8a55e0e (diff) | |
parent | 55bceba65f83da05702b3603a0967b74e0c73135 (diff) | |
download | perl-a0f7c5349d9cbdebc03bb61d0662902819c72b0d.tar.gz |
Post weekend integrate mainline (fails one test pragma/autouse).
p4raw-id: //depot/perlio@10299
117 files changed, 9993 insertions, 1051 deletions
@@ -31,6 +31,641 @@ or any other branch. Version v5.7.1 Development release working toward v5.8 -------------- ____________________________________________________________________________ +[ 10297] By: jhi on 2001/05/29 16:25:47 + Log: Fix Perl_swash_init & Perl_swash_fetch to save ERRSV (= $@) + before Perl_load_module/Perl_call_method and restore the value + after if !SvTRUE(ERRSV). (from Inaba Hiroto) + Branch: perl + ! utf8.c +____________________________________________________________________________ +[ 10296] By: jhi on 2001/05/29 16:01:53 + Log: Subject: Re: AIX and gcc (moving targets) + From: "H.Merijn Brand" <h.m.brand@hccnet.nl> + Date: Tue, 29 May 2001 17:59:40 +0200 + Message-Id: <20010529175841.7078.H.M.BRAND@hccnet.nl> + Branch: perl + ! ext/Storable/Makefile.PL +____________________________________________________________________________ +[ 10295] By: jhi on 2001/05/29 15:59:05 + Log: Subject: Re: AIX and gcc (moving targets) + From: "H.Merijn Brand" <h.m.brand@hccnet.nl> + Date: Tue, 29 May 2001 12:32:57 +0200 + Message-Id: <20010529115151.9FE0.H.M.BRAND@hccnet.nl> + Branch: perl + ! ext/POSIX/Makefile.PL +____________________________________________________________________________ +[ 10294] By: jhi on 2001/05/29 15:53:43 + Log: Syncing with Test 1.21, from Michael G Schwern (#10240 retracted) + Branch: perl + ! lib/Test/Harness.pm t/lib/test-harness.t +____________________________________________________________________________ +[ 10293] By: jhi on 2001/05/29 15:46:10 + Log: Syncing with Test 1.17, from Michael G Schwern. + Branch: perl + + t/lib/Test/fail.t t/lib/Test/mix.t t/lib/Test/onfail.t + + t/lib/Test/qr.t t/lib/Test/skip.t t/lib/Test/success.t + + t/lib/Test/todo.t + ! MANIFEST lib/Test.pm t/TEST +____________________________________________________________________________ +[ 10292] By: jhi on 2001/05/29 15:34:08 + Log: Allow tests to be in deeper subdirectories so we can have + things like t/lib/Some-Module/foo.t (from Michael G Schwern) + Branch: perl + ! t/TEST +____________________________________________________________________________ +[ 10291] By: jhi on 2001/05/29 15:29:37 + Log: Add tests for Time::gmtime and Time::localtime. + Branch: perl + + t/lib/time-gmtime.t t/lib/time-localtime.t + ! MANIFEST t/lib/1_compile.t +____________________________________________________________________________ +[ 10290] By: jhi on 2001/05/29 15:15:44 + Log: Add test for File::stat. + Branch: perl + + t/lib/filestat.t + ! MANIFEST t/lib/1_compile.t +____________________________________________________________________________ +[ 10289] By: jhi on 2001/05/29 15:05:38 + Log: Add test for Net::servent. + Branch: perl + + t/lib/net-sent.t + ! MANIFEST t/lib/1_compile.t +____________________________________________________________________________ +[ 10288] By: jhi on 2001/05/29 14:57:15 + Log: Add test for Net::protoent. + Branch: perl + + t/lib/net-pent.t + ! MANIFEST t/lib/1_compile.t +____________________________________________________________________________ +[ 10287] By: jhi on 2001/05/29 14:53:10 + Log: Add test for Net::netent. + Branch: perl + + t/lib/net-nent.t + ! MANIFEST t/lib/1_compile.t +____________________________________________________________________________ +[ 10286] By: jhi on 2001/05/29 14:31:57 + Log: Add test for User::grent. Portability doubtful. + Branch: perl + + t/lib/user-grent.t + ! MANIFEST t/lib/1_compile.t +____________________________________________________________________________ +[ 10285] By: jhi on 2001/05/29 14:24:20 + Log: Add test for User::pwent. + Probably will fall down somewhere for portability reasons. + Branch: perl + + t/lib/user-pwent.t + ! MANIFEST t/lib/1_compile.t +____________________________________________________________________________ +[ 10284] By: jhi on 2001/05/29 13:26:43 + Log: Metaconfig units changes for #10283. + Branch: metaconfig + ! U/installdirs/inc_version_list.U U/installdirs/perl5.U + ! U/installdirs/siteman1.U U/installdirs/siteman3.U + ! U/installdirs/sitescript.U U/modified/Signal.U +____________________________________________________________________________ +[ 10283] By: jhi on 2001/05/29 13:26:24 + Log: Subject: [PATCH 5.6.1] signal names in Configure + From: Ilya Zakharevich <ilya@math.ohio-state.edu> + Date: Tue, 29 May 2001 03:52:25 -0400 + Message-ID: <20010529035225.A9400@math.ohio-state.edu> + + Subject: [PATCH 5.6.1] older perl in Configure + From: Ilya Zakharevich <ilya@math.ohio-state.edu> + Date: Tue, 29 May 2001 03:55:40 -0400 + Message-ID: <20010529035539.A9411@math.ohio-state.edu> + + Subject: [PATCH 5.6.1] goofs in Configure + From: Ilya Zakharevich <ilya@math.ohio-state.edu> + Date: Tue, 29 May 2001 04:13:07 -0400 + Message-ID: <20010529041307.A9658@math.ohio-state.edu> + + Configure portability tweaks. + Branch: perl + ! Configure +____________________________________________________________________________ +[ 10282] By: jhi on 2001/05/29 12:41:41 + Log: Subject: [PATCH 5.6.1] extLibpath for OS/2 + From: Ilya Zakharevich <ilya@math.ohio-state.edu> + Date: Tue, 29 May 2001 04:07:06 -0400 + Message-ID: <20010529040706.A9579@math.ohio-state.edu> + Branch: perl + ! os2/os2.c +____________________________________________________________________________ +[ 10281] By: jhi on 2001/05/29 12:40:31 + Log: Subject: [PATCH 5.6.1] extra static libs for OS/2 + From: Ilya Zakharevich <ilya@math.ohio-state.edu> + Date: Tue, 29 May 2001 04:01:38 -0400 + Message-ID: <20010529040138.A9482@math.ohio-state.edu> + Branch: perl + ! hints/os2.sh os2/Makefile.SHs +____________________________________________________________________________ +[ 10280] By: jhi on 2001/05/29 12:38:48 + Log: Subject: Re: [PATCH 5.6.1] Test::Harness clumsy + From: Ilya Zakharevich <ilya@math.ohio-state.edu> + Date: Tue, 29 May 2001 03:50:18 -0400 + Message-ID: <20010529035018.A9387@math.ohio-state.edu> + Branch: perl + ! lib/Test/Harness.pm +____________________________________________________________________________ +[ 10279] By: jhi on 2001/05/29 12:34:53 + Log: Additional safeguard against $@ getting trampled; idea from Hugo. + Branch: perl + ! lib/utf8_heavy.pl +____________________________________________________________________________ +[ 10278] By: jhi on 2001/05/29 02:15:24 + Log: Subject: Re: [ID 20010528.004] dual bug under utf8: $@ has UTF8 flag and \s+ does not match + From: Hugo <hv@crypt.compulink.co.uk> + Date: Tue, 29 May 2001 03:03:45 +0100 + Message-Id: <200105290203.DAA00825@crypt.compulink.co.uk> + + Explanation why the $@ always gets the UTF8 flag when under use utf8-- + because we told it to have the flag when under use utf8. + Branch: perl + ! pp_ctl.c +____________________________________________________________________________ +[ 10277] By: jhi on 2001/05/29 00:51:34 + Log: At least a partial fix for 20010528.004. + Branch: perl + ! lib/utf8_heavy.pl +____________________________________________________________________________ +[ 10276] By: jhi on 2001/05/29 00:42:59 + Log: Subject: Re: Report /pro/3gl/CPAN/perl-5.7.1 + From: Nicholas Clark <nick@ccl4.org> + Date: Mon, 28 May 2001 23:39:38 +0100 + Message-ID: <20010528233938.M86445@plum.flirble.org> + + More portable non-zero UV. + Branch: perl + ! t/lib/extutils.t +____________________________________________________________________________ +[ 10275] By: jhi on 2001/05/29 00:40:28 + Log: Subject: [PATCH] (was Re: Why t/lib/extutils.t is failing ... + From: Nicholas Clark <nick@ccl4.org> + Date: Mon, 28 May 2001 22:46:09 +0100 + Message-ID: <20010528224608.L86445@plum.flirble.org> + + Test also "make clean". + Branch: perl + ! t/lib/extutils.t +____________________________________________________________________________ +[ 10274] By: jhi on 2001/05/29 00:39:18 + Log: Subject: Re: Would -Wno-unused -Wall be better? + From: Michael G Schwern <schwern@pobox.com> + Date: Tue, 29 May 2001 00:55:54 +0100 + Message-ID: <20010529005553.B675@blackrider.blackstar.co.uk> + Branch: perl + ! pp.h +____________________________________________________________________________ +[ 10273] By: jhi on 2001/05/29 00:36:06 + Log: Add a test for 20010528.007, fixed in #10272. + Branch: perl + ! t/op/misc.t toke.c +____________________________________________________________________________ +[ 10272] By: jhi on 2001/05/29 00:21:12 + Log: Subject: Re: [ID 20010528.007] "\x{" causes panic:constant overflowed allocated space + From: Hugo <hv@crypt.compulink.co.uk> + Date: Tue, 29 May 2001 00:23:23 +0100 + Message-Id: <200105282323.AAA07930@crypt.compulink.co.uk> + Branch: perl + ! toke.c +____________________________________________________________________________ +[ 10271] By: jhi on 2001/05/28 22:52:11 + Log: Subject: Re: [ID 20010528.001] use autouse 'URI::Escape' => qw(URI::Escape::uri_escape) failed + From: Michael G Schwern <schwern@pobox.com> + Date: Tue, 29 May 2001 00:42:59 +0100 + Message-ID: <20010529004259.A675@blackrider.blackstar.co.uk> + Branch: perl + ! t/pragma/autouse.t +____________________________________________________________________________ +[ 10270] By: jhi on 2001/05/28 21:44:06 + Log: Some shells seemingly arrange the signal handlers differently + (bug id 20010521.004). + Branch: perl + ! t/lib/sigaction.t +____________________________________________________________________________ +[ 10269] By: jhi on 2001/05/28 20:34:21 + Log: Regen perlmodlib. + Branch: perl + ! pod/perlmodlib.pod +____________________________________________________________________________ +[ 10268] By: jhi on 2001/05/28 19:08:45 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ +[ 10267] By: jhi on 2001/05/28 19:03:54 + Log: Subject: [PATCH] perlnewmod.pod to reflect DLSI(P) change + From: Elaine -HFB- Ashton <elaine@chaos.wustl.edu> + Date: Mon, 28 May 2001 14:56:35 -0500 + Message-ID: <20010528145635.L8487@chaos.wustl.edu> + Branch: perl + ! pod/perlnewmod.pod +____________________________________________________________________________ +[ 10266] By: jhi on 2001/05/28 18:35:03 + Log: Regen toc. + Branch: perl + ! pod/perltoc.pod +____________________________________________________________________________ +[ 10265] By: jhi on 2001/05/28 18:33:32 + Log: Subject: [PATCH] RE: [20000223.001] no test cases for splice(@array) + From: "Richard Soderberg" <rs@crystalflame.net> + Date: Mon, 28 May 2001 12:31:23 -0700 + Message-ID: <NAEKLNAAHLMBPMPNBMLEOEFLDFAA.rs@crystalflame.net> + Branch: perl + ! t/op/splice.t +____________________________________________________________________________ +[ 10264] By: jhi on 2001/05/28 17:59:00 + Log: The #10260 was too bold: locales and utf8 still do not mix. + Branch: perl + ! t/pragma/locale.t +____________________________________________________________________________ +[ 10263] By: jhi on 2001/05/28 17:52:25 + Log: Subject: [PATCH] Pod nitpicks + From: "Richard Soderberg" <rs@crystalflame.net> + Date: Mon, 28 May 2001 10:08:58 -0700 + Message-ID: <NAEKLNAAHLMBPMPNBMLEOEFGDFAA.rs@crystalflame.net> + Branch: perl + ! pod/perl571delta.pod pod/perlapi.pod sv.h +____________________________________________________________________________ +[ 10262] By: jhi on 2001/05/28 17:50:05 + Log: Upgrade to I18N::LangTags 0.22. + Branch: perl + + lib/I18N/LangTags/List.pod + ! MANIFEST lib/I18N/LangTags.pm +____________________________________________________________________________ +[ 10261] By: jhi on 2001/05/28 17:23:40 + Log: A slightly more serious bug found by -Mutf8; op/misc and + lib/complex dumped core. + Branch: perl + ! regexec.c +____________________________________________________________________________ +[ 10260] By: jhi on 2001/05/28 16:58:11 + Log: Various buglets shaken out by -Mutf8. + Branch: perl + ! t/io/utf8.t t/lib/charnames.t t/lib/lc-language.t + ! t/pragma/locale.t +____________________________________________________________________________ +[ 10259] By: jhi on 2001/05/28 15:48:46 + Log: STDERR looks much like STDOUT. (Subtest #2 wasn't really okay.) + Branch: perl + ! t/lib/carp.t +____________________________________________________________________________ +[ 10258] By: jhi on 2001/05/28 15:32:41 + Log: Subject: [PATCH] todo patch + From: Artur Bergman <artur@contiller.se> + Date: Mon, 28 May 2001 17:03:51 +0200 + Message-ID: <B7383577.F34%artur@contiller.se> + Branch: perl + ! pod/perltodo.pod +____________________________________________________________________________ +[ 10257] By: jhi on 2001/05/28 15:31:25 + Log: Subject: typo in perlguts.pod + From: "John P. Linderman" <jpl@research.att.com> + Date: Mon, 28 May 2001 09:35:47 -0400 (EDT) + Message-Id: <200105281335.JAA27851@raptor.research.att.com> + Branch: perl + ! pod/perlguts.pod +____________________________________________________________________________ +[ 10256] By: jhi on 2001/05/28 15:30:42 + Log: Subject: AIX and gcc (moving targets) + From: "H.Merijn Brand" <h.m.brand@hccnet.nl> + Date: Mon, 28 May 2001 12:54:04 +0200 + Message-Id: <20010528124531.9FAB.H.M.BRAND@hccnet.nl> + Branch: perl + ! hints/aix.sh +____________________________________________________________________________ +[ 10255] By: jhi on 2001/05/28 15:28:55 + Log: Subject: Re: perlio + multiple perl_alloc..destruct + From: Doug MacEachern <dougm@covalent.net> + Date: Sun, 27 May 2001 13:47:13 -0700 (PDT) + Message-ID: <Pine.LNX.4.21.0105271340370.5938-100000@mako.covalent.net> + Branch: perl + ! perlio.c +____________________________________________________________________________ +[ 10254] By: jhi on 2001/05/28 15:27:49 + Log: The #10251 wasn't quite up-to-the-code. + Branch: perl + ! t/op/misc.t +____________________________________________________________________________ +[ 10253] By: jhi on 2001/05/28 15:26:39 + Log: Test case for bug 20010526.004, fixed in #10252. + Branch: perl + ! t/op/taint.t +____________________________________________________________________________ +[ 10252] By: jhi on 2001/05/28 15:26:14 + Log: Subject: Re: [ID 20010526.004] Taint looses value + From: Hugo <hv@crypt.compulink.co.uk> + Date: Sun, 27 May 2001 20:39:32 +0100 + Message-Id: <200105271939.UAA27591@crypt.compulink.co.uk> + Branch: perl + ! sv.c +____________________________________________________________________________ +[ 10251] By: jhi on 2001/05/28 15:13:40 + Log: Test case for 20010422.005, fixed by #10250. + Branch: perl + ! t/op/misc.t +____________________________________________________________________________ +[ 10250] By: jhi on 2001/05/28 15:11:16 + Log: Subject: [PATCH] Re: [ID 20010422.005] perl -e '{s//${}/; //}' # segfaults on FreeBSD + From: Spider Boardman <spider@Orb.Nashua.NH.US> + Date: Mon, 28 May 2001 06:39:12 -0400 + Message-Id: <200105281039.GAA03962@Orb.Nashua.NH.US> + Branch: perl + ! toke.c +____________________________________________________________________________ +[ 10249] By: jhi on 2001/05/28 15:09:24 + Log: Linerewrapping. + Branch: perl + ! lib/open.pm +____________________________________________________________________________ +[ 10248] By: jhi on 2001/05/28 15:09:07 + Log: Updates on the modules list. + Branch: perl + ! t/lib/1_compile.t +____________________________________________________________________________ +[ 10247] By: jhi on 2001/05/28 15:03:35 + Log: Add a test for PerlIO. + + (I probably got the crlf/raw thing wrong for clrfy platforms...) + Branch: perl + + t/lib/perlio.t + ! MANIFEST lib/PerlIO.pm +____________________________________________________________________________ +[ 10246] By: jhi on 2001/05/28 14:24:08 + Log: Add a test for carp et alia. + Branch: perl + + t/lib/carp.t + ! MANIFEST +____________________________________________________________________________ +[ 10245] By: jhi on 2001/05/28 13:42:55 + Log: Adding the new test would be swell. + Branch: perl + + t/pragma/autouse.t +____________________________________________________________________________ +[ 10244] By: jhi on 2001/05/28 13:42:34 + Log: Add a test for the autouse pragma. + Branch: perl + ! MANIFEST +____________________________________________________________________________ +[ 10243] By: jhi on 2001/05/28 13:26:25 + Log: Subject: [ID 20010528.001] use autouse 'URI::Escape' => qw(URI::Escape::uri_escape) failed + From: dLux <dlux@spam.sch.bme.hu> + Date: Sun, 27 May 2001 16:14:26 +0200 + Message-Id: <E1541JK-0000YC-00@dl.sch.bme.hu> + Branch: perl + ! lib/autouse.pm +____________________________________________________________________________ +[ 10242] By: jhi on 2001/05/28 13:21:50 + Log: Subject: [PATCH #2] RE: [ID 20010528.002] dprofpp: "-R" does not work + From: "Richard Soderberg" <rs@crystalflame.net> + Date: Mon, 28 May 2001 03:56:36 -0700 + Message-ID: <NAEKLNAAHLMBPMPNBMLEEEFADFAA.rs@crystalflame.net> + Branch: perl + ! utils/dprofpp.PL +____________________________________________________________________________ +[ 10241] By: jhi on 2001/05/28 13:18:56 + Log: Subject: Re: [ID 20010522.003] Time::Local module bug + From: "Stephen P. Potter" <spp@spotter.yi.org> + Date: Tue, 22 May 2001 11:40:25 -0400 + Message-Id: <20010522154030.584F4729E2@belgarath.spotter.yi.org> + + Subject: Re: [ID 20010522.003] Time::Local module bug + From: Ronald J Kimball <rjk@linguist.Thayer.dartmouth.edu> + Date: Tue, 22 May 2001 11:50:19 -0400 + Message-ID: <20010522115019.D48634@linguist.thayer.dartmouth.edu> + Branch: perl + ! lib/Time/Local.pm +____________________________________________________________________________ +[ 10240] By: jhi on 2001/05/27 22:44:49 + Log: The PERL_DL_NONLAZY can have whitespace in front. + Branch: perl + ! t/lib/extutils.t +____________________________________________________________________________ +[ 10239] By: jhi on 2001/05/27 21:23:21 + Log: Document strftime() and strptime(). + Branch: perl + ! ext/POSIX/POSIX.pod ext/Time/Piece/Piece.pm t/lib/time-piece.t +____________________________________________________________________________ +[ 10238] By: jhi on 2001/05/27 20:29:07 + Log: Make Time::Piece::strptime() to be a function, not a method. + Branch: perl + ! ext/Time/Piece/Piece.pm t/lib/time-piece.t +____________________________________________________________________________ +[ 10237] By: jhi on 2001/05/27 20:22:09 + Log: Tweak the test to be more portable. + Branch: perl + ! t/lib/extutils.t +____________________________________________________________________________ +[ 10236] By: jhi on 2001/05/27 19:15:54 + Log: Subject: PATCH: Re: Re: Attributes that tie + From: Leon Brocard <acme@astray.com> + Date: Sun, 27 May 2001 12:37:29 +0100 + Message-ID: <20010527123729.A22663@ns0.astray.com> + + Document that variable attributes are not currently usable + for tieing. (An ugly limitation that should be fixed.) + Branch: perl + ! lib/attributes.pm +____________________________________________________________________________ +[ 10235] By: jhi on 2001/05/27 19:03:31 + Log: Microperl tweaks. + Branch: perl + ! Makefile.micro uconfig.h uconfig.sh +____________________________________________________________________________ +[ 10234] By: jhi on 2001/05/27 18:22:09 + Log: Add make target for microperl (kind of silly, but convenient). + Branch: perl + ! Makefile.SH +____________________________________________________________________________ +[ 10233] By: jhi on 2001/05/27 18:18:56 + Log: O_APPEND and O_TRUNC are not portable. (Not available e.g. + for microperl.) + Branch: perl + ! doio.c +____________________________________________________________________________ +[ 10232] By: jhi on 2001/05/27 17:57:18 + Log: Subject: [PATCH] [ID 19991013.005] utime undef, undef, @files + From: rspier@pobox.com (Robert Spier) + Date: Sat, 26 May 2001 20:05:23 -0400 + Message-ID: <15120.17603.148648.12430@rls.cx> + + Subject: Re: [PATCH] [ID 19991013.005] utime undef, undef, @files + From: rspier@pobox.com (Robert Spier) + Date: Sun, 27 May 2001 00:23:12 -0400 + Message-ID: <15120.33072.511966.767230@rls.cx> + Branch: perl + ! doio.c pod/perlfunc.pod +____________________________________________________________________________ +[ 10231] By: jhi on 2001/05/27 15:45:20 + Log: Regen toc. (And add the README.tru64 from #10230 to MANIFEST.) + Branch: perl + ! MANIFEST pod/buildtoc.PL pod/perl.pod pod/perltoc.pod +____________________________________________________________________________ +[ 10230] By: jhi on 2001/05/27 15:41:06 + Log: Add README.tru64. + Branch: perl + + README.tru64 +____________________________________________________________________________ +[ 10229] By: jhi on 2001/05/27 13:50:57 + Log: Integrate Locale::Maketext 1.01 from Sean Burke. + Branch: perl + + lib/Locale/Maketext.pm lib/Locale/Maketext.pod + + lib/Locale/Maketext/TPJ13.pod t/lib/lc-maketext.t + ! MANIFEST +____________________________________________________________________________ +[ 10228] By: jhi on 2001/05/27 13:43:38 + Log: Integrate I18N::LangTags from Sean Burke. + + TODO: the language list from RFC 3066 needs to be integrated + and made available somehow. The list is included in the + I18N-LangTags 0.21 distribution, but it is undocumented + and unconnected to the module. + Branch: perl + + lib/I18N/LangTags.pm t/lib/i18n-langtags.t + ! MANIFEST +____________________________________________________________________________ +[ 10227] By: jhi on 2001/05/27 01:41:33 + Log: Allow 'eval "v200"' to work (part of 20000323.059); fix as + envisioned by Sarathy. + Branch: perl + ! t/op/ver.t toke.c +____________________________________________________________________________ +[ 10226] By: jhi on 2001/05/27 00:28:34 + Log: Subject: [ID 20010525.001] Pod typo nits fixed + From: lvirden@cas.org + Date: Fri, 25 May 2001 06:57:43 -0400 (EDT) + Message-Id: <200105251057.f4PAvhY13003@lwv26awu.cas.org> + + minus the perlsolaris decimation plus the + + Subject: Re: [ID 20010525.001] Pod typo nits fixed + From: "Philip Newton" <Philip.Newton@gmx.net> + Date: Fri, 25 May 2001 18:05:55 +0200 + Message-Id: <200105251604.f4PG4kt15034@chaos.wustl.edu> + Branch: perl + ! README.amiga README.cygwin README.mpeix pod/perl5005delta.pod + ! pod/perldebtut.pod pod/perlebcdic.pod pod/perlfaq3.pod + ! pod/perlhack.pod pod/perltoc.pod pod/perltodo.pod + ! pod/perlutil.pod pod/perlxstut.pod +____________________________________________________________________________ +[ 10225] By: jhi on 2001/05/26 22:38:16 + Log: return clauses are nice. + Branch: perl + ! sv.c +____________________________________________________________________________ +[ 10224] By: jhi on 2001/05/26 22:35:31 + Log: Subject: change 10199 backwards? + From: Doug MacEachern <dougm@covalent.net> + Date: Sat, 26 May 2001 11:26:07 -0700 (PDT) + Message-ID: <Pine.LNX.4.21.0105261118510.22038-100000@mako.covalent.net> + Branch: perl + ! sv.c +____________________________________________________________________________ +[ 10223] By: jhi on 2001/05/26 22:31:46 + Log: Subject: Re: 5.6.*, bleadperl: bugs in pp_concat + From: Hugo <hv@crypt.compulink.co.uk> + Date: Sat, 26 May 2001 17:05:12 +0100 + Message-Id: <200105261605.RAA12295@crypt.compulink.co.uk> + Branch: perl + + t/op/gmagic.t + ! MANIFEST doop.c embed.h embed.pl global.sym + ! lib/File/Basename.pm objXSUB.h perlapi.c pod/perlapi.pod + ! pp_hot.c proto.h sv.c sv.h t/pragma/warn/pp_hot +____________________________________________________________________________ +[ 10222] By: jhi on 2001/05/26 22:10:38 + Log: Regen headers for #10221. + Branch: perl + ! global.sym objXSUB.h perlapi.c +____________________________________________________________________________ +[ 10221] By: jhi on 2001/05/26 22:06:06 + Log: Subject: [ID 20010506.012] Patch for 5.6.1 embed.pl (shared libperl&mod_perl) + From: Juha Laiho <juha.laiho@Elma.Net> + Date: Thu, 3 May 2001 09:51:30 +0300 + Message-Id: <200105030651.JAA327254@tokka.elma.fi> + Branch: perl + ! embed.pl +____________________________________________________________________________ +[ 10220] By: jhi on 2001/05/26 22:01:30 + Log: Subject: Re: [PATCH] Re: stability of sort()? + From: "John P. Linderman" <jpl@research.att.com> + Date: Sat, 26 May 2001 13:27:19 -0400 + Message-Id: <200105261727.NAA06654@raptor.research.att.com> + Branch: perl + ! pod/perlfunc.pod +____________________________________________________________________________ +[ 10219] By: jhi on 2001/05/26 14:02:34 + Log: Integrate perlio. + Branch: perl + !> pod/perlguts.pod +____________________________________________________________________________ +[ 10218] By: jhi on 2001/05/26 13:39:52 + Log: Subject: [PATCH perl@10210] PerlIO for VMS + From: "Craig A. Berry" <craigberry@mac.com> + Date: Sat, 26 May 2001 09:34:11 -0500 + Message-Id: <a05100e0ab734816701a5@[172.16.52.1]> + Branch: perl + ! configure.com doio.c iperlsys.h perlio.c perlio.h perliol.h + ! perlsdio.h vms/ext/Stdio/Stdio.xs vms/gen_shrfls.pl vms/vms.c + ! vms/vmsish.h +____________________________________________________________________________ +[ 10217] By: jhi on 2001/05/26 13:19:05 + Log: Subject: patch to fix: [ID 20010524.004] perl5db.pl version 1.12 doesn't stop on breakpoints + From: David Dyck <dcd@tc.fluke.com> + Date: Fri, 25 May 2001 00:03:04 -0700 (PDT) + Message-ID: <Pine.LNX.4.33.0105242354030.17331-100000@dd.tc.fluke.com> + Branch: perl + ! lib/perl5db.pl +____________________________________________________________________________ +[ 10216] By: jhi on 2001/05/26 13:17:47 + Log: Subject: utf8 regexp tests + From: Hugo <hv@crypt.compulink.co.uk> + Date: Fri, 25 May 2001 22:35:01 +0100 + Message-Id: <200105252135.WAA03197@crypt.compulink.co.uk> + Branch: perl + ! t/op/regexp.t +____________________________________________________________________________ +[ 10215] By: jhi on 2001/05/26 13:15:40 + Log: Subject: [PATCH] Re: stability of sort()? + From: Nicholas Clark <nick@ccl4.org> + Date: Fri, 25 May 2001 22:40:19 +0100 + Message-ID: <20010525224019.B86445@plum.flirble.org> + Branch: perl + ! pod/perlfunc.pod +____________________________________________________________________________ +[ 10214] By: jhi on 2001/05/26 13:14:30 + Log: Subject: Re: [ID 20010426.005] Magic not being removed at scope exit [PATCH] + From: John Peacock <jpeacock@rowman.com> + Date: Thu, 24 May 2001 22:14:01 -0400 + Message-ID: <3B0DBFE9.A7C49084@rowman.com> + Branch: perl + ! mg.c scope.c sv.c +____________________________________________________________________________ +[ 10213] By: jhi on 2001/05/26 13:08:56 + Log: Subject: [PATCH] Re: [PATCH] Re: h2xs [was Re: HEAR YE, HEAR YE!] + From: Nicholas Clark <nick@ccl4.org> + Date: Sun, 20 May 2001 19:24:13 +0100 + Message-ID: <20010520192413.G83222@plum.flirble.org> + Branch: perl + + lib/ExtUtils/Constant.pm t/lib/extutils.t + ! MANIFEST utils/h2xs.PL +____________________________________________________________________________ +[ 10212] By: nick on 2001/05/26 09:49:28 + Log: Change perlguts docs to not suggest PUSHi etc. for multiple results, + add a few more notes there on use of mortals on the stack. + Branch: perlio + ! pod/perlguts.pod +____________________________________________________________________________ +[ 10211] By: nick on 2001/05/26 09:05:36 + Log: Integrate mainline + Branch: perlio + +> t/lib/fcntl.t t/pragma/vars.t + !> (integrate 49 files) +____________________________________________________________________________ +[ 10210] By: jhi on 2001/05/25 12:29:16 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ [ 10209] By: jhi on 2001/05/25 12:24:45 Log: Based on @@ -20,7 +20,7 @@ # $Id: Head.U,v 3.0.1.9 1997/02/28 15:02:09 ram Exp $ # -# Generated on Thu May 24 17:54:36 EET DST 2001 [metaconfig 3.0 PL70] +# Generated on Tue May 29 17:16:31 EET DST 2001 [metaconfig 3.0 PL70] # (with additional metaconfig patches by perlbug@perl.org) cat >c1$$ <<EOF @@ -6692,13 +6692,13 @@ fi : Find perl5.005 or later. echo "Looking for a previously installed perl5.005 or later... " case "$perl5" in -'') for tdir in `echo "$binexp:$PATH" | $sed "s/$path_sep/ /g"`; do +'') for tdir in `echo "$binexp$path_sep$PATH" | $sed "s/$path_sep/ /g"`; do : Check if this perl is recent and can load a simple module - if $test -x $tdir/perl && $tdir/perl -Mless -e 'use 5.005;' >/dev/null 2>&1; then + if $test -x $tdir/perl$exe_ext && $tdir/perl -Mless -e 'use 5.005;' >/dev/null 2>&1; then perl5=$tdir/perl break; - elif $test -x $tdir/perl5 && $tdir/perl5 -Mless -e 'use 5.005;' >/dev/null 2>&1; then - perl5=$tdir/perl + elif $test -x $tdir/perl5$exe_ext && $tdir/perl5 -Mless -e 'use 5.005;' >/dev/null 2>&1; then + perl5=$tdir/perl5 break; fi done @@ -6763,7 +6763,7 @@ else { EOPL chmod +x getverlist case "$inc_version_list" in -'') if test -x "$perl5"; then +'') if test -x "$perl5$exe_ext"; then dflt=`$perl5 getverlist` else dflt='none' @@ -14588,7 +14588,7 @@ else xxx=`echo '#include <signal.h>' | $cppstdin $cppminus $cppflags 2>/dev/null | $grep '^[ ]*#.*include' | - $awk "{print \\$$fieldn}" | $sed 's!"!!g' | $sort | $uniq` + $awk "{print \\$$fieldn}" | $sed 's!"!!g' | $sed 's!\\\\\\\\!/!g' | $sort | $uniq` fi : Check this list of files to be sure we have parsed the cpp output ok. : This will also avoid potentially non-existent files, such @@ -57,6 +57,7 @@ README.plan9 Notes about Plan9 port README.qnx Notes about QNX port README.solaris Notes about Solaris port README.threads Notes about multithreading +README.tru64 Notes about Tru64 README.vmesa Notes about VM/ESA port README.vms Notes about installing the VMS port README.vos Notes about Stratus VOS port @@ -720,6 +721,7 @@ lib/Env.pm Map environment into ordinary variables lib/Exporter.pm Exporter base class lib/Exporter/Heavy.pm Complicated routines for Exporter lib/ExtUtils/Command.pm Utilities for Make on non-UNIX platforms +lib/ExtUtils/Constant.pm generate XS code to import C header constants lib/ExtUtils/Embed.pm Utilities for embedding Perl in C programs lib/ExtUtils/Install.pm Handles 'make install' on extensions lib/ExtUtils/Installed.pm Information on installed extensions @@ -764,12 +766,17 @@ lib/FindBin.pm Find name of currently executing program lib/Getopt/Long.pm Fetch command options (GetOptions) lib/Getopt/Std.pm Fetch command options (getopt, getopts) lib/I18N/Collate.pm Routines to do strxfrm-based collation +lib/I18N/LangTags.pm I18N::LangTags +lib/I18N/LangTags/List.pod list of tags for human languages lib/IPC/Open2.pm Open a two-ended pipe lib/IPC/Open3.pm Open a three-ended pipe! lib/Locale/Constants.pm Locale::Codes lib/Locale/Country.pm Locale::Codes lib/Locale/Currency.pm Locale::Codes lib/Locale/Language.pm Locale::Codes +lib/Locale/Maketext.pm Locale::Maketext +lib/Locale/Maketext.pod Locale::Maketext documentation +lib/Locale/Maketext/TPJ13.pod Locale::Maketext documentation article lib/Math/BigFloat.pm An arbitrary precision floating-point arithmetic package lib/Math/BigInt.pm An arbitrary precision integer arithmetic package lib/Math/Complex.pm A Complex package @@ -1446,6 +1453,13 @@ t/io/tell.t See if file seeking works t/io/utf8.t See if file seeking works t/lib/1_compile.t See if the various libraries and extensions compile t/lib/MyFilter.pm Helper file for t/lib/filter-simple.t +t/lib/Test/fail.t See if Test works +t/lib/Test/mix.t See if Test works +t/lib/Test/onfail.t See if Test works +t/lib/Test/qr.t See if Test works +t/lib/Test/skip.t See if Test works +t/lib/Test/success.t See if Test works +t/lib/Test/todo.t See if Test works t/lib/abbrev.t See if Text::Abbrev works t/lib/ansicolor.t See if Term::ANSIColor works t/lib/anydbm.t See if AnyDBM_File works @@ -1461,6 +1475,7 @@ t/lib/bigfloat.t See if bigfloat.pl works t/lib/bigfltpm.t See if BigFloat.pm works t/lib/bigint.t See if bigint.pl works t/lib/bigintpm.t See if BigInt.pm works +t/lib/carp.t See if Carp works t/lib/cgi-esc.t See if CGI.pm works t/lib/cgi-form.t See if CGI.pm works t/lib/cgi-function.t See if CGI.pm works @@ -1504,6 +1519,7 @@ t/lib/env-array.t See if Env works for arrays t/lib/env.t See if Env works t/lib/errno.t See if Errno works t/lib/exporter.t See if Exporter works +t/lib/extutils.t See if extutils work t/lib/fatal.t See if Fatal works t/lib/fcntl.t See if Fcntl works t/lib/fields.t See if base/fields works @@ -1514,6 +1530,7 @@ t/lib/filefunc.t See if File::Spec::Functions works t/lib/filehand.t See if FileHandle works t/lib/filepath.t See if File::Path works t/lib/filespec.t See if File::Spec works +t/lib/filestat.t See if File::stat works t/lib/filter-simple.t See if Filter::Simple works t/lib/filter-util.pl See if Filter::Util::Call works t/lib/filter-util.t See if Filter::Util::Call works @@ -1537,6 +1554,7 @@ t/lib/h2ph.pht Generated output from h2ph.h by h2ph, for comparison t/lib/h2ph.t See if h2ph works like it should t/lib/hostname.t See if Sys::Hostname works t/lib/i18n-collate.t See if I18N::Collate works +t/lib/i18n-langtags.t See if I18N::LangTags work t/lib/io_const.t See if constants from IO work t/lib/io_dir.t See if directory-related methods from IO work t/lib/io_dup.t See if dup()-related methods from IO work @@ -1558,6 +1576,7 @@ t/lib/lc-constants.t See if Locale::Codes work t/lib/lc-country.t See if Locale::Codes work t/lib/lc-currency.t See if Locale::Codes work t/lib/lc-language.t See if Locale::Codes work +t/lib/lc-maketext.t See if Locale::Maketext works t/lib/lc-uk.t See if Locale::Codes work t/lib/md5-aaa.t See if Digest::MD5 extension works t/lib/md5-align.t See if Digest::MD5 extension works @@ -1568,6 +1587,9 @@ t/lib/mimeb64u.t see whether MIME::Base64 works t/lib/mimeqp.t see whether MIME::QuotedPrint works t/lib/ndbm.t See if NDBM_File works t/lib/net-hostent.t See if Net::hostent works +t/lib/net-nent.t See if Net::netent works +t/lib/net-pent.t See if Net::protoent works +t/lib/net-sent.t See if Net::servtent works t/lib/next.t See if NEXT works t/lib/odbm.t See if ODBM_File works t/lib/opcode.t See if Opcode works @@ -1576,6 +1598,7 @@ t/lib/open3.t See if IPC::Open3 works t/lib/ops.t See if Opcode works t/lib/parsewords.t See if Text::ParseWords works t/lib/peek.t See if Devel::Peek works +t/lib/perlio.t See if PerlIO works t/lib/ph.t See if h2ph works t/lib/posix.t See if POSIX works t/lib/safe1.t See if Safe works @@ -1640,7 +1663,9 @@ t/lib/tie-stdarray.t Test for Tie::StdArray t/lib/tie-stdhandle.t Test for Tie::StdHandle t/lib/tie-stdpush.t Test for Tie::StdArray t/lib/tie-substrhash.t Test for Tie::SubstrHash +t/lib/time-gmtime.t Test for Time::gmtime t/lib/time-hires.t Test for Time::HiRes +t/lib/time-localtime.t Test for Time::localtime t/lib/time-piece.t Test for Time::Piece t/lib/timelocal.t See if Time::Local works t/lib/trig.t See if Math::Trig works @@ -1657,6 +1682,8 @@ t/lib/u-reftype.t Scalar::Util t/lib/u-sum.t List::Util t/lib/u-tainted.t Scalar::Util t/lib/u-weak.t Scalar::Util +t/lib/user-grent.t See if User::grwent works +t/lib/user-pwent.t See if User::pwent works t/lib/xs-typemap.t test that typemaps work t/op/64bitint.t See if 64 bit integers work t/op/anonsub.t See if anonymous subroutines work @@ -1692,6 +1719,7 @@ t/op/filetest.t See if file tests work t/op/flip.t See if range operator works t/op/fork.t See if fork works t/op/glob.t See if <*> works +t/op/gmagic.t See if GMAGIC works t/op/goto.t See if goto works t/op/goto_xs.t See if "goto &sub" works on XSUBs t/op/grent.t See if getgr*() functions work @@ -1799,6 +1827,7 @@ t/pod/testcmp.pl Module to compare output against expected results t/pod/testp2pt.pl Module to test Pod::PlainText for a given file t/pod/testpchk.pl Module to test Pod::Checker for a given file t/pod/testpods/lib/Pod/Stuff.pm Sample data for find.t +t/pragma/autouse.t See if autouse works t/pragma/constant.t See if compile-time constants work t/pragma/diagnostics.t See if diagnostics.pm works t/pragma/locale.t See if locale support works diff --git a/Makefile.SH b/Makefile.SH index 340303b81a..9ba53130c0 100644 --- a/Makefile.SH +++ b/Makefile.SH @@ -558,6 +558,13 @@ perl.gprof: /usr/bin/gprof perl.gprof.config $(MAKE) PERL_SUFFIX=.gprof PERL_PROFILING=-pg perl @echo "Now you may run perl.gprof and then run gprof perl.gprof." +# Microperl. This is just a convenience thing if one happens to +# build also the full Perl and therefore the real big Makefile: +# usually one should manually explicitly issue the below command. + +microperl: + $(MAKE) -f Makefile.micro + # This version, if specified in Configure, does ONLY those scripts which need # set-id emulation. Suidperl must be setuid root. It contains the "taint" # checks as well as the special code to validate that the script in question diff --git a/Makefile.micro b/Makefile.micro index 1ac87b4ed7..304db0b972 100644 --- a/Makefile.micro +++ b/Makefile.micro @@ -1,7 +1,8 @@ CC = cc LD = $(CC) DEFINES = -DPERL_CORE -DPERL_MICRO -CFLAGS = $(DEFINES) +OPTIMIZE = +CFLAGS = $(DEFINES) $(OPTIMIZE) LIBS = -lm _O = .o diff --git a/README.tru64 b/README.tru64 new file mode 100644 index 0000000000..c551a36594 --- /dev/null +++ b/README.tru64 @@ -0,0 +1,82 @@ +If you read this file _as_is_, just ignore the funny characters you see. +It is written in the POD format (see pod/perlpod.pod) which is specially +designed to be readable as is. + +=head1 NAME + +README.tru64 - Perl version 5 on Tru64 (formerly known as Digital UNIX formerly known as DEC OSF/1) systems + +=head1 DESCRIPTION + +This document describes various features of Compaq's (formerly Digital's) +Unix operating system (Tru64) that will affect how Perl version 5 +is compiled and/or runs. + +=head2 Compiling Perl 5 on Tru64 + +The recommended compiler to use in Tru64 is the native C compiler. +The native compiler produces much faster code (the speed difference is +noticeable: several dozen percentages) and also more correct code: if you +are considering using the GNU C compiler you should use the gcc 2.95.3 +release since older gcc releases are known to produce buggy code when +compiling Perl. + +=head2 Using Large Files with Perl on Tru64 + +In Tru64 Perl is automatically able to use large files, that is, files +larger than 2 gigabytes, there is no need to use the Configure +-Duselargefiles option as described in INSTALL. + +=head2 Threaded Perl on Tru64 + +To compile Perl to use the old Perl 5.005 threads model, run Configure +with the -Dusethreads -Duse5005threads options as described in INSTALL. +This will probably only work in Tru64 4.0 and newer releases, older +operating releases like 3.2 aren't probably going to work properly +with threads. + +Beware: the Perl 5.005 threads model is known to have bugs, for +example the regular expressions are not thread-safe. The bugs are +very hard to fix are and therefore the 5.005 threads model is still +classified as an experimental feature. + +=head2 64-bit Perl on Tru64 + +In Tru64 Perl's integers are automatically 64-bit wide, there is +no need to use the Configure -Duse64bitint option as described +in INSTALL. Similarly, there is no need for -Duse64bitall. + +=head2 Warnings about floating-point overflow when compiling Perl on Tru64 + +When compiling Perl in Tru64 you may (depending on the compiler +release) see two warnings like this + + cc: Warning: util.c, line 3797: In this statement, floating-point overflow occurs in evaluating the expression "1.8e308". (floatoverfl) + return HUGE_VAL; + -----------^ + + cc: Warning: POSIX.xs, line 1304: In this statement, floating-point overflow occurs in evaluating the expression "1.8e308". (floatoverfl) + return HUGE_VAL; + -------------------^ + +The exact line numbers may vary between Perl releases. +The warnings are benign and can be ignored. + +When the file F<pp_sys.c> is being compiled you may (depending on the +operating system release) see an additional compiler flag being used: +C<-DNO_EFF_ONLY_OK>. This is normal and refers to a feature that is +relevant only if you use the C<filetest> pragma. In older releases of +the operating system the feature was broken and the NO_EFF_ONLY_OK +instructs Perl not to use the feature. + +=head1 Testing Perl on Tru64 + +During "make test" the C<comp/cpp> will be skipped because on Tru64 it +cannot be tested before Perl has been installed. The test refers to +the use of the C<-P> option of Perl. + +=head1 AUTHOR + +Jarkko Hietaniemi <jhi@iki.fi> + +=cut diff --git a/configure.com b/configure.com index 209f4ecdfc..3beba6980f 100644 --- a/configure.com +++ b/configure.com @@ -4672,7 +4672,7 @@ $ d_locconv="undef" $ d_setlocale="undef" $ ENDIF $ d_stdio_ptr_lval_sets_cnt="undef" -$ d_stdio_ptr_lval_nochange_cnt="undef" +$ d_stdio_ptr_lval_nochange_cnt="define" $! $! Sockets? $ if Has_Socketshr .OR. Has_Dec_C_Sockets @@ -143,7 +143,14 @@ Perl_do_openn(pTHX_ GV *gv, register char *name, I32 len, int as_raw, if (num_svs != 0) { Perl_croak(aTHX_ "panic: sysopen with multiple args"); } - if (rawmode & (O_WRONLY|O_RDWR|O_APPEND|O_CREAT|O_TRUNC)) + if (rawmode & (O_WRONLY|O_RDWR|O_CREAT +#ifdef O_APPEND /* Not fully portable. */ + |O_APPEND +#endif +#ifdef O_TRUNC /* Not fully portable. */ + |O_TRUNC +#endif + )) TAINT_PROPER("sysopen"); mode[ix++] = '#'; /* Marker to openn to use numeric "sysopen" */ @@ -566,7 +573,7 @@ Perl_do_openn(pTHX_ GV *gv, register char *name, I32 len, int as_raw, #ifdef VMS if (savefd != PerlIO_fileno(PerlIO_stdin())) { char newname[FILENAME_MAX+1]; - if (fgetname(fp, newname)) { + if (PerlIO_getname(fp, newname)) { if (fd == PerlIO_fileno(PerlIO_stdout())) Perl_vmssetuserlnm(aTHX_ "SYS$OUTPUT", newname); if (fd == PerlIO_fileno(PerlIO_stderr())) Perl_vmssetuserlnm(aTHX_ "SYS$ERROR", newname); } @@ -1667,20 +1674,31 @@ nothing in the core. } utbuf; #endif + SV* accessed = *++mark; + SV* modified = *++mark; + void * utbufp = &utbuf; + + /* be like C, and if both times are undefined, let the C + library figure out what to do. This usually means + "current time" */ + + if ( accessed == &PL_sv_undef && modified == &PL_sv_undef ) + utbufp = NULL; + Zero(&utbuf, sizeof utbuf, char); #ifdef BIG_TIME - utbuf.actime = (Time_t)SvNVx(*++mark); /* time accessed */ - utbuf.modtime = (Time_t)SvNVx(*++mark); /* time modified */ + utbuf.actime = (Time_t)SvNVx(accessed); /* time accessed */ + utbuf.modtime = (Time_t)SvNVx(modified); /* time modified */ #else - utbuf.actime = (Time_t)SvIVx(*++mark); /* time accessed */ - utbuf.modtime = (Time_t)SvIVx(*++mark); /* time modified */ + utbuf.actime = (Time_t)SvIVx(accessed); /* time accessed */ + utbuf.modtime = (Time_t)SvIVx(modified); /* time modified */ #endif APPLY_TAINT_PROPER(); tot = sp - mark; while (++mark <= sp) { char *name = SvPVx(*mark, n_a); APPLY_TAINT_PROPER(); - if (PerlLIO_utime(name, &utbuf)) + if (PerlLIO_utime(name, utbufp)) tot--; } } @@ -2103,7 +2121,6 @@ Perl_start_glob (pTHX_ SV *tmpglob, IO *io) char rslt[NAM$C_MAXRSS+1+sizeof(unsigned short int)] = {'\0','\0'}; char vmsspec[NAM$C_MAXRSS+1]; char *rstr = rslt + sizeof(unsigned short int), *begin, *end, *cp; - char tmpfnam[L_tmpnam] = "SYS$SCRATCH:"; $DESCRIPTOR(dfltdsc,"SYS$DISK:[]*.*;"); PerlIO *tmpfp; STRLEN i; @@ -2118,7 +2135,6 @@ Perl_start_glob (pTHX_ SV *tmpglob, IO *io) ((struct NAM *)((struct FAB *)cxt)->fab$l_nam)->nam$l_fnb but that's unsupported, so I don't want to do it now and have it bite someone in the future. */ - strcat(tmpfnam,PerlLIO_tmpnam(NULL)); cp = SvPV(tmpglob,i); for (; i; i--) { if (cp[i] == ';') hasver = 1; @@ -2135,7 +2151,7 @@ Perl_start_glob (pTHX_ SV *tmpglob, IO *io) break; } } - if ((tmpfp = PerlIO_open(tmpfnam,"w+","fop=dlt")) != NULL) { + if ((tmpfp = PerlIO_tmpfile()) != NULL) { Stat_t st; if (!PerlLIO_stat(SvPVX(tmpglob),&st) && S_ISDIR(st.st_mode)) ok = ((wilddsc.dsc$a_pointer = tovmspath(SvPVX(tmpglob),vmsspec)) != NULL); @@ -667,14 +667,16 @@ Perl_do_join(pTHX_ register SV *sv, SV *del, register SV **mark, register SV **s ++mark; } + sv_setpv(sv, ""); + if (PL_tainting && SvMAGICAL(sv)) + SvTAINTED_off(sv); + if (items-- > 0) { - sv_setpv(sv, ""); if (*mark) sv_catsv(sv, *mark); mark++; } - else - sv_setpv(sv,""); + if (delimlen) { for (; items > 0; items--,mark++) { sv_catsv(sv,del); @@ -642,7 +642,6 @@ #define sv_2iv Perl_sv_2iv #define sv_2mortal Perl_sv_2mortal #define sv_2nv Perl_sv_2nv -#define sv_2pv Perl_sv_2pv #define sv_2pvutf8 Perl_sv_2pvutf8 #define sv_2pvbyte Perl_sv_2pvbyte #define sv_2uv Perl_sv_2uv @@ -659,8 +658,6 @@ #define sv_catpvf Perl_sv_catpvf #define sv_vcatpvf Perl_sv_vcatpvf #define sv_catpv Perl_sv_catpv -#define sv_catpvn Perl_sv_catpvn -#define sv_catsv Perl_sv_catsv #define sv_chop Perl_sv_chop #define sv_clean_all Perl_sv_clean_all #define sv_clean_objs Perl_sv_clean_objs @@ -692,7 +689,6 @@ #define sv_peek Perl_sv_peek #define sv_pos_u2b Perl_sv_pos_u2b #define sv_pos_b2u Perl_sv_pos_b2u -#define sv_pvn_force Perl_sv_pvn_force #define sv_pvutf8n_force Perl_sv_pvutf8n_force #define sv_pvbyten_force Perl_sv_pvbyten_force #define sv_reftype Perl_sv_reftype @@ -712,7 +708,6 @@ #define sv_setref_pvn Perl_sv_setref_pvn #define sv_setpv Perl_sv_setpv #define sv_setpvn Perl_sv_setpvn -#define sv_setsv Perl_sv_setsv #define sv_taint Perl_sv_taint #define sv_tainted Perl_sv_tainted #define sv_unmagic Perl_sv_unmagic @@ -832,7 +827,6 @@ #define sv_pv Perl_sv_pv #define sv_pvutf8 Perl_sv_pvutf8 #define sv_pvbyte Perl_sv_pvbyte -#define sv_utf8_upgrade Perl_sv_utf8_upgrade #define sv_utf8_downgrade Perl_sv_utf8_downgrade #define sv_utf8_encode Perl_sv_utf8_encode #define sv_utf8_decode Perl_sv_utf8_decode @@ -1176,6 +1170,12 @@ #endif #if defined(PERL_OBJECT) #endif +#define sv_setsv_flags Perl_sv_setsv_flags +#define sv_catpvn_flags Perl_sv_catpvn_flags +#define sv_catsv_flags Perl_sv_catsv_flags +#define sv_utf8_upgrade_flags Perl_sv_utf8_upgrade_flags +#define sv_pvn_force_flags Perl_sv_pvn_force_flags +#define sv_2pv_flags Perl_sv_2pv_flags #define ck_anoncode Perl_ck_anoncode #define ck_bitop Perl_ck_bitop #define ck_concat Perl_ck_concat @@ -2138,7 +2138,6 @@ #define sv_2iv(a) Perl_sv_2iv(aTHX_ a) #define sv_2mortal(a) Perl_sv_2mortal(aTHX_ a) #define sv_2nv(a) Perl_sv_2nv(aTHX_ a) -#define sv_2pv(a,b) Perl_sv_2pv(aTHX_ a,b) #define sv_2pvutf8(a,b) Perl_sv_2pvutf8(aTHX_ a,b) #define sv_2pvbyte(a,b) Perl_sv_2pvbyte(aTHX_ a,b) #define sv_2uv(a) Perl_sv_2uv(aTHX_ a) @@ -2154,8 +2153,6 @@ #define sv_bless(a,b) Perl_sv_bless(aTHX_ a,b) #define sv_vcatpvf(a,b,c) Perl_sv_vcatpvf(aTHX_ a,b,c) #define sv_catpv(a,b) Perl_sv_catpv(aTHX_ a,b) -#define sv_catpvn(a,b,c) Perl_sv_catpvn(aTHX_ a,b,c) -#define sv_catsv(a,b) Perl_sv_catsv(aTHX_ a,b) #define sv_chop(a,b) Perl_sv_chop(aTHX_ a,b) #define sv_clean_all() Perl_sv_clean_all(aTHX) #define sv_clean_objs() Perl_sv_clean_objs(aTHX) @@ -2187,7 +2184,6 @@ #define sv_peek(a) Perl_sv_peek(aTHX_ a) #define sv_pos_u2b(a,b,c) Perl_sv_pos_u2b(aTHX_ a,b,c) #define sv_pos_b2u(a,b) Perl_sv_pos_b2u(aTHX_ a,b) -#define sv_pvn_force(a,b) Perl_sv_pvn_force(aTHX_ a,b) #define sv_pvutf8n_force(a,b) Perl_sv_pvutf8n_force(aTHX_ a,b) #define sv_pvbyten_force(a,b) Perl_sv_pvbyten_force(aTHX_ a,b) #define sv_reftype(a,b) Perl_sv_reftype(aTHX_ a,b) @@ -2206,7 +2202,6 @@ #define sv_setref_pvn(a,b,c,d) Perl_sv_setref_pvn(aTHX_ a,b,c,d) #define sv_setpv(a,b) Perl_sv_setpv(aTHX_ a,b) #define sv_setpvn(a,b,c) Perl_sv_setpvn(aTHX_ a,b,c) -#define sv_setsv(a,b) Perl_sv_setsv(aTHX_ a,b) #define sv_taint(a) Perl_sv_taint(aTHX_ a) #define sv_tainted(a) Perl_sv_tainted(aTHX_ a) #define sv_unmagic(a,b) Perl_sv_unmagic(aTHX_ a,b) @@ -2320,7 +2315,6 @@ #define sv_pv(a) Perl_sv_pv(aTHX_ a) #define sv_pvutf8(a) Perl_sv_pvutf8(aTHX_ a) #define sv_pvbyte(a) Perl_sv_pvbyte(aTHX_ a) -#define sv_utf8_upgrade(a) Perl_sv_utf8_upgrade(aTHX_ a) #define sv_utf8_downgrade(a,b) Perl_sv_utf8_downgrade(aTHX_ a,b) #define sv_utf8_encode(a) Perl_sv_utf8_encode(aTHX_ a) #define sv_utf8_decode(a) Perl_sv_utf8_decode(aTHX_ a) @@ -2663,6 +2657,12 @@ #endif #if defined(PERL_OBJECT) #endif +#define sv_setsv_flags(a,b,c) Perl_sv_setsv_flags(aTHX_ a,b,c) +#define sv_catpvn_flags(a,b,c,d) Perl_sv_catpvn_flags(aTHX_ a,b,c,d) +#define sv_catsv_flags(a,b,c) Perl_sv_catsv_flags(aTHX_ a,b,c) +#define sv_utf8_upgrade_flags(a,b) Perl_sv_utf8_upgrade_flags(aTHX_ a,b) +#define sv_pvn_force_flags(a,b,c) Perl_sv_pvn_force_flags(aTHX_ a,b,c) +#define sv_2pv_flags(a,b,c) Perl_sv_2pv_flags(aTHX_ a,b,c) #define ck_anoncode(a) Perl_ck_anoncode(aTHX_ a) #define ck_bitop(a) Perl_ck_bitop(aTHX_ a) #define ck_concat(a) Perl_ck_concat(aTHX_ a) @@ -5166,6 +5166,18 @@ #endif #if defined(PERL_OBJECT) #endif +#define Perl_sv_setsv_flags CPerlObj::Perl_sv_setsv_flags +#define sv_setsv_flags Perl_sv_setsv_flags +#define Perl_sv_catpvn_flags CPerlObj::Perl_sv_catpvn_flags +#define sv_catpvn_flags Perl_sv_catpvn_flags +#define Perl_sv_catsv_flags CPerlObj::Perl_sv_catsv_flags +#define sv_catsv_flags Perl_sv_catsv_flags +#define Perl_sv_utf8_upgrade_flags CPerlObj::Perl_sv_utf8_upgrade_flags +#define sv_utf8_upgrade_flags Perl_sv_utf8_upgrade_flags +#define Perl_sv_pvn_force_flags CPerlObj::Perl_sv_pvn_force_flags +#define sv_pvn_force_flags Perl_sv_pvn_force_flags +#define Perl_sv_2pv_flags CPerlObj::Perl_sv_2pv_flags +#define sv_2pv_flags Perl_sv_2pv_flags #define Perl_ck_anoncode CPerlObj::Perl_ck_anoncode #define ck_anoncode Perl_ck_anoncode #define Perl_ck_bitop CPerlObj::Perl_ck_bitop @@ -1925,7 +1925,7 @@ Ap |char* |rninstr |const char* big|const char* bigend \ Ap |Sighandler_t|rsignal |int i|Sighandler_t t p |int |rsignal_restore|int i|Sigsave_t* t p |int |rsignal_save |int i|Sighandler_t t1|Sigsave_t* t2 -p |Sighandler_t|rsignal_state|int i +Ap |Sighandler_t|rsignal_state|int i p |void |rxres_free |void** rsp p |void |rxres_restore |void** rsp|REGEXP* prx p |void |rxres_save |void** rsp|REGEXP* prx @@ -2000,7 +2000,7 @@ Ap |IO* |sv_2io |SV* sv Ap |IV |sv_2iv |SV* sv Apd |SV* |sv_2mortal |SV* sv Ap |NV |sv_2nv |SV* sv -Ap |char* |sv_2pv |SV* sv|STRLEN* lp +Aop |char* |sv_2pv |SV* sv|STRLEN* lp Ap |char* |sv_2pvutf8 |SV* sv|STRLEN* lp Ap |char* |sv_2pvbyte |SV* sv|STRLEN* lp Ap |UV |sv_2uv |SV* sv @@ -2017,8 +2017,8 @@ Apd |SV* |sv_bless |SV* sv|HV* stash Afpd |void |sv_catpvf |SV* sv|const char* pat|... Ap |void |sv_vcatpvf |SV* sv|const char* pat|va_list* args Apd |void |sv_catpv |SV* sv|const char* ptr -Apd |void |sv_catpvn |SV* sv|const char* ptr|STRLEN len -Apd |void |sv_catsv |SV* dsv|SV* ssv +Aopd |void |sv_catpvn |SV* sv|const char* ptr|STRLEN len +Aopd |void |sv_catsv |SV* dsv|SV* ssv Apd |void |sv_chop |SV* sv|char* ptr p |I32 |sv_clean_all p |void |sv_clean_objs @@ -2052,7 +2052,7 @@ Ap |SV* |sv_newref |SV* sv Ap |char* |sv_peek |SV* sv Ap |void |sv_pos_u2b |SV* sv|I32* offsetp|I32* lenp Ap |void |sv_pos_b2u |SV* sv|I32* offsetp -Apd |char* |sv_pvn_force |SV* sv|STRLEN* lp +Aopd |char* |sv_pvn_force |SV* sv|STRLEN* lp Apd |char* |sv_pvutf8n_force|SV* sv|STRLEN* lp Ap |char* |sv_pvbyten_force|SV* sv|STRLEN* lp Apd |char* |sv_reftype |SV* sv|int ob @@ -2073,7 +2073,7 @@ Apd |SV* |sv_setref_pvn |SV* rv|const char* classname|char* pv \ |STRLEN n Apd |void |sv_setpv |SV* sv|const char* ptr Apd |void |sv_setpvn |SV* sv|const char* ptr|STRLEN len -Apd |void |sv_setsv |SV* dsv|SV* ssv +Aopd |void |sv_setsv |SV* dsv|SV* ssv Ap |void |sv_taint |SV* sv Ap |bool |sv_tainted |SV* sv Apd |int |sv_unmagic |SV* sv|int type @@ -2204,7 +2204,7 @@ Ap |char* |sv_2pvbyte_nolen|SV* sv Ap |char* |sv_pv |SV *sv Ap |char* |sv_pvutf8 |SV *sv Ap |char* |sv_pvbyte |SV *sv -Apd |STRLEN |sv_utf8_upgrade|SV *sv +Aopd |STRLEN |sv_utf8_upgrade|SV *sv ApdM |bool |sv_utf8_downgrade|SV *sv|bool fail_ok Apd |void |sv_utf8_encode |SV *sv ApdM |bool |sv_utf8_decode |SV *sv @@ -2586,3 +2586,9 @@ s |void |xstat |int #if defined(PERL_OBJECT) }; #endif +Apd |void |sv_setsv_flags |SV* dsv|SV* ssv|I32 flags +Apd |void |sv_catpvn_flags|SV* sv|const char* ptr|STRLEN len|I32 flags +Apd |void |sv_catsv_flags |SV* dsv|SV* ssv|I32 flags +Apd |STRLEN |sv_utf8_upgrade_flags|SV *sv|I32 flags +Apd |char* |sv_pvn_force_flags|SV* sv|STRLEN* lp|I32 flags +Ap |char* |sv_2pv_flags |SV* sv|STRLEN* lp|I32 flags diff --git a/ext/POSIX/Makefile.PL b/ext/POSIX/Makefile.PL index 73bb02dddb..081f934833 100644 --- a/ext/POSIX/Makefile.PL +++ b/ext/POSIX/Makefile.PL @@ -2,7 +2,9 @@ use ExtUtils::MakeMaker; use Config; my @libs; if ($^O ne 'MSWin32') { - @libs = ('LIBS' => ["-lm -lposix -lcposix"]); + my $libs = "-lm -lposix -lcposix"; + $Config{gccversion} ne "" and $libs .= " -lgcc"; + @libs = ('LIBS' => [ $libs ]); } WriteMakefile( NAME => 'POSIX', diff --git a/ext/POSIX/POSIX.pod b/ext/POSIX/POSIX.pod index 10199e9a2b..9eb9116f96 100644 --- a/ext/POSIX/POSIX.pod +++ b/ext/POSIX/POSIX.pod @@ -1230,6 +1230,8 @@ The string for Tuesday, December 12, 1995. $str = POSIX::strftime( "%A, %B %d, %Y", 0, 0, 0, 12, 11, 95, 2 ); print "$str\n"; +See also L<Time::Piece>. + =item strlen strlen() is C-specific, use C<length()> instead, see L<perlfunc/length>. diff --git a/ext/Storable/Makefile.PL b/ext/Storable/Makefile.PL index c8151f3083..49270b32f1 100644 --- a/ext/Storable/Makefile.PL +++ b/ext/Storable/Makefile.PL @@ -16,10 +16,14 @@ use ExtUtils::MakeMaker; use Config; +my @libs = (); +$Config{gccversion} eq "" or @libs = ('LIBS' => ["-lgcc"]); + WriteMakefile( - 'NAME' => 'Storable', + 'NAME' => 'Storable', 'DISTNAME' => "Storable", - 'MAN3PODS' => {}, + @libs, + 'MAN3PODS' => {}, 'VERSION_FROM' => 'Storable.pm', 'dist' => { SUFFIX => 'gz', COMPRESS => 'gzip -f' }, ); diff --git a/ext/Time/Piece/Piece.pm b/ext/Time/Piece/Piece.pm index 215f489514..5789ec406d 100644 --- a/ext/Time/Piece/Piece.pm +++ b/ext/Time/Piece/Piece.pm @@ -1,7 +1,7 @@ package Time::Piece; use strict; -use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); require Exporter; require DynaLoader; @@ -15,6 +15,10 @@ use Carp; gmtime ); +@EXPORT_OK = qw( + strptime +); + %EXPORT_TAGS = ( ':override' => 'internal', ); @@ -777,9 +781,8 @@ sub _ptime { } sub strptime { - my $time = shift; my $format = shift; - my $stime = @_ ? shift : "$time"; + my $stime = shift; my %ptime; while ($format ne '') { @@ -944,16 +947,20 @@ http://www.xray.mpe.mpg.de/mailing-lists/perl5-porters/2000-01/msg00241.html =head1 USAGE -After importing this module, when you use localtime or gmtime in a scalar -context, rather than getting an ordinary scalar string representing the -date and time, you get a Time::Piece object, whose stringification happens -to produce the same effect as the localtime and gmtime functions. There is -also a new() constructor provided, which is the same as localtime(), except -when passed a Time::Piece object, in which case it's a copy constructor. The -following methods are available on the object: +After importing this module, when you use localtime(0 or gmtime() in +scalar context, rather than getting an ordinary scalar string +representing the date and time, you get a Time::Piece object, whose +stringification happens to produce the same effect as the localtime() +and gmtime(0 functions. + +There is also a new() constructor provided, which is the same as +localtime(), except when passed a Time::Piece object, in which case +it's a copy constructor. - $t->s # 0..61 [1] - # and 61: leap second and double leap second +The following methods are available on the object: + + $t->s # 0..61 + # 60 and 61: leap second and double leap second $t->sec # same as $t->s $t->second # same as $t->s $t->min # 0..59 @@ -1001,17 +1008,18 @@ following methods are available on the object: $t->week # week number (ISO 8601) - $t->is_leap_year # true if it its - $t->month_last_day # 28-31 + $t->is_leap_year # true if it its + Time::Piece::_is_leap_year($year) # true if it its + $t->month_last_day # 28..31 $t->time_separator($s) # set the default separator (default ":") $t->date_separator($s) # set the default separator (default "-") - $t->wday(@days) # set the default weekdays, abbreviated - $t->weekday_names(@days) # set the default weekdays - $t->mon_names(@days) # set the default months, abbreviated - $t->month_names(@days) # set the default months + $t->wday_names(@days) # set the default weekday names, abbreviated + $t->weekday_names(@days) # set the default weekday names + $t->mon_names(@days) # set the default month names, abbreviated + $t->month_names(@days) # set the default month names - $t->strftime($format) # data and time formatting + $t->strftime($format) # date and time formatting $t->strftime() # "Tue, 29 Feb 2000 12:34:56 GMT" $t->_strftime($format) # same as POSIX::strftime (without the @@ -1019,24 +1027,29 @@ following methods are available on the object: # calls the operating system libraries, # as opposed to $t->strftime() + use Time::Piece 'strptime'; # date parsing + my %p = strptime("%H:%M", "12:34"); # $p{H} and ${M} will be set + =head2 Local Locales Both wdayname (day) and monname (month) allow passing in a list to use to index the name of the days against. This can be useful if you need to implement some form of localisation without actually installing or -using locales. +using the locales provided by the operating system. - my @days = qw( Dimanche Lundi Merdi Mercredi Jeudi Vendredi Samedi ); + my @weekdays = qw( Dimanche Lundi Merdi Mercredi Jeudi Vendredi Samedi ); - my $french_day = localtime->day(@days); + my $french_day = localtime->day(@weekdays); These settings can be overriden globally too: - Time::Piece::weekday_names(@days); + Time::Piece::weekday_names(@weekdays); + Time::Piece::wday_names(@wdays); Or for months: Time::Piece::month_names(@months); + Time::Piece::mon_names(@mon); And locally for months: @@ -1074,17 +1087,206 @@ Date comparisons are also possible, using the full suite of "<", ">", The ISO 8601 standard defines the date format to be YYYY-MM-DD, and the time format to be hh:mm:ss (24 hour clock), and if combined, they -should be concatenated with date first and with a capital 'T' in front -of the time. +should be concatenated with the date first and with a capital 'T' in +front of the time. =head2 Week Number The I<week number> may be an unknown concept to some readers. The ISO 8601 standard defines that weeks begin on a Monday and week 1 of the -year is the week that includes both January 4th and the first Thursday -of the year. In other words, if the first Monday of January is the -2nd, 3rd, or 4th, the preceding days of the January are part of the -last week of the preceding year. Week numbers range from 1 to 53. +year is the week that includes both January the 4th and the first +Thursday of the year. In other words, if the first Monday of January +is the 2nd, 3rd, or 4th, the preceding days of the January are part of +the last week of the preceding year. Week numbers range from 1 to 53. + +=head2 strftime method + +The strftime() method can be used to format Time::Piece objects for output. +The argument to strftime() is the format string to be used, for example: + + $t->strftime("%H:%M"); + +will output the hours and minutes concatenated with a colon. The +available format characters are as in the standard strftime() function +(unless otherwise indicated the implementation is in pure Perl, +no operating system strftime() is invoked): + +=over 4 + +=item %% + +The percentage character "%". + +=item %a + +The abbreviated weekday name, e.g. 'Tue'. Note that the abbreviations +are not necessarily three characters wide in all languages. + +=item %A + +The weekday name, e.g. 'Tuesday'. + +=item %b + +The abbreviated month name, e.g. 'Feb'. Note that the abbreviations +are not necessarily three characters wide in all languages. + +=item %B + +The month name, e.g. 'February'. + +=item %c + +The ctime format, or the localtime()/gmtime() format: C<%a %b %m %H:%M:%S %Y>. + +(Should be avoided: use $t->timedate instead.) + +=item %C + +The 'centuries' number, e.g. 19 for the year 1999 and 20 for the year 2000. + +=item %d + +The zero-filled right-aligned day of the month, e.g. '09' or '10'. + +=item %D + +C<%m/%d/%d>. + +(Should be avoided: use $t->date instead.) + +=item %e + +The space-filled right-aligned day of the month, e.g. ' 9' or '10'. + +=item %h + +Same as C<%b>, the abbreviated monthname. + +=item %H + +The zero-filled right-aligned hours in 24 hour clock, e.g. '09' or '10'. + +=item %I + +The zero-filled right-aligned hours in 12 hour clock, e.g. '09' or '10'. + +=item %j + +The zero-filled right-aligned day of the year, e.g. '001' or '365'. + +=item %m + +The zero-filled right-aligned month number, e.g. '09' or '10'. + +=item %M + +The zero-filled right-aligned minutes, e.g. '09' or '10'. + +=item %n + +The newline character "\n". + +=item %p + +Notice that this is somewhat meaningless in 24 hour clocks. + +=item %r + +C<%I:%M:%S %p>. + +(Should be avoided: use $t->time instead.) + +=item %R + +C<%H:%M>. + +=item %S + +The zero-filled right-aligned seconds, e.g. '09' or '10'. + +=item %t + +The tabulator character "\t". + +=item %T + +C<%H:%M%S> + +(Should be avoided: use $t->time instead.) + +=item %u + +The day of the week with Monday as 1 (one) and Sunday as 7. + +=item %U + +The zero-filled right-aligned week number of the year, Sunday as the +first day of the week, from '00' to '53'. + +(Currently taken care by the operating system strftime().) + +=item %V + +The zero-filled right-aligned week of the year, e.g. '01' or '53'. +(ISO 8601) + +=item %w + +The day of the week with Sunday as 0 (zero) and Monday as 1 (one). + +=item %W + +The zero-filled right-aligned week number of the year, Monday as the +first day of the week, from '00' to '53'. + +(Currently taken care by the operating system strftime().) + +=item %x + +C<%m/%d/%y>. + +(Should be avoided: use $t->date instead.) + +=item %y + +The zero-filled right-aligned last two numbers of the year, e.g. 99 +for 1999 and 01 for 2001. + +(Should be avoided: this is the Y2K bug alive and well.) + +=item %Y + +The year, e.g. 1999 or 2001. + +=item %Z + +The timezone name, for example "GMT" or "EET". + +(Taken care by the operating system strftime().) + +=back + +The format C<Z> and any of the C<O*> and C<E*> formats are handled by +the operating system, not by Time::Piece, because those formats are +usually rather unportable and non-standard. (For example 'MST' can +mean almost anything: 'Mountain Standard Time' or 'Moscow Standard Time'.) + +=head2 strptime function + +You can export the strptime() function and use it to parse date and +time strings back to numbers. For example the following will return +the hours, minutes, and seconds as $parse{H}, $parse{M}, and $parse{S}. + + use Time::Piece 'strptime'; + my %parse = strptime('%H:%M:S', '12:34:56'); + +For 'compound' formats like for example 'T' strptime() will return +the 'components'. + +strptime() does not perform overly strict checks on the dates and +times, it will be perfectly happy with the 31st day of February, +for example. Stricter validation should be performed by other means. =head2 Global Overriding @@ -1095,7 +1297,9 @@ including the ':override' tag in the import list: =head1 SEE ALSO -The excellent Calendar FAQ at http://www.tondering.dk/claus/calendar.html +The excellent Calendar FAQ at L<http://www.tondering.dk/claus/calendar.html> + +L<strftime(3)>, L<strftime(3)> =head1 AUTHOR diff --git a/global.sym b/global.sym index f54a3fca08..17e3df3104 100644 --- a/global.sym +++ b/global.sym @@ -333,6 +333,7 @@ Perl_regnext Perl_repeatcpy Perl_rninstr Perl_rsignal +Perl_rsignal_state Perl_savepv Perl_savepvn Perl_savestack_grow @@ -570,3 +571,9 @@ Perl_ptr_table_clear Perl_ptr_table_free Perl_sys_intern_clear Perl_sys_intern_init +Perl_sv_setsv_flags +Perl_sv_catpvn_flags +Perl_sv_catsv_flags +Perl_sv_utf8_upgrade_flags +Perl_sv_pvn_force_flags +Perl_sv_2pv_flags diff --git a/hints/aix.sh b/hints/aix.sh index c3741319e8..e85b68b8ef 100644 --- a/hints/aix.sh +++ b/hints/aix.sh @@ -126,6 +126,7 @@ d_setreuid='undef' # Changes for dynamic linking by Wayne Scott <wscott@ichips.intel.com> # # Tell perl which symbols to export for dynamic linking. +cccdlflags='none' # All AIX code is position independent case "$cc" in *gcc*) ccdlflags='-Xlinker' ;; *) ccversion=`lslpp -L | grep 'C for AIX Compiler$' | awk '{print $2}'` @@ -299,6 +300,9 @@ EOM # Remove xlc-spefific -qflags. ccflags="`echo $ccflags | sed -e 's@ -q[^ ]*@ @g' -e 's@^-q[^ ]* @@g'`" ldflags="`echo $ldflags | sed -e 's@ -q[^ ]*@ @g' -e 's@^-q[^ ]* @@g'`" + # Move xld-spefific -bflags. + ccflags="`echo $ccflags | sed -e 's@ -b@ -Wl,-b@g'`" + ldflags="`echo $ldflags | sed -e 's@ -b@ -Wl,-b@g'`" echo >&4 "(using ccflags $ccflags)" echo >&4 "(using ldflags $ldflags)" ;; diff --git a/hints/os2.sh b/hints/os2.sh index 5ffa589d31..49588f16d0 100644 --- a/hints/os2.sh +++ b/hints/os2.sh @@ -131,6 +131,8 @@ aout_cppflags="-DDOSISH -DPERL_IS_AOUT -DOS2=2 -DEMBED -I. $_defemxcrtrev" aout_use_clib='c' aout_usedl='undef' aout_archobjs="os2.o dl_os2.o" +# Not listed in dynamic_ext, but needed for AOUT static_ext nevertheless +aout_extra_static_ext="OS2::DLL" # variable which have different values for aout compile used_aout='d_shrplib useshrplib plibext lib_ext obj_ext ar plibext d_fork lddlflags ldflags ccflags use_clib usedl archobjs cppflags' diff --git a/iperlsys.h b/iperlsys.h index 6c093dd53f..237fab26d6 100644 --- a/iperlsys.h +++ b/iperlsys.h @@ -303,7 +303,17 @@ struct IPerlStdIOInfo #define PerlSIO_fputs(f,s) fputs(s,f) #define PerlSIO_fflush(f) Fflush(f) #define PerlSIO_fgets(s, n, fp) fgets(s,n,fp) -#define PerlSIO_ungetc(c,f) ungetc(c,f) +#if defined(VMS) && defined(__DECC) + /* Unusual definition of ungetc() here to accomodate fast_sv_gets()' + * belief that it can mix getc/ungetc with reads from stdio buffer */ + int decc$ungetc(int __c, FILE *__stream); +# define PerlSIO_ungetc(c,f) ((c) == EOF ? EOF : \ + ((*(f) && !((*(f))->_flag & _IONBF) && \ + ((*(f))->_ptr > (*(f))->_base)) ? \ + ((*(f))->_cnt++, *(--(*(f))->_ptr) = (c)) : decc$ungetc(c,f))) +#else +# define PerlSIO_ungetc(c,f) ungetc(c,f) +#endif #define PerlSIO_fileno(f) fileno(f) #define PerlSIO_fdopen(f, s) fdopen(f,s) #define PerlSIO_freopen(p, m, f) freopen(p,m,f) diff --git a/lib/ExtUtils/Constant.pm b/lib/ExtUtils/Constant.pm new file mode 100644 index 0000000000..59a3126e97 --- /dev/null +++ b/lib/ExtUtils/Constant.pm @@ -0,0 +1,630 @@ +package ExtUtils::Constant; + +=head1 NAME + +ExtUtils::Constant - generate XS code to import C header constants + +=head1 SYNOPSIS + + use ExtUtils::Constant qw (constant_types C_constant XS_constant); + print constant_types(); # macro defs + foreach (C_constant (undef, "IV", undef, undef, undef, @names) ) { + print $_, "\n"; # C constant subs + } + print "MODULE = Foo PACKAGE = Foo\n"; + print XS_constant ("Foo", {NV => 1, IV => 1}); # XS for Foo::constant + +=head1 DESCRIPTION + +ExtUtils::Constant facilitates generating C and XS wrapper code to allow +perl modules to AUTOLOAD constants defined in C library header files. +It is principally used by the C<h2xs> utility, on which this code is based. +It doesn't contain the routines to scan header files to extract these +constants. + +=head1 USAGE + +Generally one only needs to call the 3 functions shown in the synopsis, +C<constant_types()>, C<C_constant> and C<XS_constant>. + +Currently this module understands the following types. h2xs may only know +a subset. The sizes of the numeric types are chosen by the C<Configure> +script at compile time. + +=over 4 + +=item IV + +signed integer, at least 32 bits. + +=item UV + +unsigned integer, the same size as I<IV> + +=item NV + +floating point type, probably C<double>, possibly C<long double> + +=item PV + +NUL terminated string, length will be determined with C<strlen> + +=item PVN + +A fixed length thing, given as a [pointer, length] pair. If you know the +length of a string at compile time you may use this instead of I<PV> + +=back + +=head1 FUNCTIONS + +=over 4 + +=cut + +require 5.006; # I think, for [:cntrl:] in REGEXP +use warnings; +use strict; +use Carp; + +use Exporter; +use vars qw (@ISA $VERSION %XS_Constant %XS_TypeSet @EXPORT_OK %EXPORT_TAGS); +use Text::Wrap; +$Text::Wrap::huge = 'overflow'; +$Text::Wrap::columns = 80; + +@ISA = 'Exporter'; +$VERSION = '0.01'; + +%EXPORT_TAGS = ( 'all' => [ qw( + XS_constant constant_types return_clause memEQ_clause C_stringify + C_constant autoload +) ] ); + +@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +%XS_Constant = ( + IV => 'PUSHi(iv)', + UV => 'PUSHu((UV)iv)', + NV => 'PUSHn(nv)', + PV => 'PUSHp(pv, strlen(pv))', + PVN => 'PUSHp(pv, iv)' +); + +%XS_TypeSet = ( + IV => '*iv_return =', + UV => '*iv_return = (IV)', + NV => '*nv_return =', + PV => '*pv_return =', + PVN => ['*pv_return =', '*iv_return = (IV)'] +); + + +=item C_stringify NAME + +A function which returns a correctly \ escaped version of the string passed +suitable for C's "" or '' + +=cut + +# Hopefully make a happy C identifier. +sub C_stringify { + local $_ = shift; + s/\\/\\\\/g; + s/([\"\'])/\\$1/g; # Grr. fix perl mode. + s/([[:cntrl:]])/sprintf "\\%03o", ord $1/ge; + s/\177/\\177/g; # DEL doesn't seem to be a [:cntrl:] + $_; +} + +=item constant_types + +A function returning a single scalar with C<#define> definitions for the +constants used internally between the generated C and XS functions. + +=cut + +sub constant_types () { + my $start = 1; + my @lines; + push @lines, "#define PERL_constant_NOTFOUND\t$start\n"; $start++; + push @lines, "#define PERL_constant_NOTDEF\t$start\n"; $start++; + foreach (sort keys %XS_Constant) { + push @lines, "#define PERL_constant_IS$_\t$start\n"; $start++; + } + push @lines, << 'EOT'; + +#ifndef NVTYPE +typedef double NV; /* 5.6 and later define NVTYPE, and typedef NV to it. */ +#endif +EOT + + return join '', @lines; +} + +=item memEQ_clause NAME, CHECKED_AT, INDENT + +A function to return a suitable C C<if> statement to check whether I<NAME> +is equal to the C variable C<name>. If I<CHECKED_AT> is defined, then it +is used to avoid C<memEQ> for short names, or to generate a comment to +highlight the position of the character in the C<switch> statement. + +=cut + +sub memEQ_clause { +# if (memEQ(name, "thingy", 6)) { + # Which could actually be a character comparison or even "" + my ($name, $checked_at, $indent) = @_; + $indent = ' ' x ($indent || 4); + my $len = length $name; + + if ($len < 2) { + return $indent . "{\n" if (defined $checked_at and $checked_at == 0); + # We didn't switch, drop through to the code for the 2 character string + $checked_at = 1; + } + if ($len < 3 and defined $checked_at) { + my $check; + if ($checked_at == 1) { + $check = 0; + } elsif ($checked_at == 0) { + $check = 1; + } + if (defined $check) { + my $char = C_stringify (substr $name, $check, 1); + return $indent . "if (name[$check] == '$char') {\n"; + } + } + # Could optimise a memEQ on 3 to 2 single character checks here + $name = C_stringify ($name); + my $body = $indent . "if (memEQ(name, \"$name\", $len)) {\n"; + $body .= $indent . "/* ". (' ' x $checked_at) . '^' + . (' ' x ($len - $checked_at + length $len)) . " */\n" + if defined $checked_at; + return $body; +} + +=item return_clause VALUE, TYPE, INDENT, MACRO + +A function to return a suitable C<#ifdef> clause. I<MACRO> defaults to +I<VALUE> when not defined. If I<TYPE> is aggregate (eg I<PVN> expects both +pointer and length) then I<VALUE> should be a reference to an array of +values in the order expected by the type. + +=cut + +sub return_clause { +##ifdef thingy +# *iv_return = thingy; +# return PERL_constant_ISIV; +##else +# return PERL_constant_NOTDEF; +##endif + my ($value, $type, $indent, $macro) = @_; + $macro = $value unless defined $macro; + $indent = ' ' x ($indent || 6); + + die "Macro must not be a reference" if ref $macro; + my $clause = "#ifdef $macro\n"; + + my $typeset = $XS_TypeSet{$type}; + die "Can't generate code for type $type" unless defined $typeset; + if (ref $typeset) { + die "Type $type is aggregate, but only single value given" + unless ref $value; + foreach (0 .. $#$typeset) { + $clause .= $indent . "$typeset->[$_] $value->[$_];\n"; + } + } else { + die "Aggregate value given for type $type" + if ref $value; + $clause .= $indent . "$typeset $value;\n"; + } + return $clause . <<"EOT"; +${indent}return PERL_constant_IS$type; +#else +${indent}return PERL_constant_NOTDEF; +#endif +EOT +} + +=item params WHAT + +An internal function. I<WHAT> should be a hashref of types the constant +function will return. I<params> returns the list of flags C<$use_iv, $use_nv, +$use_pv> to show which combination of pointers will be needed in the C +argument list. + +=cut + +sub params { + my $what = shift; + foreach (sort keys %$what) { + warn "ExtUtils::Constant doesn't know how to handle values of type $_" unless defined $XS_Constant{$_}; + } + my $use_iv = $what->{IV} || $what->{UV} || $what->{PVN}; + my $use_nv = $what->{NV}; + my $use_pv = $what->{PV} || $what->{PVN}; + return ($use_iv, $use_nv, $use_pv); +} + +=item C_constant SUBNAME, DEFAULT_TYPE, TYPES, INDENT, NAMELEN, ITEM... + +A function that returns a B<list> of C subroutine definitions that return +the value and type of constants when passed the name by the XS wrapper. +I<ITEM...> gives a list of constant names. Each can either be a string, +which is taken as a C macro name, or a reference to a hash with the following +keys + +=over 8 + +=item name + +The name of the constant, as seen by the perl code. + +=item type + +The type of the constant (I<IV>, I<NV> etc) + +=item value + +A C expression for the value of the constant, or a list of C expressions if +the type is aggregate. This defaults to the I<name> if not given. + +=item macro + +The C pre-processor macro to use in the C<#ifdef>. This defaults to the +I<name>, and is mainly used if I<value> is an C<enum>. + +=back + +The first 5 argument can safely be given as C<undef>, and are mainly used +for recursion. I<SUBNAME> defaults to C<constant> if undefined. + +I<DEFAULT_TYPE> is the type returned by C<ITEM>s that don't specify their +type. In turn it defaults to I<IV>. I<TYPES> should be given either as a comma +separated list of types that the C subroutine C<constant> will generate or as +a reference to a hash. I<DEFAULT_TYPE> will be added to the list if not +present, as will any types given in the list of I<ITEM>s. The resultant list +should be the same list of types that C<XS_constant> is given. [Otherwise +C<XS_constant> and C<C_constant> may differ in the number of parameters to the +constant function. I<INDENT> is currently unused and ignored. In future it may +be used to pass in information used to change the C indentation style used.] +The best way to maintain consistency is to pass in a hash reference and let +this function update it. + +I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of +this length, and that the constant name passed in by perl is checked and +also of this length. It is used during recursion, and should be C<undef> +unless the caller has checked all the lengths during code generation, and +the generated subroutine is only to be called with a name of this length. + +=cut + +sub C_constant { + my ($subname, $default_type, $what, $indent, $namelen, @items) = @_; + $subname ||= 'constant'; + # I'm not using this. But a hashref could be used for full formatting without + # breaking this API + $indent ||= 0; + $default_type ||= 'IV'; + if (!ref $what) { + # Convert line of the form IV,UV,NV to hash + $what = {map {$_ => 1} split /,\s*/, ($what || '')}; + # Figure out what types we're dealing with, and assign all unknowns to the + # default type + } + my %items; + foreach (@items) { + my $name; + if (ref $_) { + $name = $_->{name}; + $what->{$_->{type} ||= $default_type} = 1; + } else { + $name = $_; + $_ = {name=>$_, type=>$default_type}; + $what->{$default_type} = 1; + } + warn "ExtUtils::Constant doesn't know how to handle values of type $_ used in macro $name" unless defined $XS_Constant{$_->{type}}; + if (exists $items{$name}) { + die "Multiple definitions for macro $name"; + } + $items{$name} = $_; + } + my ($use_iv, $use_nv, $use_pv) = params ($what); + + my ($body, @subs) = "static int\n$subname (const char *name"; + $body .= ", STRLEN len" unless defined $namelen; + $body .= ", IV *iv_return" if $use_iv; + $body .= ", NV *nv_return" if $use_nv; + $body .= ", const char **pv_return" if $use_pv; + $body .= ") {\n"; + + my @names = sort map {$_->{name}} @items; + my $names = << 'EOT' + /* When generated this function returned values for the list of names given + here. However, subsequent manual editing may have added or removed some. +EOT + . wrap (" ", " ", join (" ", @names) . " */") . "\n"; + + if (defined $namelen) { + # We are a child subroutine. + # Figure out what to switch on. + # (RMS, Spread of jump table, Position, Hashref) + my @best = (1e38, ~0); + foreach my $i (0 .. ($namelen - 1)) { + my ($min, $max) = (~0, 0); + my %spread; + foreach (@names) { + my $char = substr $_, $i, 1; + my $ord = ord $char; + $max = $ord if $ord > $max; + $min = $ord if $ord < $min; + push @{$spread{$char}}, $_; + # warn "$_ $char"; + } + # I'm going to pick the character to split on that minimises the root + # mean square of the number of names in each case. Normally this should + # be the one with the most keys, but it may pick a 7 where the 8 has + # one long linear search. I'm not sure if RMS or just sum of squares is + # actually better. + # $max and $min are for the tie-breaker if the root mean squares match. + # Assuming that the compiler may be building a jump table for the + # switch() then try to minimise the size of that jump table. + # Finally use < not <= so that if it still ties the earliest part of + # the string wins. Because if that passes but the memEQ fails, it may + # only need the start of the string to bin the choice. + # I think. But I'm micro-optimising. :-) + my $ss; + $ss += @$_ * @$_ foreach values %spread; + my $rms = sqrt ($ss / keys %spread); + if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) { + @best = ($rms, $max - $min, $i, \%spread); + } + } + die "Internal error. Failed to pick a switch point for @names" + unless defined $best[2]; + # use Data::Dumper; print Dumper (@best); + my ($offset, $best) = @best[2,3]; + $body .= " /* Names all of length $namelen. */\n"; + $body .= $names; + $body .= " /* Offset $offset gives the best switch position. */\n"; + $body .= " switch (name[$offset]) {\n"; + foreach my $char (sort keys %$best) { + $body .= " case '" . C_stringify ($char) . "':\n"; + foreach my $name (sort @{$best->{$char}}) { + my $thisone = $items{$name}; + my ($value, $macro) = (@$thisone{qw (value macro)}); + $value = $name unless defined $value; + $macro = $name unless defined $macro; + + $body .= memEQ_clause ($name, $offset); # We have checked this offset. + $body .= return_clause ($value, $thisone->{type}, undef, $macro); + $body .= " }\n"; + } + $body .= " break;\n"; + } + $body .= " }\n"; + } else { + # We are the top level. + $body .= " /* Initially switch on the length of the name. */\n"; + $body .= $names; + $body .= " switch (len) {\n"; + # Need to group names of the same length + my @by_length; + foreach (@items) { + push @{$by_length[length $_->{name}]}, $_; + } + foreach my $i (0 .. $#by_length) { + next unless $by_length[$i]; # None of this length + $body .= " case $i:\n"; + if (@{$by_length[$i]} == 1) { + my $thisone = $by_length[$i]->[0]; + my ($name, $value, $macro) = (@$thisone{qw (name value macro)}); + $value = $name unless defined $value; + $macro = $name unless defined $macro; + + $body .= memEQ_clause ($name); + $body .= return_clause ($value, $thisone->{type}, undef, $macro); + $body .= " }\n"; + } else { + push @subs, C_constant ("${subname}_$i", $default_type, $what, $indent, + $i, @{$by_length[$i]}); + $body .= " return ${subname}_$i (name"; + $body .= ", iv_return" if $use_iv; + $body .= ", nv_return" if $use_nv; + $body .= ", pv_return" if $use_pv; + $body .= ");\n"; + } + $body .= " break;\n"; + } + $body .= " }\n"; + } + $body .= " return PERL_constant_NOTFOUND;\n}\n"; + return (@subs, $body); +} + +=item XS_constant PACKAGE, TYPES, SUBNAME, C_SUBNAME + +A function to generate the XS code to implement the perl subroutine +I<PACKAGE>::constant used by I<PACKAGE>::AUTOLOAD to load constants. +This XS code is a wrapper around a C subroutine usually generated by +C<C_constant>, and usually named C<constant>. + +I<TYPES> should be given either as a comma separated list of types that the +C subroutine C<constant> will generate or as a reference to a hash. It should +be the same list of types as C<C_constant> was given. +[Otherwise C<XS_constant> and C<C_constant> may have different ideas about +the number of parameters passed to the C function C<constant>] + +You can call the perl visible subroutine something other than C<constant> if +you give the parameter I<SUBNAME>. The C subroutine it calls defaults to the +the name of the perl visible subroutine, unless you give the parameter +I<C_SUBNAME>. + +=cut + +sub XS_constant { + my $package = shift; + my $what = shift; + my $subname = shift; + my $C_subname = shift; + $subname ||= 'constant'; + $C_subname ||= $subname; + + if (!ref $what) { + # Convert line of the form IV,UV,NV to hash + $what = {map {$_ => 1} split /,\s*/, ($what)}; + } + my ($use_iv, $use_nv, $use_pv) = params ($what); + my $type; + + my $xs = <<"EOT"; +void +$subname(sv) + PREINIT: +#ifdef dXSTARG + dXSTARG; /* Faster if we have it. */ +#else + dTARGET; +#endif + STRLEN len; + int type; +EOT + + if ($use_iv) { + $xs .= " IV iv;\n"; + } else { + $xs .= " /* IV\t\tiv;\tUncomment this if you need to return IVs */\n"; + } + if ($use_nv) { + $xs .= " NV nv;\n"; + } else { + $xs .= " /* NV\t\tnv;\tUncomment this if you need to return NVs */\n"; + } + if ($use_pv) { + $xs .= " const char *pv;\n"; + } else { + $xs .= + " /* const char\t*pv;\tUncomment this if you need to return PVs */\n"; + } + + $xs .= << 'EOT'; + INPUT: + SV * sv; + const char * s = SvPV(sv, len); + PPCODE: +EOT + + if ($use_iv xor $use_nv) { + $xs .= << "EOT"; + /* Change this to $C_subname(s, len, &iv, &nv); + if you need to return both NVs and IVs */ +EOT + } + $xs .= " type = $C_subname(s, len"; + $xs .= ', &iv' if $use_iv; + $xs .= ', &nv' if $use_nv; + $xs .= ', &pv' if $use_pv; + $xs .= ");\n"; + + $xs .= << "EOT"; + /* Return 1 or 2 items. First is error message, or undef if no error. + Second, if present, is found value */ + switch (type) { + case PERL_constant_NOTFOUND: + sv = sv_2mortal(newSVpvf("%s is not a valid $package macro", s)); + break; + case PERL_constant_NOTDEF: + sv = sv_2mortal(newSVpvf( + "Your vendor has not defined $package macro %s used", s)); + break; +EOT + + foreach $type (sort keys %XS_Constant) { + $xs .= "\t/* Uncomment this if you need to return ${type}s\n" + unless $what->{$type}; + $xs .= << "EOT"; + case PERL_constant_IS$type: + EXTEND(SP, 1); + PUSHs(&PL_sv_undef); + $XS_Constant{$type}; + break; +EOT + unless ($what->{$type}) { + chop $xs; # Yes, another need for chop not chomp. + $xs .= " */\n"; + } + } + $xs .= << "EOT"; + default: + sv = sv_2mortal(newSVpvf( + "Unexpected return type %d while processing $package macro %s used", + type, s)); + } +EOT + + return $xs; +} + + +=item autoload PACKAGE, VERSION + +A function to generate the AUTOLOAD subroutine for the module I<PACKAGE> +I<VERSION> is the perl version the code should be backwards compatible with. +It defaults to the version of perl running the subroutine. + +=cut + +sub autoload { + my ($module, $compat_version) = @_; + $compat_version ||= $]; + croak "Can't maintain compatibility back as far as version $compat_version" + if $compat_version < 5; + my $tmp = ( $compat_version < 5.006 ? "" : "our \$AUTOLOAD;" ); + return <<"END"; +sub AUTOLOAD { + # This AUTOLOAD is used to 'autoload' constants from the constant() + # XS function. If a constant is not found then control is passed + # to the AUTOLOAD in AutoLoader. + + my \$constname; + $tmp + (\$constname = \$AUTOLOAD) =~ s/.*:://; + croak "&${module}::constant not defined" if \$constname eq 'constant'; + my (\$error, \$val) = constant(\$constname); + if (\$error) { + if (\$error =~ /is not a valid/) { + \$AutoLoader::AUTOLOAD = \$AUTOLOAD; + goto &AutoLoader::AUTOLOAD; + } else { + croak \$error; + } + } + { + no strict 'refs'; + # Fixed between 5.005_53 and 5.005_61 +#XXX if (\$] >= 5.00561) { +#XXX *\$AUTOLOAD = sub () { \$val }; +#XXX } +#XXX else { + *\$AUTOLOAD = sub { \$val }; +#XXX } + } + goto &\$AUTOLOAD; +} + +END + +} +1; +__END__ + +=back + +=head1 AUTHOR + +Nicholas Clark <nick@ccl4.org> based on the code in C<h2xs> by Larry Wall and +others + +=cut diff --git a/lib/File/Basename.pm b/lib/File/Basename.pm index 14522437e9..cc124744ca 100644 --- a/lib/File/Basename.pm +++ b/lib/File/Basename.pm @@ -213,8 +213,8 @@ sub fileparse { } $tail .= $taint if defined $tail; # avoid warning if $tail == undef - wantarray ? ($basename . $taint, $dirpath . $taint, $tail) - : $basename . $taint; + wantarray ? ($basename .= $taint, $dirpath .= $taint, $tail) + : $basename .= $taint; } diff --git a/lib/I18N/LangTags.pm b/lib/I18N/LangTags.pm new file mode 100644 index 0000000000..f5db28231d --- /dev/null +++ b/lib/I18N/LangTags.pm @@ -0,0 +1,620 @@ + +# Time-stamp: "2001-05-27 19:53:11 MDT" +# Sean M. Burke <sburke@cpan.org> + +require 5.000; +package I18N::LangTags; +use strict; +use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION); # $Debug +require Exporter; +# $Debug = 0; +@ISA = qw(Exporter); +@EXPORT = qw(); +@EXPORT_OK = qw(is_language_tag same_language_tag + extract_language_tags super_languages + similarity_language_tag is_dialect_of + locale2language_tag alternate_language_tags + encode_language_tag + ); + +$VERSION = "0.22"; + +=head1 NAME + +I18N::LangTags - functions for dealing with RFC3066-style language tags + +=head1 SYNOPSIS + + use I18N::LangTags qw(is_language_tag same_language_tag + extract_language_tags super_languages + similarity_language_tag is_dialect_of + locale2language_tag alternate_language_tags + encode_language_tag + ); + +...or whatever of those functions you want to import. Those are +all the exportable functions -- you're free to import only some, +or none at all. By default, none are imported. + +If you don't import any of these functions, assume a C<&I18N::LangTags::> +in front of all the function names in the following examples. + +=head1 DESCRIPTION + +Language tags are a formalism, described in RFC 3066 (obsoleting +1766), for declaring what language form (language and possibly +dialect) a given chunk of information is in. + +This library provides functions for common tasks involving language +tags as they are needed in a variety of protocols and applications. + +Please see the "See Also" references for a thorough explanation +of how to correctly use language tags. + +=over + +=cut + +########################################################################### + +=item * the function is_language_tag($lang1) + +Returns true iff $lang1 is a formally valid language tag. + + is_language_tag("fr") is TRUE + is_language_tag("x-jicarilla") is FALSE + (Subtags can be 8 chars long at most -- 'jicarilla' is 9) + + is_language_tag("sgn-US") is TRUE + (That's American Sign Language) + + is_language_tag("i-Klikitat") is TRUE + (True without regard to the fact noone has actually + registered Klikitat -- it's a formally valid tag) + + is_language_tag("fr-patois") is TRUE + (Formally valid -- altho descriptively weak!) + + is_language_tag("Spanish") is FALSE + is_language_tag("french-patois") is FALSE + (No good -- first subtag has to match + /^([xXiI]|[a-zA-Z]{2,3})$/ -- see RFC3066) + + is_language_tag("x-borg-prot2532") is TRUE + (Yes, subtags can contain digits, as of RFC3066) + +=cut + +sub is_language_tag { + + ## Changes in the language tagging standards may have to be reflected here. + + my($tag) = lc($_[0]); + + return 0 if $tag eq "i" or $tag eq "x"; + # Bad degenerate cases the following + # regexp would erroneously let pass + + return $tag =~ + /^(?: # First subtag + [xi] | [a-z]{2,3} + ) + (?: # Subtags thereafter + - # separator + [a-z0-9]{1,8} # subtag + )* + $/xs ? 1 : 0; +} + +########################################################################### + +=item * the function extract_language_tags($whatever) + +Returns a list of whatever looks like formally valid language tags +in $whatever. Not very smart, so don't get too creative with +what you want to feed it. + + extract_language_tags("fr, fr-ca, i-mingo") + returns: ('fr', 'fr-ca', 'i-mingo') + + extract_language_tags("It's like this: I'm in fr -- French!") + returns: ('It', 'in', 'fr') + (So don't just feed it any old thing.) + +The output is untainted. If you don't know what tainting is, +don't worry about it. + +=cut + +sub extract_language_tags { + + ## Changes in the language tagging standards may have to be reflected here. + + my($text) = + $_[0] =~ m/(.+)/ # to make for an untainted result + ? $1 : '' + ; + + return grep(!m/^[ixIX]$/s, # 'i' and 'x' aren't good tags + $text =~ + m/ + \b + (?: # First subtag + [iIxX] | [a-zA-Z]{2,3} + ) + (?: # Subtags thereafter + - # separator + [a-zA-Z0-9]{1,8} # subtag + )* + \b + /xsg + ); +} + +########################################################################### + +=item * the function same_language_tag($lang1, $lang2) + +Returns true iff $lang1 and $lang2 are acceptable variant tags +representing the same language-form. + + same_language_tag('x-kadara', 'i-kadara') is TRUE + (The x/i- alternation doesn't matter) + same_language_tag('X-KADARA', 'i-kadara') is TRUE + (...and neither does case) + same_language_tag('en', 'en-US') is FALSE + (all-English is not the SAME as US English) + same_language_tag('x-kadara', 'x-kadar') is FALSE + (these are totally unrelated tags) + +C<same_language_tag> works by just seeing whether +C<encode_language_tag($lang1)> is the same as +C<encode_language_tag($lang2)>. + +(Yes, I know this function is named a bit oddly. Call it historic +reasons.) + +=cut + +sub same_language_tag { + my $el1 = &encode_language_tag($_[0]); + return 0 unless defined $el1; + # this avoids the problem of + # encode_language_tag($lang1) eq and encode_language_tag($lang2) + # being true if $lang1 and $lang2 are both undef + + return $el1 eq &encode_language_tag($_[1]) ? 1 : 0; +} + +########################################################################### + +=item * the function similarity_language_tag($lang1, $lang2) + +Returns an integer representing the degree of similarity between +tags $lang1 and $lang2 (the order of which does not matter), where +similarity is the number of common elements on the left, +without regard to case and to x/i- alternation. + + similarity_language_tag('fr', 'fr-ca') is 1 + (one element in common) + similarity_language_tag('fr-ca', 'fr-FR') is 1 + (one element in common) + + similarity_language_tag('fr-CA-joual', + 'fr-CA-PEI') is 2 + similarity_language_tag('fr-CA-joual', 'fr-CA') is 2 + (two elements in common) + + similarity_language_tag('x-kadara', 'i-kadara') is 1 + (x/i- doesn't matter) + + similarity_language_tag('en', 'x-kadar') is 0 + similarity_language_tag('x-kadara', 'x-kadar') is 0 + (unrelated tags -- no similarity) + + similarity_language_tag('i-cree-syllabic', + 'i-cherokee-syllabic') is 0 + (no B<leftmost> elements in common!) + +=cut + +sub similarity_language_tag { + my $lang1 = &encode_language_tag($_[0]); + my $lang2 = &encode_language_tag($_[1]); + + # NB: (i-sil-...)? (i-sgn-...)? + + return undef if !defined($lang1) and !defined($lang2); + return 0 if !defined($lang1) or !defined($lang2); + + my @l1_subtags = split('-', $lang1); + my @l2_subtags = split('-', $lang2); + my $similarity = 0; + + while(@l1_subtags and @l2_subtags) { + if(shift(@l1_subtags) eq shift(@l2_subtags)) { + ++$similarity; + } else { + last; + } + } + return $similarity; +} + +########################################################################### + +=item * the function is_dialect_of($lang1, $lang2) + +Returns true iff language tag $lang1 represents a subdialect of +language tag $lang2. + +B<Get the order right! It doesn't work the other way around!> + + is_dialect_of('en-US', 'en') is TRUE + (American English IS a dialect of all-English) + + is_dialect_of('fr-CA-joual', 'fr-CA') is TRUE + is_dialect_of('fr-CA-joual', 'fr') is TRUE + (Joual is a dialect of (a dialect of) French) + + is_dialect_of('en', 'en-US') is FALSE + (all-English is a NOT dialect of American English) + + is_dialect_of('fr', 'en-CA') is FALSE + + is_dialect_of('en', 'en' ) is TRUE + is_dialect_of('en-US', 'en-US') is TRUE + (B<Note:> these are degenerate cases) + + is_dialect_of('i-mingo-tom', 'x-Mingo') is TRUE + (the x/i thing doesn't matter, nor does case) + +=cut + +sub is_dialect_of { + + my $lang1 = &encode_language_tag($_[0]); + my $lang2 = &encode_language_tag($_[1]); + + return undef if !defined($lang1) and !defined($lang2); + return 0 if !defined($lang1) or !defined($lang2); + + return 1 if $lang1 eq $lang2; + return 0 if length($lang1) < length($lang2); + + $lang1 .= '-'; + $lang2 .= '-'; + return + (substr($lang1, 0, length($lang2)) eq $lang2) ? 1 : 0; +} + +########################################################################### + +=item * the function super_languages($lang1) + +Returns a list of language tags that are superordinate tags to $lang1 +-- it gets this by removing subtags from the end of $lang1 until +nothing (or just "i" or "x") is left. + + super_languages("fr-CA-joual") is ("fr-CA", "fr") + + super_languages("en-AU") is ("en") + + super_languages("en") is empty-list, () + + super_languages("i-cherokee") is empty-list, () + ...not ("i"), which would be illegal as well as pointless. + +If $lang1 is not a valid language tag, returns empty-list in +a list context, undef in a scalar context. + +A notable and rather unavoidable problem with this method: +"x-mingo-tom" has an "x" because the whole tag isn't an +IANA-registered tag -- but super_languages('x-mingo-tom') is +('x-mingo') -- which isn't really right, since 'i-mingo' is +registered. But this module has no way of knowing that. (But note +that same_language_tag('x-mingo', 'i-mingo') is TRUE.) + +More importantly, you assume I<at your peril> that superordinates of +$lang1 are mutually intelligible with $lang1. Consider this +carefully. + +=cut + +sub super_languages { + my $lang1 = $_[0]; + return() unless defined($lang1) && &is_language_tag($lang1); + my @l1_subtags = split('-', $lang1); + + ## Changes in the language tagging standards may have to be reflected here. + + # NB: (i-sil-...)? + + my @supers = (); + foreach my $bit (@l1_subtags) { + push @supers, + scalar(@supers) ? ($supers[-1] . '-' . $bit) : $bit; + } + pop @supers if @supers; + shift @supers if @supers && $supers[0] =~ m<^[iIxX]$>s; + return reverse @supers; +} + +########################################################################### + +=item * the function locale2language_tag($locale_identifier) + +This takes a locale name (like "en", "en_US", or "en_US.ISO8859-1") +and maps it to a language tag. If it's not mappable (as with, +notably, "C" and "POSIX"), this returns empty-list in a list context, +or undef in a scalar context. + + locale2language_tag("en") is "en" + + locale2language_tag("en_US") is "en-US" + + locale2language_tag("en_US.ISO8859-1") is "en-US" + + locale2language_tag("C") is undef or () + + locale2language_tag("POSIX") is undef or () + + locale2language_tag("POSIX") is undef or () + +I'm not totally sure that locale names map satisfactorily to language +tags. Think REAL hard about how you use this. YOU HAVE BEEN WARNED. + +The output is untainted. If you don't know what tainting is, +don't worry about it. + +=cut + +sub locale2language_tag { + my $lang = + $_[0] =~ m/(.+)/ # to make for an untainted result + ? $1 : '' + ; + + return $lang if &is_language_tag($lang); # like "en" + + $lang =~ tr<_><->; # "en_US" -> en-US + $lang =~ s<\.[-_a-zA-Z0-9\.]*><>s; # "en_US.ISO8859-1" -> en-US + + return $lang if &is_language_tag($lang); + + return; +} + +########################################################################### + +=item * the function encode_language_tag($lang1) + +This function, if given a language tag, returns an encoding of it such +that: + +* tags representing different languages never get the same encoding. + +* tags representing the same language always get the same encoding. + +* an encoding of a formally valid language tag always is a string +value that is defined, has length, and is true if considered as a +boolean. + +Note that the encoding itself is B<not> a formally valid language tag. +Note also that you cannot, currently, go from an encoding back to a +language tag that it's an encoding of. + +Note also that you B<must> consider the encoded value as atomic; i.e., +you should not consider it as anything but an opaque, unanalysable +string value. (The internals of the encoding method may change in +future versions, as the language tagging standard changes over time.) + +C<encode_language_tag> returns undef if given anything other than a +formally valid language tag. + +The reason C<encode_language_tag> exists is because different language +tags may represent the same language; this is normally treatable with +C<same_language_tag>, but consider this situation: + +You have a data file that expresses greetings in different languages. +Its format is "[language tag]=[how to say 'Hello']", like: + + en-US=Hiho + fr=Bonjour + i-mingo=Hau' + +And suppose you write a program that reads that file and then runs as +a daemon, answering client requests that specify a language tag and +then expect the string that says how to greet in that language. So an +interaction looks like: + + greeting-client asks: fr + greeting-server answers: Bonjour + +So far so good. But suppose the way you're implementing this is: + + my %greetings; + die unless open(IN, "<in.dat"); + while(<IN>) { + chomp; + next unless /^([^=]+)=(.+)/s; + my($lang, $expr) = ($1, $2); + $greetings{$lang} = $expr; + } + close(IN); + +at which point %greetings has the contents: + + "en-US" => "Hiho" + "fr" => "Bonjour" + "i-mingo" => "Hau'" + +And suppose then that you answer client requests for language $wanted +by just looking up $greetings{$wanted}. + +If the client asks for "fr", that will look up successfully in +%greetings, to the value "Bonjour". And if the client asks for +"i-mingo", that will look up successfully in %greetings, to the value +"Hau'". + +But if the client asks for "i-Mingo" or "x-mingo", or "Fr", then the +lookup in %greetings fails. That's the Wrong Thing. + +You could instead do lookups on $wanted with: + + use I18N::LangTags qw(same_language_tag); + my $repsonse = ''; + foreach my $l2 (keys %greetings) { + if(same_language_tag($wanted, $l2)) { + $response = $greetings{$l2}; + last; + } + } + +But that's rather inefficient. A better way to do it is to start your +program with: + + use I18N::LangTags qw(encode_language_tag); + my %greetings; + die unless open(IN, "<in.dat"); + while(<IN>) { + chomp; + next unless /^([^=]+)=(.+)/s; + my($lang, $expr) = ($1, $2); + $greetings{ + encode_language_tag($lang) + } = $expr; + } + close(IN); + +and then just answer client requests for language $wanted by just +looking up + + $greetings{encode_language_tag($wanted)} + +And that does the Right Thing. + +=cut + +sub encode_language_tag { + # Only similarity_language_tag() is allowed to analyse encodings! + + ## Changes in the language tagging standards may have to be reflected here. + + my($tag) = uc($_[0]); # smash case + return undef unless &is_language_tag($tag); + # If it's not a language tag, its encoding is undef + + $tag =~ s/^[xiXI]-//s; + # Just lop off any leading "x/i-" + # Or I suppose I could do s/^[xiXI]-/_/s or something. + + return "~$tag"; +} + +#-------------------------------------------------------------------------- + +=item * the function alternate_language_tags($lang1) + +This function, if given a language tag, returns all language tags that +are alternate forms of this language tag. (There is little +alternation in the C<current> language tagging formalism, but +extensions to the formalism are under consideration which could add a +great deal of alternation.) + +Examples from the current formalism: + + alternate_language_tags('en') is () + alternate_language_tags('x-mingo-tom') is ('i-mingo-tom') + alternate_language_tags('x-klikitat') is ('i-klikitat') + alternate_language_tags('i-klikitat') is ('x-klikitat') + +This function returns undef if given anything other than a formally +valid language tag. + +=cut + +my %alt = qw( i x x i I X X I ); +sub alternate_language_tags { + ## Changes in the language tagging standards may have to be reflected here. + my $tag = $_[0]; + return() unless &is_language_tag($tag); + + # might as well preserve case + + if($tag =~ /^([XIxi])(-.+)/) { + # This handles all the alternation that exists CURRENTLY + return($alt{$1} . $2); + } + return(); +} + +########################################################################### + +=back + +=head1 ABOUT LOWERCASING + +I've considered making all the above functions that output language +tags return all those tags strictly in lowercase. Having all your +language tags in lowercase does make some things easier. But you +might as well just lowercase as you like, or call +C<encode_language_tag($lang1)> where appropriate. + +=head1 ABOUT UNICODE PLAINTEXT LANGUAGE TAGS + +In some future version of I18N::LangTags, I plan to include support +for RFC2482-style language tags -- which are basically just normal +language tags with their ASCII characters shifted into Plane 14. + +=head1 SEE ALSO + +* L<I18N::LangTags::List|I18N::LangTags::List> + +* RFC 3066, C<ftp://ftp.isi.edu/in-notes/rfc3066.txt>, "Tags for the +Identification of Languages". (Obsoletes RFC 1766) + +* RFC 2277, C<ftp://ftp.isi.edu/in-notes/rfc2277.txt>, "IETF Policy on +Character Sets and Languages". + +* RFC 2231, C<ftp://ftp.isi.edu/in-notes/rfc2231.txt>, "MIME Parameter +Value and Encoded Word Extensions: Character Sets, Languages, and +Continuations". + +* RFC 2482, C<ftp://ftp.isi.edu/in-notes/rfc2482.txt>, +"Language Tagging in Unicode Plain Text". + +* Locale::Codes, in +C<http://www.perl.com/CPAN/modules/by-module/Locale/> + +* ISO 639, "Code for the representation of names of languages", +C<http://www.indigo.ie/egt/standards/iso639/iso639-1-en.html> + +* ISO 639-2, "Codes for the representation of names of languages", +including three-letter codes, +C<http://lcweb.loc.gov/standards/iso639-2/bibcodes.html> + +* The IANA list of registered languages (hopefully up-to-date), +C<ftp://ftp.isi.edu/in-notes/iana/assignments/languages/> + +=head1 COPYRIGHT + +Copyright (c) 1998-2001 Sean M. Burke. All rights reserved. + +This library is free software; you can redistribute it and/or +modify it under the same terms as Perl itself. + +The programs and documentation in this dist are distributed in +the hope that they will be useful, but without any warranty; without +even the implied warranty of merchantability or fitness for a +particular purpose. + +=head1 AUTHOR + +Sean M. Burke C<sburke@cpan.org> + +=cut + +1; + +__END__ diff --git a/lib/I18N/LangTags/List.pod b/lib/I18N/LangTags/List.pod new file mode 100644 index 0000000000..9bb5e07107 --- /dev/null +++ b/lib/I18N/LangTags/List.pod @@ -0,0 +1,1446 @@ +=head1 NAME + +I18n::LangTags::List -- list of tags for human languages + +=head1 SYNOPSIS + + Time-stamp: "2001-05-27 19:55:19 MDT" + [This is not a module; it is documentation] + +=head1 ABOUT LANGUAGE TAGS + +Internet language tags, as defined in RFC 3066, are a formalism +for denoting human languages. The two-letter ISO 639-1 language +codes are well known (as "en" for English), as are their forms +when qualified by a country code ("en-US"). Less well-known are the +arbitrary-length non-ISO codes (like "i-mingo"), and the +recently (in 2001) introduced three-letter ISO-639-2 codes. + +Remember this important facts: + +=over + +=item * + +Language tags are not locale IDs. A locale ID is written with a "_" +instead of a "-", (almost?) always matches C<m/^\w\w_\w\w\b/>, and +I<means> something different than a language tag. A language tag +denotes a language. A locale ID denotes a language I<as used in> +a particular place, in combination with non-linguistic +location-specific information such as what currency in used +there. Locales I<also> often denote character set information, +as in "en_US.ISO8859-1". + +=item * + +Language tags are not for computer languages. + +=item * + +"Dialect" is not a useful term, since there is no objective +criterion for establishing when two languages are +dialects of eachother, or are separate languages. + +=item * + +Language tags are not case-sensitive. en-US, en-us, En-Us, etc., +are all the same tag, and denote the same language. + +=item * + +Not every language tag really refers to a single language. Some +language tags refer to conditions: i-default (system-message text +in English plus maybe other languages), und (undetermined +language). Others (notably lots of the three-letter codes) are +bibliographic tags that classify whole groups of languages, as +with cus "Cushitic (Other)" (i.e., a +language that has been classed as Cushtic, but which has no more +specific code) or the even less linguistically coherent +sai for "South American Indian (Other)". While useful in +bibliography, B<SUCH TAGS ARE NOT +FOR GENERAL USE>. For further guidance, email me. + +=item * + +Language tags are not country codes. In fact, they are often +distinct codes, as with language tag ja for Japanese, and +ISO 3166 country code C<.jp> for Japan. + +=back + +=head1 LIST OF LANGUAGES + +The first part of each item is the language tag, between +{...} and in italic characters. It +is followed by an English name for the language or language-group. +Language tags that I judge to be not for general use, are bracketed. + +This list is in alphabetical order by English name of the language. + +=over + +=item I<{ab}> : Abkhazian + +eq Abkhaz + +=item I<{ace}> : Achinese + +=item I<{ach}> : Acoli + +=item I<{ada}> : Adangme + +=item I<{aa}> : Afar + +=item I<{afh}> : Afrihili + +(Artificial) + +=item I<{af}> : Afrikaans + +=item [I<{afa}> : Afro-Asiatic (Other)] + +=item I<{aka}> : Akan + +=item I<{akk}> : Akkadian + +(Historical) + +=item I<{sq}> : Albanian + +=item I<{ale}> : Aleut + +=item [I<{alg}> : Algonquian languages] + +NOT Algonquin! + +=item [I<{tut}> : Altaic (Other)] + +=item I<{am}> : Amharic + +NOT Aramaic! + +=item I<{i-ami}> : Ami + +eq Amis. eq 'Amis. eq Pangca. + +=item [I<{apa}> : Apache languages] + +=item I<{ar}> : Arabic + +Many forms are mutually un-intelligible in spoken media. +Notable forms: +ar-ae +ar-bh +ar-dz +ar-eg +ar-iq +ar-jo +ar-kw +ar-lb +ar-ly +ar-ma +ar-om +ar-qa +ar-sa +ar-sy +ar-tn +ar-ye. + +=item I<{arc}> : Aramaic + +NOT Amharic! NOT Samaritan Aramaic! + +=item I<{arp}> : Arapaho + +=item I<{arn}> : Araucanian + +=item I<{arw}> : Arawak + +=item I<{hy}> : Armenian + +=item [I<{art}> : Artificial (Other)] + +=item I<{as}> : Assamese + +=item [I<{ath}> : Athapascan languages] + +eq Athabaskan. eq Athapaskan. eq Athabascan. + +=item [I<{aus}> : Australian languages] + +=item [I<{map}> : Austronesian (Other)] + +=item I<{ava}> : Avaric + +=item I<{ae}> : Avestan + +eq Zend + +=item I<{awa}> : Awadhi + +=item I<{ay}> : Aymara + +=item I<{az}> : Azerbaijani + +eq Azeri + +=item I<{ban}> : Balinese + +=item [I<{bat}> : Baltic (Other)] + +=item I<{bal}> : Baluchi + +=item I<{bam}> : Bambara + +=item [I<{bai}> : Bamileke languages] + +=item I<{bad}> : Banda + +=item [I<{bnt}> : Bantu (Other)] + +=item I<{bas}> : Basa + +=item I<{ba}> : Bashkir + +=item I<{eu}> : Basque + +=item I<{btk}> : Batak (Indonesia) + +=item I<{bej}> : Beja + +=item I<{be}> : Belarusian + +eq Belarussian. eq Byelarussian. +eq Belorussian. eq Byelorussian. +eq White Russian. eq White Ruthenian. +NOT Ruthenian! + +=item I<{bem}> : Bemba + +=item I<{bn}> : Bengali + +=item [I<{ber}> : Berber (Other)] + +=item I<{bho}> : Bhojpuri + +=item I<{bh}> : Bihari + +=item I<{bik}> : Bikol + +=item I<{bin}> : Bini + +=item I<{bi}> : Bislama + +=item I<{bs}> : Bosnian + +=item I<{bra}> : Braj + +=item I<{br}> : Breton + +=item I<{bug}> : Buginese + +=item I<{bg}> : Bulgarian + +=item I<{i-bnn}> : Bunun + +=item I<{bua}> : Buriat + +=item I<{my}> : Burmese + +=item I<{cad}> : Caddo + +=item I<{car}> : Carib + +=item I<{ca}> : Catalan + +eq CatalE<aacute>n. eq Catalonian. + +=item [I<{cau}> : Caucasian (Other)] + +=item I<{ceb}> : Cebuano + +=item [I<{cel}> : Celtic (Other)] + +Notable forms: cel-gaulish. + +=item [I<{cai}> : Central American Indian (Other)] + +=item I<{chg}> : Chagatai + +(Historical?) + +=item [I<{cmc}> : Chamic languages] + +=item I<{ch}> : Chamorro + +=item I<{ce}> : Chechen + +=item I<{chr}> : Cherokee + +eq Tsalagi + +=item I<{chy}> : Cheyenne + +=item I<{chb}> : Chibcha + +(Historical) NOT Chibchan (which is a language family). + +=item I<{ny}> : Chichewa + +eq Nyanja. eq Chinyanja. + +=item I<{zh}> : Chinese + +Many forms are mutually un-intelligible in spoken media. +Notable subforms: +zh-cn (PRC Chinese), +zh-hk (Hong Kong Chinese), +zh-mo (Macau Chinese), +zh-sg (Singapore Chinese), +zh-tw (Taiwan Chinese), +zh-guoyu (Putonghua/Guoyu/Mandarin), +zh-hakka (Hakka; formerly i-hakka), +zh-min (Hokkien), +zh-min-nan (Southern Hokkien), +zh-wuu (Shanghaiese), +zh-xiang (Hunanese), +zh-yue (Cantonese). + +=item I<{chn}> : Chinook Jargon + +eq Chinook Wawa. + +=item I<{chp}> : Chipewyan + +=item I<{cho}> : Choctaw + +=item I<{cu}> : Church Slavic + +eq Old Church Slavonic. + +=item I<{chk}> : Chuukese + +eq Trukese. eq Chuuk. eq Truk. eq Ruk. + +=item I<{cv}> : Chuvash + +=item I<{cop}> : Coptic + +=item I<{kw}> : Cornish + +=item I<{co}> : Corsican + +eq Corse. + +=item I<{cre}> : Cree + +NOT Creek! + +=item I<{mus}> : Creek + +NOT Cree! + +=item [I<{cpe}> : English-based Creoles and pidgins (Other)] + +=item [I<{cpf}> : French-based Creoles and pidgins (Other)] + +=item [I<{cpp}> : Portuguese-based Creoles and pidgins (Other)] + +=item [I<{crp}> : Creoles and pidgins (Other)] + +=item I<{hr}> : Croatian + +eq Croat. + +=item [I<{cus}> : Cushitic (Other)] + +=item I<{cs}> : Czech + +=item I<{dak}> : Dakota + +eq Nakota. eq Latoka. + +=item I<{da}> : Danish + +=item I<{day}> : Dayak + +=item I<{i-default}> : Default (Fallthru) Language + +Defined in RFC 2277, this is for tagging text +(which must include English text, and might/should include text +in other appropriate languages) that is emitted in a context +where language-negotiation wasn't possible -- in SMTP mail failure +messages, for example. + +=item I<{del}> : Delaware + +=item I<{din}> : Dinka + +=item I<{div}> : Divehi + +=item I<{doi}> : Dogri + +NOT Dogrib! + +=item I<{dgr}> : Dogrib + +NOT Dogri! + +=item [I<{dra}> : Dravidian (Other)] + +=item I<{dua}> : Duala + +=item I<{nl}> : Dutch + +eq Netherlander. Notable forms: nl-nl, nl-be. + +=item I<{dum}> : Middle Dutch (ca.1050-1350) + +(Historical) + +=item I<{dyu}> : Dyula + +=item I<{dz}> : Dzongkha + +=item I<{efi}> : Efik + +=item I<{egy}> : Ancient Egyptian + +(Historical) + +=item I<{eka}> : Ekajuk + +=item I<{elx}> : Elamite + +(Historical) + +=item I<{en}> : English + +Notable forms: +en-au +en-bz +en-ca +en-gb +en-ie +en-jm +en-nz +en-ph +en-tt +en-us +en-za +en-zw. + +=item I<{enm}> : Old English (1100-1500) + +(Historical) + +=item I<{ang}> : Old English (ca.450-1100) + +eq Anglo-Saxon. (Historical) + +=item I<{eo}> : Esperanto + +(Artificial) + +=item I<{et}> : Estonian + +=item I<{ewe}> : Ewe + +=item I<{ewo}> : Ewondo + +=item I<{fan}> : Fang + +=item I<{fat}> : Fanti + +=item I<{fo}> : Faroese + +=item I<{fj}> : Fijian + +=item I<{fi}> : Finnish + +=item [I<{fiu}> : Finno-Ugrian (Other)] + +eq Finno-Ugric. NOT Ugaritic! + +=item I<{fon}> : Fon + +=item I<{fr}> : French + +Notable forms: +fr-fr +fr-be +fr-ca +fr-ch +fr-lu +fr-mc. + +=item I<{frm}> : Middle French (ca.1400-1600) + +(Historical) + +=item I<{fro}> : Old French (842-ca.1400) + +(Historical) + +=item I<{fy}> : Frisian + +=item I<{fur}> : Friulian + +=item I<{ful}> : Fulah + +=item I<{gaa}> : Ga + +=item I<{gd}> : Scots Gaelic + +NOT Scots! + +=item I<{gl}> : Gallegan + +eq Galician + +=item I<{lug}> : Ganda + +=item I<{gay}> : Gayo + +=item I<{gba}> : Gbaya + +=item I<{gez}> : Geez + +eq Ge'ez + +=item I<{ka}> : Georgian + +=item I<{de}> : German + +Notable forms: de-at +de-be +de-ch +de-de +de-li +de-lu. + +=item I<{gmh}> : Middle High German (ca.1050-1500) + +(Historical) + +=item I<{goh}> : Old High German (ca.750-1050) + +(Historical) + +=item [I<{gem}> : Germanic (Other)] + +=item I<{gil}> : Gilbertese + +=item I<{gon}> : Gondi + +=item I<{gor}> : Gorontalo + +=item I<{got}> : Gothic + +(Historical) + +=item I<{grb}> : Grebo + +=item I<{grc}> : Ancient Greek (to 1453) + +(Historical) + +=item I<{el}> : Modern Greek (1453-) + +=item I<{gn}> : Guarani + +GuaranE<iacute> + +=item I<{gu}> : Gujarati + +=item I<{gwi}> : Gwich'in + +eq Gwichin + +=item I<{hai}> : Haida + +=item I<{ha}> : Hausa + +=item I<{haw}> : Hawaiian + +Hawai'ian + +=item I<{he}> : Hebrew + +(Formerly "iw".) + +=item I<{hz}> : Herero + +=item I<{hil}> : Hiligaynon + +=item I<{him}> : Himachali + +=item I<{hi}> : Hindi + +=item I<{ho}> : Hiri Motu + +=item I<{hit}> : Hittite + +(Historical) + +=item I<{hmn}> : Hmong + +=item I<{hu}> : Hungarian + +=item I<{hup}> : Hupa + +=item I<{iba}> : Iban + +=item I<{is}> : Icelandic + +=item I<{ibo}> : Igbo + +=item I<{ijo}> : Ijo + +=item I<{ilo}> : Iloko + +=item [I<{inc}> : Indic (Other)] + +=item [I<{ine}> : Indo-European (Other)] + +=item I<{id}> : Indonesian + +(Formerly "in".) + +=item I<{ia}> : Interlingua (International Auxiliary Language Association) + +(Artificial) NOT Interlingue! + +=item I<{ie}> : Interlingue + +(Artificial) NOT Interlingua! + +=item I<{iu}> : Inuktitut + +A subform of "Eskimo". + +=item I<{ik}> : Inupiaq + +A subform of "Eskimo". + +=item [I<{ira}> : Iranian (Other)] + +=item I<{ga}> : Irish + +=item I<{mga}> : Middle Irish (900-1200) + +(Historical) + +=item I<{sga}> : Old Irish (to 900) + +(Historical) + +=item [I<{iro}> : Iroquoian languages] + +=item I<{it}> : Italian + +Notable forms: it-it, it-ch + +=item I<{ja}> : Japanese + +(NOT "jp"!) + +=item I<{jw}> : Javanese + +=item I<{jrb}> : Judeo-Arabic + +=item I<{jpr}> : Judeo-Persian + +=item I<{kab}> : Kabyle + +=item I<{kac}> : Kachin + +=item I<{kl}> : Kalaallisut + +eq Greenlandic "Eskimo" + +=item I<{kam}> : Kamba + +=item I<{kn}> : Kannada + +NOT Canadian! + +=item I<{kau}> : Kanuri + +=item I<{kaa}> : Kara-Kalpak + +=item I<{kar}> : Karen + +=item I<{ks}> : Kashmiri + +=item I<{kaw}> : Kawi + +=item I<{kk}> : Kazakh + +=item I<{kha}> : Khasi + +=item I<{km}> : Khmer + +eq Cambodian. eq Kampuchean. + +=item [I<{khi}> : Khoisan (Other)] + +=item I<{kho}> : Khotanese + +=item I<{ki}> : Kikuyu + +eq Gikuyu. + +=item I<{kmb}> : Kimbundu + +=item I<{rw}> : Kinyarwanda + +=item I<{ky}> : Kirghiz + +=item I<{i-klingon}> : Klingon + +=item I<{kv}> : Komi + +=item I<{kon}> : Kongo + +=item I<{kok}> : Konkani + +=item I<{ko}> : Korean + +=item I<{kos}> : Kosraean + +=item I<{kpe}> : Kpelle + +=item I<{kro}> : Kru + +=item I<{kj}> : Kuanyama + +=item I<{kum}> : Kumyk + +=item I<{ku}> : Kurdish + +=item I<{kru}> : Kurukh + +=item I<{kut}> : Kutenai + +=item I<{lad}> : Ladino + +eq Judeo-Spanish. NOT Ladin (a minority language in Italy). + +=item I<{lah}> : Lahnda + +NOT Lamba! + +=item I<{lam}> : Lamba + +NOT Lahnda! + +=item I<{lo}> : Lao + +=item I<{la}> : Latin + +(Historical) NOT Ladin! NOT Ladino! + +=item I<{lv}> : Latvian + +eq Lettish. + +=item I<{lb}> : Letzeburgesch + +eq Luxemburgian, eq Luxemburger. (Formerly i-lux.) + +=item I<{lez}> : Lezghian + +=item I<{ln}> : Lingala + +=item I<{lt}> : Lithuanian + +=item I<{nds}> : Low German + +eq Low Saxon. eq Low German. eq Low Saxon. + +=item I<{loz}> : Lozi + +=item I<{lub}> : Luba-Katanga + +=item I<{lua}> : Luba-Lulua + +=item I<{lui}> : Luiseno + +eq LuiseE<ntilde>o. + +=item I<{lun}> : Lunda + +=item I<{luo}> : Luo (Kenya and Tanzania) + +=item I<{lus}> : Lushai + +=item I<{mk}> : Macedonian + +eq the modern Slavic language spoken in what was Yugoslavia. +NOT the form of Greek spoken in Greek Macedonia! + +=item I<{mad}> : Madurese + +=item I<{mag}> : Magahi + +=item I<{mai}> : Maithili + +=item I<{mak}> : Makasar + +=item I<{mg}> : Malagasy + +=item I<{ms}> : Malay + +NOT Malayalam! + +=item I<{ml}> : Malayalam + +NOT Malay! + +=item I<{mt}> : Maltese + +=item I<{mnc}> : Manchu + +=item I<{mdr}> : Mandar + +NOT Mandarin! + +=item I<{man}> : Mandingo + +=item I<{mni}> : Manipuri + +=item [I<{mno}> : Manobo languages] + +=item I<{gv}> : Manx + +=item I<{mi}> : Maori + +NOT Mari! + +=item I<{mr}> : Marathi + +=item I<{chm}> : Mari + +NOT Maori! + +=item I<{mh}> : Marshall + +eq Marshallese. + +=item I<{mwr}> : Marwari + +=item I<{mas}> : Masai + +=item [I<{myn}> : Mayan languages] + +=item I<{men}> : Mende + +=item I<{mic}> : Micmac + +=item I<{min}> : Minangkabau + +=item I<{i-mingo}> : Mingo + +eq the Irquoian language West Virginia Seneca. NOT New York Seneca! + +=item [I<{mis}> : Miscellaneous languages] + +Don't use this. + +=item I<{moh}> : Mohawk + +=item I<{mo}> : Moldavian + +eq Moldovan. + +=item [I<{mkh}> : Mon-Khmer (Other)] + +=item I<{lol}> : Mongo + +=item I<{mn}> : Mongolian + +eq Mongol. + +=item I<{mos}> : Mossi + +=item [I<{mul}> : Multiple languages] + +Not for normal use. + +=item [I<{mun}> : Munda languages] + +=item I<{nah}> : Nahuatl + +=item I<{na}> : Nauru + +=item I<{nv}> : Navajo + +eq Navaho. (Formerly i-navajo.) + +=item I<{nd}> : North Ndebele + +=item I<{nr}> : South Ndebele + +=item I<{ng}> : Ndonga + +=item I<{ne}> : Nepali + +eq Nepalese. Notable forms: ne-np ne-in. + +=item I<{new}> : Newari + +=item I<{nia}> : Nias + +=item [I<{nic}> : Niger-Kordofanian (Other)] + +=item [I<{ssa}> : Nilo-Saharan (Other)] + +=item I<{niu}> : Niuean + +=item I<{non}> : Old Norse + +(Historical) + +=item [I<{nai}> : North American Indian] + +Do not use this. + +=item I<{se}> : Northern Sami + +eq Lappish. eq Lapp. eq (Northern) Saami. + +=item I<{no}> : Norwegian + +Note the two following forms: + +=item I<{nb}> : Norwegian BokmE<aring>l + +(A form of Norwegian.) (Formerly no-bok.) + +=item I<{nn}> : Norwegian Nynorsk + +(A form of Norwegian.) (Formerly no-nyn.) + +=item [I<{nub}> : Nubian languages] + +=item I<{nym}> : Nyamwezi + +=item I<{nyn}> : Nyankole + +=item I<{nyo}> : Nyoro + +=item I<{nzi}> : Nzima + +=item I<{oc}> : Occitan (post 1500) + +eq ProvenE<ccedil>al, eq Provencal + +=item I<{oji}> : Ojibwa + +eq Ojibwe. + +=item I<{or}> : Oriya + +=item I<{om}> : Oromo + +=item I<{osa}> : Osage + +=item I<{os}> : Ossetian; Ossetic + +=item [I<{oto}> : Otomian languages] + +Group of languages collectively called "OtomE<iacute>". + +=item I<{pal}> : Pahlavi + +eq Pahlevi + +=item I<{i-pwn}> : Paiwan + +eq Pariwan + +=item I<{pau}> : Palauan + +=item I<{pi}> : Pali + +(Historical?) + +=item I<{pam}> : Pampanga + +=item I<{pag}> : Pangasinan + +=item I<{pa}> : Panjabi + +eq Punjabi + +=item I<{pap}> : Papiamento + +eq Papiamentu. + +=item [I<{paa}> : Papuan (Other)] + +=item I<{fa}> : Persian + +eq Farsi. + +=item I<{peo}> : Old Persian (ca.600-400 B.C.) + +=item [I<{phi}> : Philippine (Other)] + +=item I<{phn}> : Phoenician + +(Historical) + +=item I<{pon}> : Pohnpeian + +=item I<{pl}> : Polish + +=item I<{pt}> : Portuguese + +eq Portugese. Notable forms: pt-pt pt-br. + +=item [I<{pra}> : Prakrit languages] + +=item I<{pro}> : Old ProvenE<ccedil>al (to 1500) + +eq Old Provencal. (Historical.) + +=item I<{ps}> : Pushto + +eq Pashto. eq Pushtu. + +=item I<{qu}> : Quechua + +eq Quecha. + +=item I<{rm}> : Raeto-Romance + +eq Romansh. + +=item I<{raj}> : Rajasthani + +=item I<{rap}> : Rapanui + +=item I<{rar}> : Rarotongan + +=item [I<{qaa}>-I<qtz> : Reserved for local use.] + +=item [I<{roa}> : Romance (Other)] + +NOT Romanian! NOT Romany! NOT Romansh! + +=item I<{ro}> : Romanian + +eq Rumanian. NOT Romany! + +=item I<{rom}> : Romany + +eq Rom. NOT Romanian! + +=item I<{rn}> : Rundi + +=item I<{ru}> : Russian + +NOT White Russian! NOT Rusyn! + +=item [I<{sal}> : Salishan languages] + +Large language group. + +=item I<{sam}> : Samaritan Aramaic + +NOT Aramaic! + +=item [I<{smi}> : Sami languages (Other)] + +=item I<{sm}> : Samoan + +=item I<{sad}> : Sandawe + +=item I<{sg}> : Sango + +=item I<{sa}> : Sanskrit + +(Historical) + +=item I<{sat}> : Santali + +=item I<{sc}> : Sardinian + +eq Sard. + +=item I<{sas}> : Sasak + +=item I<{sco}> : Scots + +NOT Scots Gaelic! + +=item I<{sel}> : Selkup + +=item [I<{sem}> : Semitic (Other)] + +=item I<{sr}> : Serbian + +eq Serb. NOT Sorbian. + +=item I<{srr}> : Serer + +=item I<{shn}> : Shan + +=item I<{sn}> : Shona + +=item I<{sid}> : Sidamo + +=item I<{sgn-...}> : Sign Languages + +Always use with a subtag. Notable forms: sgn-gb sgn-ie sgn-ni sgn-us. + +=item I<{bla}> : Siksika + +eq Blackfoot. eq Pikanii. + +=item I<{sd}> : Sindhi + +=item I<{si}> : Sinhalese + +eq Sinhala. + +=item [I<{sit}> : Sino-Tibetan (Other)] + +=item [I<{sio}> : Siouan languages] + +=item I<{den}> : Slave (Athapascan) + +("Slavey" is a subform.) + +=item [I<{sla}> : Slavic (Other)] + +=item I<{sk}> : Slovak + +eq Slovakian. + +=item I<{sl}> : Slovenian + +eq Slovene. + +=item I<{sog}> : Sogdian + +=item I<{so}> : Somali + +=item I<{son}> : Songhai + +=item I<{snk}> : Soninke + +=item I<{wen}> : Sorbian languages + +eq Wendish. eq Sorb. eq Lusatian. eq Wend. NOT Venda! NOT Serbian! + +=item I<{nso}> : Northern Sotho + +=item I<{st}> : Southern Sotho + +eq Sutu. eq Sesotho. + +=item [I<{sai}> : South American Indian (Other)] + +=item I<{es}> : Spanish + +Notable forms: +es-ar es-bo es-cl es-co es-do es-ec es-es es-gt +es-hn es-mx es-pa es-pe es-pr es-py es-sv es-us +es-uy es-ve + +=item I<{suk}> : Sukuma + +=item I<{sux}> : Sumerian + +(Historical) + +=item I<{su}> : Sundanese + +=item I<{sus}> : Susu + +=item I<{sw}> : Swahili + +eq Kiswahili + +=item I<{ss}> : Swati + +=item I<{sv}> : Swedish + +Notable forms: sv-se sv-fi. + +=item I<{syr}> : Syriac + +=item I<{tl}> : Tagalog + +=item I<{ty}> : Tahitian + +=item [I<{tai}> : Tai (Other)] + +NOT Thai! + +=item I<{tg}> : Tajik + +=item I<{tmh}> : Tamashek + +=item I<{ta}> : Tamil + +=item I<{i-tao}> : Tao + +eq Yami. + +=item I<{tt}> : Tatar + +=item I<{i-tay}> : Tayal + +eq Atayal. eq Atayan. + +=item I<{te}> : Telugu + +=item I<{ter}> : Tereno + +=item I<{tet}> : Tetum + +=item I<{th}> : Thai + +NOT Tai! + +=item I<{bo}> : Tibetan + +=item I<{tig}> : Tigre + +=item I<{ti}> : Tigrinya + +=item I<{tem}> : Timne + +eq Themne. eq Timene. + +=item I<{tiv}> : Tiv + +=item I<{tli}> : Tlingit + +=item I<{tpi}> : Tok Pisin + +=item I<{tkl}> : Tokelau + +=item I<{tog}> : Tonga (Nyasa) + +NOT Tsonga! + +=item I<{to}> : Tonga (Tonga Islands) + +(Pronounced "Tong-a", not "Tong-ga") + +NOT Tsonga! + +=item I<{tsi}> : Tsimshian + +eq Sm'algyax + +=item I<{ts}> : Tsonga + +NOT Tonga! + +=item I<{i-tsu}> : Tsou + +=item I<{tn}> : Tswana + +Same as Setswana. + +=item I<{tum}> : Tumbuka + +=item I<{tr}> : Turkish + +(Typically in Roman script) + +=item I<{ota}> : Ottoman Turkish (1500-1928) + +(Typically in Arabic script) (Historical) + +=item I<{tk}> : Turkmen + +eq Turkmeni. + +=item I<{tvl}> : Tuvalu + +=item I<{tyv}> : Tuvinian + +eq Tuvan. eq Tuvin. + +=item I<{tw}> : Twi + +=item I<{uga}> : Ugaritic + +NOT Ugric! + +=item I<{ug}> : Uighur + +=item I<{uk}> : Ukrainian + +=item I<{umb}> : Umbundu + +=item I<{und}> : Undetermined + +Not a tag for normal use. + +=item I<{ur}> : Urdu + +=item I<{uz}> : Uzbek + +eq E<Ouml>zbek + +=item I<{vai}> : Vai + +=item I<{ven}> : Venda + +NOT Wendish! NOT Wend! NOT Avestan! + +=item I<{vi}> : Vietnamese + +eq Viet. + +=item I<{vo}> : VolapE<uuml>k + +eq Volapuk. (Artificial) + +=item I<{vot}> : Votic + +eq Votian. eq Vod. + +=item [I<{wak}> : Wakashan languages] + +=item I<{wal}> : Walamo + +eq Wolaytta. + +=item I<{war}> : Waray + +Presumably the Philippine language Waray-Waray (SamareE<ntilde>o), +not the smaller Philippine language Waray Sorsogon, nor the extinct +Australian language Waray. + +=item I<{was}> : Washo + +eq Washoe + +=item I<{cy}> : Welsh + +=item I<{wo}> : Wolof + +=item I<{x-...}> : Unregistered (Private Use) + +"x-" is a prefix for language tags that are not registered with ISO +or IANA. Example, x-double-dutch + +=item I<{xh}> : Xhosa + +=item I<{sah}> : Yakut + +=item I<{yao}> : Yao + +(The Yao in Malawi?) + +=item I<{yap}> : Yapese + +eq Yap + +=item I<{yi}> : Yiddish + +Formerly "ji". Sometimes in Roman script, sometimes in Hebrew script. + +=item I<{yo}> : Yoruba + +=item [I<{ypk}> : Yupik languages] + +Several "Eskimo" languages. + +=item I<{znd}> : Zande + +=item [I<{zap}> : Zapotec] + +(A group of languages.) + +=item I<{zen}> : Zenaga + +NOT Zend. + +=item I<{za}> : Zhuang + +=item I<{zu}> : Zulu + +=item I<{zun}> : Zuni + +eq ZuE<ntilde>i + +=back + +=head1 SEE ALSO + +L<I18N::LangTags|I18N::LangTags> + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 2001 Sean M. Burke. All rights reserved. + +You can redistribute and/or +modify this document under the same terms as Perl itself. + +This document is provided in the the hope that it will be +useful, but without any warranty; +without even the implied warranty of accuracy, authoritativeness, +completeness, merchantability, or fitness for a particular purpose. + +Email any corrections or questions to me. + +=head1 AUTHOR + +Sean M. Burke, sburkeE<64>cpan.org + +=cut + + +# To generate a list of just the two and three-letter codes: + +#!/usr/local/bin/perl -w + +require 5; # Time-stamp: "2001-03-13 21:53:39 MST" + # Sean M. Burke, sburke@cpan.org + # This program is for generating the language_codes.txt file +use strict; +use LWP::Simple; +use HTML::TreeBuilder 3.10; +my $root = HTML::TreeBuilder->new(); +my $url = 'http://lcweb.loc.gov/standards/iso639-2/bibcodes.html'; +$root->parse(get($url) || die "Can't get $url"); +$root->eof(); + +my @codes; + +foreach my $tr ($root->find_by_tag_name('tr')) { + my @f = map $_->as_text(), $tr->content_list(); + #print map("<$_> ", @f), "\n"; + next unless @f == 5; + pop @f; # nix the French name + next if $f[-1] eq 'Language Name (English)'; # it's a header line + my $xx = splice(@f, 2,1); # pull out the two-letter code + $f[-1] =~ s/^\s+//; + $f[-1] =~ s/\s+$//; + if($xx =~ m/[a-zA-Z]/) { # there's a two-letter code for it + push @codes, [ lc($f[-1]), "$xx\t$f[-1]\n" ]; + } else { # print the three-letter codes. + if($f[0] eq $f[1]) { + push @codes, [ lc($f[-1]), "$f[1]\t$f[2]\n" ]; + } else { # shouldn't happen + push @codes, [ lc($f[-1]), "@f !!!!!!!!!!\n" ]; + } + } +} + +print map $_->[1], sort {; $a->[0] cmp $b->[0] } @codes; +print "[ based on $url\n at ", scalar(localtime), "]\n", + "[Note: doesn't include IANA-registered codes.]\n"; +exit; +__END__ + diff --git a/lib/Locale/Maketext.pm b/lib/Locale/Maketext.pm new file mode 100644 index 0000000000..a39383fc30 --- /dev/null +++ b/lib/Locale/Maketext.pm @@ -0,0 +1,646 @@ + +# Time-stamp: "2001-05-25 07:49:06 MDT" + +require 5; +package Locale::Maketext; +use strict; +use vars qw( @ISA $VERSION $MATCH_SUPERS $USING_LANGUAGE_TAGS + $USE_LITERALS); +use Carp (); +use I18N::LangTags 0.21 (); + +#-------------------------------------------------------------------------- + +BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } } + # define the constant 'DEBUG' at compile-time + +$VERSION = "1.01"; +@ISA = (); + +$MATCH_SUPERS = 1; +$USING_LANGUAGE_TAGS = 1; + # Turning this off is somewhat of a security risk in that little or no + # checking will be done on the legality of tokens passed to the + # eval("use $module_name") in _try_use. If you turn this off, you have + # to do your own taint checking. + +$USE_LITERALS = 1 unless defined $USE_LITERALS; + # a hint for compiling bracket-notation things. + +my %isa_scan = (); + +########################################################################### + +sub quant { + my($handle, $num, @forms) = @_; + + return $num if @forms == 0; # what should this mean? + return $forms[2] if @forms > 2 and $num == 0; # special zeroth case + + # Normal case: + # Note that the formatting of $num is preserved. + return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) ); + # Most human languages put the number phrase before the qualified phrase. +} + + +sub numerate { + # return this lexical item in a form appropriate to this number + my($handle, $num, @forms) = @_; + my $s = ($num == 1); + + return '' unless @forms; + if(@forms == 1) { # only the headword form specified + return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack. + } else { # sing and plural were specified + return $s ? $forms[0] : $forms[1]; + } +} + +#-------------------------------------------------------------------------- + +sub numf { + my($handle, $num) = @_[0,1]; + if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) { + $num += 0; # Just use normal integer stringification. + # Specifically, don't let %G turn ten million into 1E+007 + } else { + $num = CORE::sprintf("%G", $num); + # "CORE::" is there to avoid confusion with the above sub sprintf. + } + while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5 + # The initial \d+ gobbles as many digits as it can, and then we + # backtrack so it un-eats the rightmost three, and then we + # insert the comma there. + + $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'}; + # This is just a lame hack instead of using Number::Format + return $num; +} + +sub sprintf { + no integer; + my($handle, $format, @params) = @_; + return CORE::sprintf($format, @params); + # "CORE::" is there to avoid confusion with myself! +} + +#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=# + +use integer; # vroom vroom... applies to the whole rest of the module + +sub language_tag { + my $it = ref($_[0]) || $_[0]; + return undef unless $it =~ m/([^':]+)(?:::)?$/s; + $it = lc($1); + $it =~ tr<_><->; + return $it; +} + +sub encoding { + my $it = $_[0]; + return( + (ref($it) && $it->{'encoding'}) + || "iso-8859-1" # Latin-1 + ); +} + +#-------------------------------------------------------------------------- + +sub fallback_languages { return('i-default', 'en', 'en-US') } + +sub fallback_language_classes { return () } + +#-------------------------------------------------------------------------- + +sub fail_with { # an actual attribute method! + my($handle, @params) = @_; + return unless ref($handle); + $handle->{'fail'} = $params[0] if @params; + return $handle->{'fail'}; +} + +#-------------------------------------------------------------------------- + +sub failure_handler_auto { + # Meant to be used like: + # $handle->fail_with('failure_handler_auto') + + my($handle, $phrase, @params) = @_; + $handle->{'failure_lex'} ||= {}; + my $lex = $handle->{'failure_lex'}; + + my $value; + $lex->{$phrase} ||= ($value = $handle->_compile($phrase)); + + # Dumbly copied from sub maketext: + { + local $SIG{'__DIE__'}; + eval { $value = &$value($handle, @_) }; + } + # If we make it here, there was an exception thrown in the + # call to $value, and so scream: + if($@) { + my $err = $@; + # pretty up the error message + $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?> + <\n in bracket code [compiled line $1],>s; + #$err =~ s/\n?$/\n/s; + Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; + # Rather unexpected, but suppose that the sub tried calling + # a method that didn't exist. + } else { + return $value; + } +} + +#========================================================================== + +sub new { + # Nothing fancy! + my $class = ref($_[0]) || $_[0]; + my $handle = bless {}, $class; + $handle->init; + return $handle; +} + +sub init { return } # no-op + +########################################################################### + +sub maketext { + # Remember, this can fail. Failure is controllable many ways. + Carp::croak "maketext requires at least one parameter" unless @_ > 1; + + my($handle, $phrase) = splice(@_,0,2); + + # Look up the value: + + my $value; + foreach my $h_r ( + @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs } + ) { + print "* Looking up \"$phrase\" in $h_r\n" if DEBUG; + if(exists $h_r->{$phrase}) { + print " Found \"$phrase\" in $h_r\n" if DEBUG; + unless(ref($value = $h_r->{$phrase})) { + # Nonref means it's not yet compiled. Compile and replace. + $value = $h_r->{$phrase} = $handle->_compile($value); + } + last; + } elsif($phrase !~ m/^_/s and $h_r->{'_AUTO'}) { + # it's an auto lex, and this is an autoable key! + print " Automaking \"$phrase\" into $h_r\n" if DEBUG; + + $value = $h_r->{$phrase} = $handle->_compile($phrase); + last; + } + print " Not found in $h_r, nor automakable\n" if DEBUG > 1; + # else keep looking + } + + unless(defined($value)) { + print "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, + " fails.\n" if DEBUG; + if(ref($handle) and $handle->{'fail'}) { + print "WARNING0: maketext fails looking for <$phrase>\n" if DEBUG; + my $fail; + if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference + return &{$fail}($handle, $phrase, @_); + # If it ever returns, it should return a good value. + } else { # It's a method name + return $handle->$fail($phrase, @_); + # If it ever returns, it should return a good value. + } + } else { + # All we know how to do is this; + Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed"); + } + } + + return $$value if ref($value) eq 'SCALAR'; + return $value unless ref($value) eq 'CODE'; + + { + local $SIG{'__DIE__'}; + eval { $value = &$value($handle, @_) }; + } + # If we make it here, there was an exception thrown in the + # call to $value, and so scream: + if($@) { + my $err = $@; + # pretty up the error message + $err =~ s<\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?> + <\n in bracket code [compiled line $1],>s; + #$err =~ s/\n?$/\n/s; + Carp::croak "Error in maketexting \"$phrase\":\n$err as used"; + # Rather unexpected, but suppose that the sub tried calling + # a method that didn't exist. + } else { + return $value; + } +} + +########################################################################### + +sub get_handle { # This is a constructor and, yes, it CAN FAIL. + # Its class argument has to be the base class for the current + # application's l10n files. + my($base_class, @languages) = @_; + $base_class = ref($base_class) || $base_class; + # Complain if they use __PACKAGE__ as a project base class? + + unless(@languages) { # Calling with no args is magical! wooo, magic! + if(length( $ENV{'REQUEST_METHOD'} || '' )) { # I'm a CGI + my $in = $ENV{'HTTP_ACCEPT_LANGUAGE'} || ''; + # supposedly that works under mod_perl, too. + $in =~ s<\([\)]*\)><>g; # Kill parens'd things -- just a hack. + @languages = &I18N::LangTags::extract_language_tags($in) if length $in; + # ...which untaints, incidentally. + + } else { # Not running as a CGI: try to puzzle out from the environment + if(length( $ENV{'LANG'} || '' )) { + push @languages, split m/[,:]/, $ENV{'LANG'}; + # LANG can be only /one/ locale as far as I know, but what the hey. + } + if(length( $ENV{'LANGUAGE'} || '' )) { + push @languages, split m/[,:]/, $ENV{'LANGUAGE'}; + } + print "Noting ENV LANG ", join(',', @languages),"\n" if DEBUG; + # Those are really locale IDs, but they get xlated a few lines down. + + if(&_try_use('Win32::Locale')) { + # If we have that module installed... + push @languages, Win32::Locale::get_language() + if defined &Win32::Locale::get_language; + } + } + } + + #------------------------------------------------------------------------ + print "Lgs1: ", map("<$_>", @languages), "\n" if DEBUG; + + if($USING_LANGUAGE_TAGS) { + @languages = map &I18N::LangTags::locale2language_tag($_), @languages; + # if it's a lg tag, fine, pass thru (untainted) + # if it's a locale ID, try converting to a lg tag (untainted), + # otherwise nix it. + + push @languages, map &I18N::LangTags::super_languages($_), @languages + if $MATCH_SUPERS; + + @languages = map { $_, &I18N::LangTags::alternate_language_tags($_) } + @languages; # catch alternation + + push @languages, $base_class->fallback_languages; + # You are free to override fallback_languages to return empty-list! + + @languages = # final bit of processing: + map { + my $it = $_; # copy + $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _ + $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_ + $it; + } @languages + ; + } + print "Lgs2: ", map("<$_>", @languages), "\n" if DEBUG > 1; + + push @languages, $base_class->fallback_language_classes; + # You are free to override that to return whatever. + + + my %seen = (); + foreach my $module_name ( map { $base_class . "::" . $_ } @languages ) + { + next unless length $module_name; # sanity + next if $seen{$module_name}++ # Already been here, and it was no-go + || !&_try_use($module_name); # Try to use() it, but can't it. + return($module_name->new); # Make it! + } + + return undef; # Fail! +} + +########################################################################### +# +# This is where most people should stop reading. +# +########################################################################### + +sub _compile { + # This big scarp routine compiles an entry. + # It returns either a coderef if there's brackety bits in this, or + # otherwise a ref to a scalar. + + my $target = ref($_[0]) || $_[0]; + + my(@code); + my(@c) = (''); # "chunks" -- scratch. + my $call_count = 0; + my $big_pile = ''; + { + my $in_group = 0; # start out outside a group + my($m, @params); # scratch + + while($_[1] =~ # Iterate over chunks. + m<\G( + [^\~\[\]]+ # non-~[] stuff + | + ~. # ~[, ~], ~~, ~other + | + \x5B # [ + | + \x5D # ] + | + ~ # terminal ~? + | + $ + )>xgs + ) { + print " \"$1\"\n" if DEBUG > 2; + + if($1 eq '[' or $1 eq '') { # "[" or end + # Whether this is "[" or end, force processing of any + # preceding literal. + if($in_group) { + if($1 eq '') { + $target->_die_pointing($_[1], "Unterminated bracket group"); + } else { + $target->_die_pointing($_[1], "You can't nest bracket groups"); + } + } else { + if($1 eq '') { + print " [end-string]\n" if DEBUG > 2; + } else { + $in_group = 1; + } + die "How come \@c is empty?? in <$_[1]>" unless @c; # sanity + if(length $c[-1]) { + # Now actually processing the preceding literal + $big_pile .= $c[-1]; + if($USE_LITERALS and $c[-1] !~ m<[^\x20-\x7E]>s) { + # normal case -- all very safe chars + $c[-1] =~ s/'/\\'/g; + push @code, q{ '} . $c[-1] . "',\n"; + $c[-1] = ''; # reuse this slot + } else { + push @code, ' $c[' . $#c . "],\n"; + push @c, ''; # new chunk + } + } + # else just ignore the empty string. + } + + } elsif($1 eq ']') { # "]" + # close group -- go back in-band + if($in_group) { + $in_group = 0; + + print " --Closing group [$c[-1]]\n" if DEBUG > 2; + + # And now process the group... + + if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) { + DEBUG > 2 and print " -- (Ignoring)\n"; + $c[-1] = ''; # reset out chink + next; + } + + #$c[-1] =~ s/^\s+//s; + #$c[-1] =~ s/\s+$//s; + ($m,@params) = split(",", $c[-1], -1); # was /\s*,\s*/ + + foreach($m, @params) { tr/\x7F/,/ } + # A bit of a hack -- we've turned "~,"'s into \x7F's, so turn + # 'em into real commas here. + + if($m eq '_*' or $m =~ m<^_(-?\d+)$>s) { + # Treat [_1,...] as [,_1,...], etc. + unshift @params, $m; + $m = ''; + } + + # Most common case: a simple, legal-looking method name + if($m eq '') { + # 0-length method name means to just interpolate: + push @code, ' ('; + } elsif($m =~ m<^\w+(?:\:\:\w+)*$>s + and $m !~ m<(?:^|\:)\d>s + # exclude starting a (sub)package or symbol with a digit + ) { + # Yes, it even supports the demented (and undocumented?) + # $obj->Foo::bar(...) syntax. + $target->_die_pointing( + $_[1], "Can't (yet?) use \"SUPER::\" in a bracket-group method", + 2 + length($c[-1]) + ) + if $m =~ m/^SUPER::/s; + # Because for SUPER:: to work, we'd have to compile this into + # the right package, and that seems just not worth the bother, + # unless someone convinces me otherwise. + + push @code, ' $_[0]->' . $m . '('; + } else { + # TODO: implement something? or just too icky to consider? + $target->_die_pointing( + $_[1], + "Can't use \"$m\" as a method name in bracket group", + 2 + length($c[-1]) + ); + } + + pop @c; # we don't need that chunk anymore + ++$call_count; + + foreach my $p (@params) { + if($p eq '_*') { + # Meaning: all parameters except $_[0] + $code[-1] .= ' @_[1 .. $#_], '; + # and yes, that does the right thing for all @_ < 3 + } elsif($p =~ m<^_(-?\d+)$>s) { + # _3 meaning $_[3] + $code[-1] .= '$_[' . (0 + $1) . '], '; + } elsif($USE_LITERALS and $p !~ m<[^\x20-\x7E]>s) { + # Normal case: a literal containing only safe characters + $p =~ s/'/\\'/g; + $code[-1] .= q{'} . $p . q{', }; + } else { + # Stow it on the chunk-stack, and just refer to that. + push @c, $p; + push @code, ' $c[' . $#c . "], "; + } + } + $code[-1] .= "),\n"; + + push @c, ''; + } else { + $target->_die_pointing($_[1], "Unbalanced ']'"); + } + + } elsif(substr($1,0,1) ne '~') { + # it's stuff not containing "~" or "[" or "]" + # i.e., a literal blob + $c[-1] .= $1; + + } elsif($1 eq '~~') { # "~~" + $c[-1] .= '~'; + + } elsif($1 eq '~[') { # "~[" + $c[-1] .= '['; + + } elsif($1 eq '~]') { # "~]" + $c[-1] .= ']'; + + } elsif($1 eq '~,') { # "~," + if($in_group) { + $c[-1] .= "\x7F"; + # This is a hack, based on the assumption that no-one will actually + # want a \x7f inside a bracket group. Let's hope that's it's true. + } else { + $c[-1] .= '~,'; + } + + } elsif($1 eq '~') { # possible only at string-end, it seems. + $c[-1] .= '~'; + + } else { + # It's a "~X" where X is not a special character. + # Consider it a literal ~ and X. + $c[-1] .= $1; + } + } + } + + if($call_count) { + undef $big_pile; # Well, nevermind that. + } else { + # It's all literals! Ahwell, that can happen. + # So don't bother with the eval. Return a SCALAR reference. + return \$big_pile; + } + + die "Last chunk isn't null??" if @c and length $c[-1]; # sanity + print scalar(@c), " chunks under closure\n" if DEBUG; + if(@code == 0) { # not possible? + print "Empty code\n" if DEBUG; + return \''; + } elsif(@code > 1) { # most cases, presumably! + unshift @code, "join '',\n"; + } + unshift @code, "use strict; sub {\n"; + push @code, "}\n"; + + print @code if DEBUG; + my $sub = eval(join '', @code); + die "$@ while evalling" . join('', @code) if $@; # Should be impossible. + return $sub; +} + +# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +sub _die_pointing { + # This is used by _compile to throw a fatal error + my $target = shift; # class name + # ...leaving $_[0] the error-causing text, and $_[1] the error message + + my $i = index($_[0], "\n"); + + my $pointy; + my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1; + if($pos < 1) { + $pointy = "^=== near there\n"; + } else { # we need to space over + my $first_tab = index($_[0], "\t"); + if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) { + # No tabs, or the first tab is harmlessly after where we will point to, + # AND we're far enough from the margin that we can draw a proper arrow. + $pointy = ('=' x $pos) . "^ near there\n"; + } else { + # tabs screw everything up! + $pointy = substr($_[0],0,$pos); + $pointy =~ tr/\t //cd; + # make everything into whitespace, but preseving tabs + $pointy .= "^=== near there\n"; + } + } + + my $errmsg = "$_[1], in\:\n$_[0]"; + + if($i == -1) { + # No newline. + $errmsg .= "\n" . $pointy; + } elsif($i == (length($_[0]) - 1) ) { + # Already has a newline at end. + $errmsg .= $pointy; + } else { + # don't bother with the pointy bit, I guess. + } + Carp::croak( "$errmsg via $target, as used" ); +} + +########################################################################### + +my %tried = (); + # memoization of whether we've used this module, or found it unusable. + +sub _try_use { # Basically a wrapper around "require Modulename" + # "Many men have tried..." "They tried and failed?" "They tried and died." + return $tried{$_[0]} if exists $tried{$_[0]}; # memoization + + my $module = $_[0]; # ASSUME sane module name! + { no strict 'refs'; + return($tried{$module} = 1) + if defined(%{$module . "::Lexicon"}) or defined(@{$module . "::ISA"}); + # weird case: we never use'd it, but there it is! + } + + print " About to use $module ...\n" if DEBUG; + { + local $SIG{'__DIE__'}; + eval "require $module"; # used to be "use $module", but no point in that. + } + if($@) { + print "Error using $module \: $@\n" if DEBUG > 1; + return $tried{$module} = 0; + } else { + print " OK, $module is used\n" if DEBUG; + return $tried{$module} = 1; + } +} + +#-------------------------------------------------------------------------- + +sub _lex_refs { # report the lexicon references for this handle's class + # returns an arrayREF! + no strict 'refs'; + my $class = ref($_[0]) || $_[0]; + print "Lex refs lookup on $class\n" if DEBUG > 1; + return $isa_scan{$class} if exists $isa_scan{$class}; # memoization! + + my @lex_refs; + my $seen_r = ref($_[1]) ? $_[1] : {}; + + if( defined( *{$class . '::Lexicon'}{'HASH'} )) { + push @lex_refs, *{$class . '::Lexicon'}{'HASH'}; + print "%" . $class . "::Lexicon contains ", + scalar(keys %{$class . '::Lexicon'}), " entries\n" if DEBUG; + } + + # Implements depth(height?)-first recursive searching of superclasses + foreach my $superclass (@{$class . "::ISA"}) { + print " Super-class search into $superclass\n" if DEBUG; + next if $seen_r->{$superclass}++; + push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself + } + + $isa_scan{$class} = \@lex_refs; # save for next time + return \@lex_refs; +} + +sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity! + +########################################################################### +1; + + diff --git a/lib/Locale/Maketext.pod b/lib/Locale/Maketext.pod new file mode 100644 index 0000000000..b28a9d83c8 --- /dev/null +++ b/lib/Locale/Maketext.pod @@ -0,0 +1,1302 @@ + +# Time-stamp: "2001-05-25 07:50:08 MDT" + +=head1 NAME + +Locale::Maketext -- framework for localization + +=head1 SYNOPSIS + + package MyProgram; + use strict; + use MyProgram::L10N; + # ...which inherits from Locale::Maketext + my $lh = MyProgram::L10N->get_handle() || die "What language?"; + ... + # And then any messages your program emits, like: + warn $lh->maketext( "Can't open file [_1]: [_2]\n", $f, $! ); + ... + +=head1 DESCRIPTION + +It is a common feature of applications (whether run directly, +or via the Web) for them to be "localized" -- i.e., for them +to a present an English interface to an English-speaker, a German +interface to a German-speaker, and so on for all languages it's +programmed with. Locale::Maketext +is a framework for software localization; it provides you with the +tools for organizing and accessing the bits of text and text-processing +code that you need for producing localized applications. + +In order to make sense of Maketext and how all its +components fit together, you should probably +go read L<Locale::Maketext::TPJ13|Locale::Maketext::TPJ13>, and +I<then> read the following documentation. + +You may also want to read over the source for C<File::Findgrep> +and its constituent modules -- they are a complete (if small) +example application that uses Maketext. + +=head1 QUICK OVERVIEW + +The basic design of Locale::Maketext is object-oriented, and +Locale::Maketext is an abstract base class, from which you +derive a "project class". +The project class (with a name like "TkBocciBall::Localize", +which you then use in your module) is in turn the base class +for all the "language classes" for your project +(with names "TkBocciBall::Localize::it", +"TkBocciBall::Localize::en", +"TkBocciBall::Localize::fr", etc.). + +A language class is +a class containing a lexicon of phrases as class data, +and possibly also some methods that are of use in interpreting +phrases in the lexicon, or otherwise dealing with text in that +language. + +An object belonging to a language class is called a "language +handle"; it's typically a flyweight object. + +The normal course of action is to call: + + use TkBocciBall::Localize; # the localization project class + $lh = TkBocciBall::Localize->get_handle(); + # Depending on the user's locale, etc., this will + # make a language handle from among the classes available, + # and any defaults that you declare. + die "Couldn't make a language handle??" unless $lh; + +From then on, you use the C<maketext> function to access +entries in whatever lexicon(s) belong to the language handle +you got. So, this: + + print $lh->maketext("You won!"), "\n"; + +...emits the right text for this language. If the object +in C<$lh> belongs to class "TkBocciBall::Localize::fr" and +%TkBocciBall::Localize::fr::Lexicon contains C<("You won!" +=E<gt> "Tu as gagnE<eacute>!")>, then the above +code happily tells the user "Tu as gagnE<eacute>!". + +=head1 METHODS + +Locale::Maketext offers a variety of methods, which fall +into three categories: + +=over + +=item * + +Methods to do with constructing language handles. + +=item * + +C<maketext> and other methods to do with accessing %Lexicon data +for a given language handle. + +=item * + +Methods that you may find it handy to use, from routines of +yours that you put in %Lexicon entries. + +=back + +These are covered in the following section. + +=head2 Construction Methods + +These are to do with constructing a language handle: + +=over + +=item $lh = YourProjClass->get_handle( ...langtags... ) || die "lg-handle?"; + +This tries loading classes based on the language-tags you give (like +C<("en-US", "sk", "kon", "es-MX", "ja", "i-klingon")>, and for the first class +that succeeds, returns YourProjClass::I<language>->new(). + +It runs thru the entire given list of language-tags, and finds no classes +for those exact terms, it then tries "superordinate" language classes. +So if no "en-US" class (i.e., YourProjClass::en_us) +was found, nor classes for anything else in that list, we then try +its superordinate, "en" (i.e., YourProjClass::en), and so on thru +the other language-tags in the given list: "es". +(The other language-tags in our example list: +happen to have no superordinates.) + +If none of those language-tags leads to loadable classes, we then +try classes derived from YourProjClass->fallback_languages() and +then if nothing comes of that, we use classes named by +YourProjClass->fallback_language_classes(). Then in the (probably +quite unlikely) event that that fails, we just return undef. + +=item $lh = YourProjClass->get_handleB<()> || die "lg-handle?"; + +When C<get_handle> is called with an empty parameter list, magic happens: + +If C<get_handle> senses that it's running in program that was +invoked as a CGI, then it tries to get language-tags out of the +environment variable "HTTP_ACCEPT_LANGUAGE", and it pretends that +those were the languages passed as parameters to C<get_handle>. + +Otherwise (i.e., if not a CGI), this tries various OS-specific ways +to get the language-tags for the current locale/language, and then +pretends that those were the value(s) passed to C<cet_handle>. + +Currently this OS-specific stuff consists of looking in the environment +variables "LANG" and "LANGUAGE"; and on MSWin machines (where those +variables are typically unused), this also tries using +the module Win32::Locale to get a language-tag for whatever language/locale +is currently selected in the "Regional Settings" (or "International"?) +Control Panel. I welcome further +suggestions for making this do the Right Thing under other operating +systems that support localization. + +If you're using localization in an application that keeps a configuration +file, you might consider something like this in your project class: + + sub get_handle_via_config { + my $class = $_[0]; + my $preferred_language = $Config_settings{'language'}; + my $lh; + if($preferred_language) { + $lh = $class->get_handle($chosen_language) + || die "No language handle for \"$chosen_language\" or the like"; + } else { + # Config file missing, maybe? + $lh = $class->get_handle() + || die "Can't get a language handle"; + } + return $lh; + } + +=item $lh = YourProjClass::langname->new(); + +This constructs a language handle. You usually B<don't> call this +directly, but instead let C<get_handle> find a language class to C<use> +and to then call ->new on. + +=item $lh->init(); + +This is called by ->new to initialize newly-constructed language handles. +If you define an init method in your class, remember that it's usually +considered a good idea to call $lh->SUPER::init in it (presumably at the +beginning), so that all classes get a chance to initialize a new object +however they see fit. + +=item YourProjClass->fallback_languages() + +C<get_handle> appends the return value of this to the end of +whatever list of languages you pass C<get_handle>. Unless +you override this method, your project class +will inherit Locale::Maketext's C<fallback_languages>, which +currently returns C<('i-default', 'en', 'en-US')>. +("i-default" is defined in RFC 2277). + +This method (by having it return the name +of a language-tag that has an existing language class) +can be used for making sure that +C<get_handle> will always manage to construct a language +handle (assuming your language classes are in an appropriate +@INC directory). Or you can use the next method: + +=item YourProjClass->fallback_language_classes() + +C<get_handle> appends the return value of this to the end +of the list of classes it will try using. Unless +you override this method, your project class +will inherit Locale::Maketext's C<fallback_language_classes>, +which currently returns an empty list, C<()>. +By setting this to some value (namely, the name of a loadable +language class), you can be sure that +C<get_handle> will always manage to construct a language +handle. + +=back + +=head2 The "maketext" Method + +This is the most important method in Locale::Maketext: + +$text = $lh->maketext(I<key>, ...parameters for this phrase...); + +This looks in the %Lexicon of the language handle +$lh and all its superclasses, looking +for an entry whose key is the string I<key>. Assuming such +an entry is found, various things then happen, depending on the +value found: + +If the value is a scalarref, the scalar is dereferenced and returned +(and any parameters are ignored). +If the value is a coderef, we return &$value($lh, ...parameters...). +If the value is a string that I<doesn't> look like it's in Bracket Notation, +we return it (after replacing it with a scalarref, in its %Lexicon). +If the value I<does> look like it's in Bracket Notation, then we compile +it into a sub, replace the string in the %Lexicon with the new coderef, +and then we return &$new_sub($lh, ...parameters...). + +Bracket Notation is discussed in a later section. Note +that trying to compile a string into Bracket Notation can throw +an exception if the string is not syntactically valid (say, by not +balancing brackets right.) + +Also, calling &$coderef($lh, ...parameters...) can throw any sort of +exception (if, say, code in that sub tries to divide by zero). But +a very common exception occurs when you have Bracket +Notation text that says to call a method "foo", but there is no such +method. (E.g., "You have [quaB<tn>,_1,ball]." will throw an exception +on trying to call $lh->quaB<tn>($_[1],'ball') -- you presumably meant +"quant".) C<maketext> catches these exceptions, but only to make the +error message more readable, at which point it rethrows the exception. + +An exception I<may> be thrown if I<key> is not found in any +of $lh's %Lexicon hashes. What happens if a key is not found, +is discussed in a later section, "Controlling Lookup Failure". + +Note that you might find it useful in some cases to override +the C<maketext> method with an "after method", if you want to +translate encodings, or even scripts: + + package YrProj::zh_cn; # Chinese with PRC-style glyphs + use base ('YrProj::zh_tw'); # Taiwan-style + sub maketext { + my $self = shift(@_); + my $value = $self->maketext(@_); + return Chineeze::taiwan2mainland($value); + } + +Or you may want to override it with something that traps +any exceptions, if that's critical to your program: + + sub maketext { + my($lh, @stuff) = @_; + my $out; + eval { $out = $lh->SUPER::maketext(@stuff) }; + return $out unless $@; + ...otherwise deal with the exception... + } + +Other than those two situations, I don't imagine that +it's useful to override the C<maketext> method. (If +you run into a situation where it is useful, I'd be +interested in hearing about it.) + +=over + +=item $lh->fail_with I<or> $lh->fail_with(I<PARAM>) + +=item $lh->failure_handler_auto + +These two methods are discussed in the section "Controlling +Lookup Failure". + +=back + +=head2 Utility Methods + +These are methods that you may find it handy to use, generally +from %Lexicon routines of yours (whether expressed as +Bracket Notation or not). + +=over + +=item $language->quant($number, $singular) + +=item $language->quant($number, $singular, $plural) + +=item $language->quant($number, $singular, $plural, $negative) + +This is generally meant to be called from inside Bracket Notation +(which is discussed later), as in + + "Your search matched [quant,_1,document]!" + +It's for I<quantifying> a noun (i.e., saying how much of it there is, +while giving the currect form of it). The behavior of this method is +handy for English and a few other Western European languages, and you +should override it for languages where it's not suitable. You can feel +free to read the source, but the current implementation is basically +as this pseudocode describes: + + if $number is 0 and there's a $negative, + return $negative; + elsif $number is 1, + return "1 $singular"; + elsif there's a $plural, + return "$number $plural"; + else + return "$number " . $singular . "s"; + # + # ...except that we actually call numf to + # stringify $number before returning it. + +So for English (with Bracket Notation) +C<"...[quant,_1,file]..."> is fine (for 0 it returns "0 files", +for 1 it returns "1 file", and for more it returns "2 files", etc.) + +But for "directory", you'd want C<"[quant,_1,direcory,directories]"> +so that our elementary C<quant> method doesn't think that the +plural of "directory" is "directorys". And you might find that the +output may sound better if you specify a negative form, as in: + + "[quant,_1,file,files,No files] matched your query.\n" + +Remember to keep in mind verb agreement (or adjectives too, in +other languages), as in: + + "[quant,_1,document] were matched.\n" + +Because if _1 is one, you get "1 document B<were> matched". +An acceptable hack here is to do something like this: + + "[quant,_1,document was, documents were] matched.\n" + +=item $language->numf($number) + +This returns the given number formatted nicely according to +this language's conventions. Maketext's default method is +mostly to just take the normal string form of the number +(applying sprintf "%G" for only very large numbers), and then +to add commas as necessary. (Except that +we apply C<tr/,./.,/> if $language->{'numf_comma'} is true; +that's a bit of a hack that's useful for languages that express +two million as "2.000.000" and not as "2,000,000"). + +If you want anything fancier, consider overriding this with something +that uses L<Number::Format|Number::Format>, or does something else +entirely. + +Note that numf is called by quant for stringifying all quantifying +numbers. + +=item $language->sprintf($format, @items) + +This is just a wrapper around Perl's normal C<sprintf> function. +It's provided so that you can use "sprintf" in Bracket Notation: + + "Couldn't access datanode [sprintf,%10x=~[%s~],_1,_2]!\n" + +returning... + + Couldn't access datanode Stuff=[thangamabob]! + +=item $language->language_tag() + +Currently this just takes the last bit of C<ref($language)>, turns +underscores to dashes, and returns it. So if $language is +an object of class Hee::HOO::Haw::en_us, $language->language_tag() +returns "en-us". (Yes, the usual representation for that language +tag is "en-US", but case is I<never> considered meaningful in +language-tag comparison.) + +You may override this as you like; Maketext doesn't use it for +anything. + +=item $language->encoding() + +Currently this isn't used for anything, but it's provided +(with default value of +C<(ref($language) && $language-E<gt>{'encoding'})) or "iso-8859-1"> +) as a sort of suggestion that it may be useful/necessary to +associate encodings with your language handles (whether on a +per-class or even per-handle basis.) + +=back + +=head2 Language Handle Attributes and Internals + +A language handle is a flyweight object -- i.e., it doesn't (necessarily) +carry any data of interest, other than just being a member of +whatever class it belongs to. + +A language handle is implemented as a blessed hash. Subclasses of yours +can store whatever data you want in the hash. Currently the only hash +entry used by any crucial Maketext method is "fail", so feel free to +use anything else as you like. + +B<Remember: Don't be afraid to read the Maketext source if there's +any point on which this documentation is unclear.> This documentation +is vastly longer than the module source itself. + +=over + +=back + +=head1 LANGUAGE CLASS HIERARCHIES + +These are Locale::Maketext's assumptions about the class +hierarchy formed by all your language classes: + +=over + +=item * + +You must have a project base class, which you load, and +which you then use as the first argument in +the call to YourProjClass->get_handle(...). It should derive +(whether directly or indirectly) from Locale::Maketext. +It B<doesn't matter> how you name this class, altho assuming this +is the localization component of your Super Mega Program, +good names for your project class might be +SuperMegaProgram::Localization, SuperMegaProgram::L10N, +SuperMegaProgram::I18N, SuperMegaProgram::International, +or even SuperMegaProgram::Languages or SuperMegaProgram::Messages. + +=item * + +Language classes are what YourProjClass->get_handle will try to load. +It will look for them by taking each language-tag (B<skipping> it +if it doesn't look like a language-tag or locale-tag!), turning it to +all lowercase, turning and dashes to underscores, and appending it +to YourProjClass . "::". So this: + + $lh = YourProjClass->get_handle( + 'en-US', 'fr', 'kon', 'i-klingon', 'i-klingon-romanized' + ); + +will try loading the classes +YourProjClass::en_us (note lowercase!), YourProjClass::fr, +YourProjClass::kon, +YourProjClass::i_klingon +and YourProjClass::i_klingon_romanized. (And it'll stop at the +first one that actually loads.) + +=item * + +I assume that each language class derives (directly or indirectly) +from your project class, and also defines its @ISA, its %Lexicon, +or both. But I anticipate no dire consequences if these assumptions +do not hold. + +=item * + +Language classes may derive from other language classes (altho they +should have "use I<Thatclassname>" or "use base qw(I<...classes...>)"). +They may derive from the project +class. They may derive from some other class altogether. Or via +multiple inheritance, it may derive from any mixture of these. + +=item * + +I foresee no problems with having multiple inheritance in +your hierarchy of language classes. (As usual, however, Perl will +complain bitterly if you have a cycle in the hierarchy: i.e., if +any class is its own ancestor.) + +=back + +=head1 ENTRIES IN EACH LEXICON + +A typical %Lexicon entry is meant to signify a phrase, +taking some number (0 or more) of parameters. An entry +is meant to be accessed by via +a string I<key> in $lh->maketext(I<key>, ...parameters...), +which should return a string that is generally meant for +be used for "output" to the user -- regardless of whether +this actually means printing to STDOUT, writing to a file, +or putting into a GUI widget. + +While the key must be a string value (since that's a basic +restriction that Perl places on hash keys), the value in +the lexicon can currenly be of several types: +a defined scalar, scalarref, or coderef. The use of these is +explained above, in the section 'The "maketext" Method', and +Bracket Notation for strings is discussed in the next section. + +While you can use arbitrary unique IDs for lexicon keys +(like "_min_larger_max_error"), it is often +useful for if an entry's key is itself a valid value, like +this example error message: + + "Minimum ([_1]) is larger than maximum ([_2])!\n", + +Compare this code that uses an arbitrary ID... + + die $lh->maketext( "_min_larger_max_error", $min, $max ) + if $min > $max; + +...to this code that uses a key-as-value: + + die $lh->maketext( + "Minimum ([_1]) is larger than maximum ([_2])!\n", + $min, $max + ) if $min > $max; + +The second is, in short, more readable. In particular, it's obvious +that the number of parameters you're feeding to that phrase (two) is +the number of parameters that it I<wants> to be fed. (Since you see +_1 and a _2 being used in the key there.) + +Also, once a project is otherwise +complete and you start to localize it, you can scrape together +all the various keys you use, and pass it to a translator; and then +the translator's work will go faster if what he's presented is this: + + "Minimum ([_1]) is larger than maximum ([_2])!\n", + => "", # fill in something here, Jacques! + +rather than this more cryptic mess: + + "_min_larger_max_error" + => "", # fill in something here, Jacques + +I think that keys as lexicon values makes the completed lexicon +entries more readable: + + "Minimum ([_1]) is larger than maximum ([_2])!\n", + => "Le minimum ([_1]) est plus grand que le maximum ([_2])!\n", + +Also, having valid values as keys becomes very useful if you set +up an _AUTO lexicon. _AUTO lexicons are discussed in a later +section. + +I almost always use keys that are themselves +valid lexicon values. One notable exception is when the value is +quite long. For example, to get the screenful of data that +a command-line program might returns when given an unknown switch, +I often just use a key "_USAGE_MESSAGE". At that point I then go +and immediately to define that lexicon entry in the +ProjectClass::L10N::en lexicon (since English is always my "project +lanuage"): + + '_USAGE_MESSAGE' => <<'EOSTUFF', + ...long long message... + EOSTUFF + +and then I can use it as: + + getopt('oDI', \%opts) or die $lh->maketext('_USAGE_MESSAGE'); + +Incidentally, +note that each class's C<%Lexicon> inherits-and-extends +the lexicons in its superclasses. This is not because these are +special hashes I<per se>, but because you access them via the +C<maketext> method, which looks for entries across all the +C<%Lexicon>'s in a language class I<and> all its ancestor classes. +(This is because the idea of "class data" isn't directly implemented +in Perl, but is instead left to individual class-systems to implement +as they see fit..) + +Note that you may have things stored in a lexicon +besides just phrases for output: for example, if your program +takes input from the keyboard, asking a "(Y/N)" question, +you probably need to know what equivalent of "Y[es]/N[o]" is +in whatever language. You probably also need to know what +the equivalents of the answers "y" and "n" are. You can +store that information in the lexicon (say, under the keys +"~answer_y" and "~answer_n", and the long forms as +"~answer_yes" and "~answer_no", where "~" is just an ad-hoc +character meant to indicate to programmers/translators that +these are not phrases for output). + +Or instead of storing this in the language class's lexicon, +you can (and, in some cases, really should) represent the same bit +of knowledge as code is a method in the language class. (That +leaves a tidy distinction between the lexicon as the things we +know how to I<say>, and the rest of the things in the lexicon class +as things that we know how to I<do>.) Consider +this example of a processor for responses to French "oui/non" +questions: + + sub y_or_n { + return undef unless defined $_[1] and length $_[1]; + my $answer = lc $_[1]; # smash case + return 1 if $answer eq 'o' or $answer eq 'oui'; + return 0 if $answer eq 'n' or $answer eq 'non'; + return undef; + } + +...which you'd then call in a construct like this: + + my $response; + until(defined $response) { + print $lh->maketext("Open the pod bay door (y/n)? "); + $response = $lh->y_or_n( get_input_from_keyboard_somehow() ); + } + if($response) { $pod_bay_door->open() } + else { $pod_bay_door->leave_closed() } + +Other data worth storing in a lexicon might be things like +filenames for language-targetted resources: + + ... + "_main_splash_png" + => "/styles/en_us/main_splash.png", + "_main_splash_imagemap" + => "/styles/en_us/main_splash.incl", + "_general_graphics_path" + => "/styles/en_us/", + "_alert_sound" + => "/styles/en_us/hey_there.wav", + "_forward_icon" + => "left_arrow.png", + "_backward_icon" + => "right_arrow.png", + # In some other languages, left equals + # BACKwards, and right is FOREwards. + ... + +You might want to do the same thing for expressing key bindings +or the like (since hardwiring "q" as the binding for the function +that quits a screen/menu/program is useful only if your language +happens to associate "q" with "quit"!) + +=head1 BRACKET NOTATION + +Bracket Notation is a crucial feature of Locale::Maketext. I mean +Bracket Notation to provide a replacement for sprintf formatting. +Everything you do with Bracket Notation could be done with a sub block, +but bracket notation is meant to be much more concise. + +Bracket Notation is a like a miniature "template" system (in the sense +of L<Text::Template|Text::Template>, not in the sense of C++ templates), +where normal text is passed thru basically as is, but text is special +regions is specially interpreted. In Bracket Notation, you use brackets +("[...]" -- not "{...}"!) to note sections that are specially interpreted. + +For example, here all the areas that are taken literally are underlined with +a "^", and all the in-bracket special regions are underlined with an X: + + "Minimum ([_1]) is larger than maximum ([_2])!\n", + ^^^^^^^^^ XX ^^^^^^^^^^^^^^^^^^^^^^^^^^ XX ^^^^ + +When that string is compiled from bracket notation into a real Perl sub, +it's basically turned into: + + sub { + my $lh = $_[0]; + my @params = @_; + return join '', + "Minimum (", + ...some code here... + ") is larger than maximum (", + ...some code here... + ")!\n", + } + # to be called by $lh->maketext(KEY, params...) + +In other words, text outside bracket groups is turned into string +literals. Text in brackets is rather more complex, and currently follows +these rules: + +=over + +=item * + +Bracket groups that are empty, or which consist only of whitespace, +are ignored. (Examples: "[]", "[ ]", or a [ and a ] with returns +and/or tabs and/or spaces between them. + +Otherwise, each group is taken to be a comma-separated group of items, +and each item is interpreted as follows: + +=item * + +An item that is "_I<digits>" or "_-I<digits>" is interpreted as +$_[I<value>]. I.e., "_1" is becomes with $_[1], and "_-3" is interpreted +as $_[-3] (in which case @_ should have at least three elements in it). +Note that $_[0] is the language handle, and is typically not named +directly. + +=item * + +An item "_*" is interpreted to mean "all of @_ except $_[0]". +I.e., C<@_[1..$#_]>. Note that this is an empty list in the case +of calls like $lh->maketext(I<key>) where there are no +parameters (except $_[0], the language handle). + +=item * + +Otherwise, each item is interpreted as a string literal. + +=back + +The group as a whole is interpreted as follows: + +=over + +=item * + +If the first item in a bracket group looks like a method name, +then that group is interpreted like this: + + $lh->that_method_name( + ...rest of items in this group... + ), + +=item * + +If the first item in a bracket group is empty-string, or "_*" +or "_I<digits>" or "_-I<digits>", then that group is interpreted +as just the interpolation of all its items: + + join('', + ...rest of items in this group... + ), + +Examples: "[_1]" and "[,_1]", which are synonymous; and +"[,ID-(,_4,-,_2,)]", which compiles as +C<join "", "ID-(", $_[4], "-", $_[2], ")">. + +=item * + +Otherwise this bracket group is invalid. For example, in the group +"[!@#,whatever]", the first item C<"!@#"> is neither empty-string, +"_I<number>", "_-I<number>", "_*", nor a valid method name; and so +Locale::Maketext will throw an exception of you try compiling an +expression containing this bracket group. + +=back + +Note, incidentally, that items in each group are comma-separated, +not C</\s*,\s*/>-separated. That is, you might expect that this +bracket group: + + "Hoohah [foo, _1 , bar ,baz]!" + +would compile to this: + + sub { + my $lh = $_[0]; + return join '', + "Hoohah ", + $lh->foo( $_[1], "bar", "baz"), + "!", + } + +But it actually compiles as this: + + sub { + my $lh = $_[0]; + return join '', + "Hoohah ", + $lh->foo(" _1 ", " bar ", "baz"), #!!! + "!", + } + +In the notation discussed so far, the characters "[" and "]" are given +special meaning, for opening and closing bracket groups, and "," has +a special meaning inside bracket groups, where it separates items in the +group. This begs the question of how you'd express a literal "[" or +"]" in a Bracket Notation string, and how you'd express a literal +comma inside a bracket group. For this purpose I've adopted "~" (tilde) +as an escape character: "~[" means a literal '[' character anywhere +in Bracket Notation (i.e., regardless of whether you're in a bracket +group or not), and ditto for "~]" meaning a literal ']', and "~," meaning +a literal comma. (Altho "," means a literal comma outside of +bracket groups -- it's only inside bracket groups that commas are special.) + +And on the off chance you need a literal tilde in a bracket expression, +you get it with "~~". + +Currently, an unescaped "~" before a character +other than a bracket or a comma is taken to mean just a "~" and that +charecter. I.e., "~X" means the same as "~~X" -- i.e., one literal tilde, +and then one literal "X". However, by using "~X", you are assuming that +no future version of Maketext will use "~X" as a magic escape sequence. +In practice this is not a great problem, since first off you can just +write "~~X" and not worry about it; second off, I doubt I'll add lots +of new magic characters to bracket notation; and third off, you +aren't likely to want literal "~" characters in your messages anyway, +since it's not a character with wide use in natural language text. + +Brackets must be balanced -- every openbracket must have +one matching closebracket, and vice versa. So these are all B<invalid>: + + "I ate [quant,_1,rhubarb pie." + "I ate [quant,_1,rhubarb pie[." + "I ate quant,_1,rhubarb pie]." + "I ate quant,_1,rhubarb pie[." + +Currently, bracket groups do not nest. That is, you B<cannot> say: + + "Foo [bar,baz,[quux,quuux]]\n"; + +If you need a notation that's that powerful, use normal Perl: + + %Lexicon = ( + ... + "some_key" => sub { + my $lh = $_[0]; + join '', + "Foo ", + $lh->bar('baz', $lh->quux('quuux')), + "\n", + }, + ... + ); + +Or write the "bar" method so you don't need to pass it the +output from calling quux. + +I do not anticipate that you will need (or particularly want) +to nest bracket groups, but you are welcome to email me with +convincing (real-life) arguments to the contrary. + +=head1 AUTO LEXICONS + +If maketext goes to look in an individual %Lexicon for an entry +for I<key> (where I<key> does not start with an underscore), and +sees none, B<but does see> an entry of "_AUTO" => I<some_true_value>, +then we actually define $Lexicon{I<key>} = I<key> right then and there, +and then use that value as if it had been there all +along. This happens before we even look in any superclass %Lexicons! + +(This is meant to be somewhat like the AUTOLOAD mechanism in +Perl's function call system -- or, looked at another way, +like the L<AutoLoader|AutoLoader> module.) + +I can picture all sorts of circumstances where you just +do not want lookup to be able to fail (since failing +normally means that maketext throws a C<die>, altho +see the next section for greater control over that). But +here's one circumstance where _AUTO lexicons are meant to +be I<especially> useful: + +As you're writing an application, you decide as you go what messages +you need to emit. Normally you'd go to write this: + + if(-e $filename) { + go_process_file($filename) + } else { + print "Couldn't find file \"$filename\"!\n"; + } + +but since you anticipate localizing this, you write: + + use ThisProject::I18N; + my $lh = ThisProject::I18N->get_handle(); + # For the moment, assume that things are set up so + # that we load class ThisProject::I18N::en + # and that that's the class that $lh belongs to. + ... + if(-e $filename) { + go_process_file($filename) + } else { + print $lh->maketext( + "Couldn't find file \"[_1]\"!\n", $filename + ); + } + +Now, right after you've just written the above lines, you'd +normally have to go open the file +ThisProject/I18N/en.pm, and immediately add an entry: + + "Couldn't find file \"[_1]\"!\n" + => "Couldn't find file \"[_1]\"!\n", + +But I consider that somewhat of a distraction from the work +of getting the main code working -- to say nothing of the fact +that I often have to play with the program a few times before +I can decide exactly what wording I want in the messages (which +in this case would require me to go changing three lines of code: +the call to maketext with that key, and then the two lines in +ThisProject/I18N/en.pm). + +However, if you set "_AUTO => 1" in the %Lexicon in, +ThisProject/I18N/en.pm (assuming that English (en) is +the language that all your programmers will be using for this +project's internal message keys), then you don't ever have to +go adding lines like this + + "Couldn't find file \"[_1]\"!\n" + => "Couldn't find file \"[_1]\"!\n", + +to ThisProject/I18N/en.pm, because if _AUTO is true there, +then just looking for an entry with the key "Couldn't find +file \"[_1]\"!\n" in that lexicon will cause it to be added, +with that value! + +Note that the reason that keys that start with "_" +are immune to _AUTO isn't anything generally magical about +the underscore character -- I just wanted a way to have most +lexicon keys be autoable, except for possibly a few, and I +arbitrarily decided to use a leading underscore as a signal +to distinguish those few. + +=head1 CONTROLLING LOOKUP FAILURE + +If you call $lh->maketext(I<key>, ...parameters...), +and there's no entry I<key> in $lh's class's %Lexicon, nor +in the superclass %Lexicon hash, I<and> if we can't auto-make +I<key> (because either it starts with a "_", or because none +of its lexicons have C<_AUTO =E<gt> 1,>), then we have +failed to find a normal way to maketext I<key>. What then +happens in these failure conditions, depends on the $lh object +"fail" attribute. + +If the language handle has no "fail" attribute, maketext +will simply throw an exception (i.e., it calls C<die>, mentioning +the I<key> whose lookup failed, and naming the line number where +the calling $lh->maketext(I<key>,...) was. + +If the language handle has a "fail" attribute whose value is a +coderef, then $lh->maketext(I<key>,...params...) gives up and calls: + + return &{$that_subref}($lh, $key, @params); + +Otherwise, the "fail" attribute's value should be a string denoting +a method name, so that $lh->maketext(I<key>,...params...) can +give up with: + + return $lh->$that_method_name($phrase, @params); + +The "fail" attribute can be accessed with the C<fail_with> method: + + # Set to a coderef: + $lh->fail_with( \&failure_handler ); + + # Set to a method name: + $lh->fail_with( 'failure_method' ); + + # Set to nothing (i.e., so failure throws a plain exception) + $lh->fail_with( undef ); + + # Simply read: + $handler = $lh->fail_with(); + +Now, as to what you may want to do with these handlers: Maybe you'd +want to log what key failed for what class, and then die. Maybe +you don't like C<die> and instead you want to send the error message +to STDOUT (or wherever) and then merely C<exit()>. + +Or maybe you don't want to C<die> at all! Maybe you could use a +handler like this: + + # Make all lookups fall back onto an English value, + # but after we log it for later fingerpointing. + my $lh_backup = ThisProject->get_handle('en'); + open(LEX_FAIL_LOG, ">>wherever/lex.log") || die "GNAARGH $!"; + sub lex_fail { + my($failing_lh, $key, $params) = @_; + print LEX_FAIL_LOG scalar(localtime), "\t", + ref($failing_lh), "\t", $key, "\n"; + return $lh_backup->maketext($key,@params); + } + +Some users have expressed that they think this whole mechanism of +having a "fail" attribute at all, seems a rather pointless complication. +But I want Locale::Maketext to be usable for software projects of I<any> +scale and type; and different software projects have different ideas +of what the right thing is to do in failure conditions. I could simply +say that failure always throws an exception, and that if you want to be +careful, you'll just have to wrap every call to $lh->maketext in an +S<eval { }>. However, I want programmers to reserve the right (via +the "fail" attribute) to treat lookup failure as something other than +an exception of the same level of severity as a config file being +unreadable, or some essential resource being inaccessable. + +One possibly useful value for the "fail" attribute is the method name +"failure_handler_auto". This is a method defined in class +Locale::Maketext itself. You set it with: + + $lh->fail_with('failure_handler_auto'); + +Then when you call $lh->maketext(I<key>, ...parameters...) and +there's no I<key> in any of those lexicons, maketext gives up with + + return $lh->failure_handler_auto($key, @params); + +But failure_handler_auto, instead of dying or anything, compiles +$key, caching it in $lh->{'failure_lex'}{$key} = $complied, +and then calls the compiled value, and returns that. (I.e., if +$key looks like bracket notation, $compiled is a sub, and we return +&{$compiled}(@params); but if $key is just a plain string, we just +return that.) + +The effect of using "failure_auto_handler" +is like an AUTO lexicon, except that it 1) compiles $key even if +it starts with "_", and 2) you have a record in the new hashref +$lh->{'failure_lex'} of all the keys that have failed for +this object. This should avoid your program dying -- as long +as your keys aren't actually invalid as bracket code, and as +long as they don't try calling methods that don't exist. + +"failure_auto_handler" may not be exactly what you want, but I +hope it at least shows you that maketext failure can be mitigated +in any number of very flexible ways. If you can formalize exactly +what you want, you should be able to express that as a failure +handler. You can even make it default for every object of a given +class, by setting it in that class's init: + + sub init { + my $lh = $_[0]; # a newborn handle + $lh->SUPER::init(); + $lh->fail_with('my_clever_failure_handler'); + return; + } + sub my_clever_failure_handler { + ...you clever things here... + } + +=head1 HOW TO USE MAKETEXT + +Here is a brief checklist on how to use Maketext to localize +applications: + +=over + +=item * + +Decide what system you'll use for lexicon keys. If you insist, +you can use opaque IDs (if you're nostalgic for C<catgets>), +but I have better suggestions in the +section "Entries in Each Lexicon", above. Assuming you opt for +meaningful keys that double as values (like "Minimum ([_1]) is +larger than maximum ([_2])!\n"), you'll have to settle on what +language those should be in. For the sake of argument, I'll +call this English, specifically American English, "en-US". + +=item * + +Create a class for your localization project. This is +the name of the class that you'll use in the idiom: + + use Projname::L10N; + my $lh = Projname::L10N->get_handle(...) || die "Language?"; + +Assuming your call your class Projname::L10N, create a class +consisting minimally of: + + package Projname::L10N; + use base qw(Locale::Maketext); + ...any methods you might want all your languages to share... + + # And, assuming you want the base class to be an _AUTO lexicon, + # as is discussed a few sections up: + + 1; + +=item * + +Create a class for the language your internal keys are in. Name +the class after the language-tag for that language, in lowercase, +with dashes changed to underscores. Assuming your project's first +language is US English, you should call this Projname::L10N::en_us. +It should consist minimally of: + + package Projname::L10N::en_us; + use base qw(Projname::L10N); + %Lexicon = ( + '_AUTO' => 1, + ); + 1; + +(For the rest of this section, I'll assume that this "first +language class" of Projname::L10N::en_us has +_AUTO lexicon.) + +=item * + +Go and write your program. Everywhere in your program where +you would say: + + print "Foobar $thing stuff\n"; + +instead do it thru maketext, using no variable interpolation in +the key: + + print $lh->maketext("Foobar [_1] stuff\n", $thing); + +If you get tired of constantly saying C<print $lh-E<gt>maketext>, +consider making a functional wrapper for it, like so: + + use Projname::L10N; + use vars qw($lh); + $lh = Projname::L10N->get_handle(...) || die "Language?"; + sub pmt (@) { print( $lh->maketext(@_)) } + # "pmt" is short for "Print MakeText" + $Carp::Verbose = 1; + # so if maketext fails, we see made the call to pmt + +Besides whole phrases meant for output, anything language-dependent +should be put into the class Projname::L10N::en_us, +whether as methods, or as lexicon entries -- this is discussed +in the section "Entries in Each Lexicon", above. + +=item * + +Once the program is otherwise done, and once its localization for +the first language works right (via the data and methods in +Projname::L10N::en_us), you can get together the data for translation. +If your first language lexicon isn't an _AUTO lexicon, then you already +have all the messages explicitly in the lexicon (or else you'd be +getting exceptions thrown when you call $lh->maketext to get +messages that aren't in there). But if you were (advisedly) lazy and are +using an _AUTO lexicon, then you've got to make a list of all the phrases +that you've so far been letting _AUTO generate for you. There are very +many ways to assemble such a list. The most straightforward is to simply +grep the source for every occurrence of "maketext" (or calls +to wrappers around it, like the above C<pmt> function), and to log the +following phrase. + +=item * + +You may at this point want to consider whether the your base class +(Projname::L10N) that all lexicons inherit from (Projname::L10N::en, +Projname::L10N::es, etc.) should be an _AUTO lexicon. It may be true +that in theory, all needed messages will be in each language class; +but in the presumably unlikely or "impossible" case of lookup failure, +you should consider whether your program should throw an exception, +emit text in English (or whatever your project's first language is), +or some more complex solution as described in the section +"Controlling Lookup Failure", above. + +=item * + +Submit all messages/phrases/etc. to translators. + +(You may, in fact, want to start with localizing to I<one> other language +at first, if you're not sure that you've property abstracted the +language-dependent parts of your code.) + +Translators may request clarification of the situation in which a +particular phrase is found. For example, in English we are entirely happy +saying "I<n> files found", regardless of whether we mean "I looked for files, +and found I<n> of them" or the rather distinct situation of "I looked for +something else (like lines in files), and along the way I saw I<n> +files." This may involve rethinking things that you thought quite clear: +should "Edit" on a toolbar be a noun ("editing") or a verb ("to edit")? Is +there already a conventionalized way to express that menu option, separate +from the target language's normal word for "to edit"? + +In all cases where the very common phenomenon of quantification +(saying "I<N> files", for B<any> value of N) +is involved, each translator should make clear what dependencies the +number causes in the sentence. In many cases, dependency is +limited to words adjacent to the number, in places where you might +expect them ("I found the-?PLURAL I<N> +empty-?PLURAL directory-?PLURAL"), but in some cases there are +unexpected dependencies ("I found-?PLURAL ..."!) as well as long-distance +dependencies "The I<N> directory-?PLURAL could not be deleted-?PLURAL"!). + +Remind the translators to consider the case where N is 0: +"0 files found" isn't exactly natural-sounding in any language, but it +may be unacceptable in many -- or it may condition special +kinds of agreement (similar to English "I didN'T find ANY files"). + +Remember to ask your translators about numeral formatting in their +language, so that you can override the C<numf> method as +appropriate. Typical variables in number formatting are: what to +use as a decimal point (comma? period?); what to use as a thousands +separator (space? nonbreakinng space? comma? period? small +middot? prime? apostrophe?); and even whether the so-called "thousands +separator" is actually for every third digit -- I've heard reports of +two hundred thousand being expressable as "2,00,000" for some Indian +(Subcontinental) languages, besides the less surprising "S<200 000>", +"200.000", "200,000", and "200'000". Also, using a set of numeral +glyphs other than the usual ASCII "0"-"9" might be appreciated, as via +C<tr/0-9/\x{0966}-\x{096F}/> for getting digits in Devanagari script +(for Hindi, Konkani, others). + +The basic C<quant> method that Locale::Maketext provides should be +good for many languages. For some languages, it might be useful +to modify it (or its constituent C<numerate> method) +to take a plural form in the two-argument call to C<quant> +(as in "[quant,_1,files]") if +it's all-around easier to infer the singular form from the plural, than +to infer the plural form from the singular. + +But for other languages (as is discussed at length +in L<Locale::Maketext::TPJ13|Locale::Maketext::TPJ13>), simple +C<quant>/C<numerify> is not enough. For the particularly problematic +Slavic languages, what you may need is a method which you provide +with the number, the citation form of the noun to quantify, and +the case and gender that the sentence's syntax projects onto that +noun slot. The method would then be responsible for determining +what grammatical number that numeral projects onto its noun phrase, +and what case and gender it may override the normal case and gender +with; and then it would look up the noun in a lexicon providing +all needed inflected forms. + +=item * + +You may also wish to discuss with the translators the question of +how to relate different subforms of the same language tag, +considering how this reacts with C<get_handle>'s treatment of +these. For example, if a user accepts interfaces in "en, fr", and +you have interfaces available in "en-US" and "fr", what should +they get? You may wish to resolve this by establishing that "en" +and "en-US" are effectively synonymous, by having one class +zero-derive from the other. + +For some languages this issue may never come up (Danish is rarely +expressed as "da-DK", but instead is just "da"). And for other +languages, the whole concept of a "generic" form may verge on +being uselessly vague, particularly for interfaces involving voice +media in forms of Arabic or Chinese. + +=item * + +Once you've localized your program/site/etc. for all desired +languages, be sure to show the result (whether live, or via +screenshots) to the translators. Once they approve, make every +effort to have it then checked by at least one other speaker of +that language. This holds true even when (or especially when) the +translation is done by one of your own programmers. Some +kinds of systems may be harder to find testers for than others, +depending on the amount of domain-specific jargon and concepts +involved -- it's easier to find people who can tell you whether +they approve of your translation for "delete this message" in an +email-via-Web interface, than to find people who can give you +an informed opinion on your translation for "attribute value" +in an XML query tool's interface. + +=back + +=head1 SEE ALSO + +I recommend reading all of these: + +L<Locale::Maketext::TPJ13|Locale::Maketext::TPJ13> -- my I<The Perl +Journal> article about Maketext. It explains many important concepts +underlying Locale::Maketext's design, and some insight into why +Maketext is better than the plain old approach of just having +message catalogs that are just databases of sprintf formats. + +L<File::Findgrep|File::Findgrep> is a sample application/module +that uses Locale::Maketext to localize its messages. + +L<I18N::LangTags|I18N::LangTags>. + +L<Win32::Locale|Win32::Locale>. + +RFC 3066, I<Tags for the Identification of Languages>, +as at http://sunsite.dk/RFC/rfc/rfc3066.html + +RFC 2277, I<IETF Policy on Character Sets and Languages> +is at http://sunsite.dk/RFC/rfc/rfc2277.html -- much of it is +just things of interest to protocol designers, but it explains +some basic concepts, like the distinction between locales and +language-tags. + +The manual for GNU C<gettext>. The gettext dist is available in +C<ftp://prep.ai.mit.edu/pub/gnu/> -- get +a recent gettext tarball and look in its "doc/" directory, there's +an easily browsable HTML version in there. The +gettext documentation asks lots of questions worth thinking +about, even if some of their answers are sometimes wonky, +particularly where they start talking about pluralization. + +The Locale/Maketext.pm source. Obverse that the module is much +shorter than its documentation! + +=head1 COPYRIGHT AND DISCLAIMER + +Copyright (c) 1999-2001 Sean M. Burke. All rights reserved. + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself. + +This program is distributed in the hope that it will be useful, but +without any warranty; without even the implied warranty of +merchantability or fitness for a particular purpose. + +=head1 AUTHOR + +Sean M. Burke C<sburke@cpan.org> + +=cut + +# Zing! diff --git a/lib/Locale/Maketext/TPJ13.pod b/lib/Locale/Maketext/TPJ13.pod new file mode 100644 index 0000000000..db22478215 --- /dev/null +++ b/lib/Locale/Maketext/TPJ13.pod @@ -0,0 +1,776 @@ + +# This document contains text in Perl "POD" format. +# Use a POD viewer like perldoc or perlman to render it. + +=head1 NAME + +Locale::Maketext::TPJ13 -- article about software localization + +=head1 SYNOPSIS + + # This an article, not a module. + +=head1 DESCRIPTION + +The following article by Sean M. Burke and Jordan Lachler +first appeared in I<The Perl +Journal> #13 and is copyright 1999 The Perl Journal. It appears +courtesy of Jon Orwant and The Perl Journal. This document may be +distributed under the same terms as Perl itself. + +=head1 Localization and Perl: gettext breaks, Maketext fixes + +by Sean M. Burke and Jordan Lachler + +This article points out cases where gettext (a common system for +localizing software interfaces -- i.e., making them work in the user's +language of choice) fails because of basic differences between human +languages. This article then describes Maketext, a new system capable +of correctly treating these differences. + +=head2 A Localization Horror Story: It Could Happen To You + +=over + +"There are a number of languages spoken by human beings in this +world." + +-- Harald Tveit Alvestrand, in RFC 1766, "Tags for the +Identification of Languages" + +=back + +Imagine that your task for the day is to localize a piece of software +-- and luckily for you, the only output the program emits is two +messages, like this: + + I scanned 12 directories. + + Your query matched 10 files in 4 directories. + +So how hard could that be? You look at the code that produces +produces the first item, and it reads: + + printf("I scanned %g directories.", + $directory_count); + +You think about that, and realize that it doesn't even work right for +English, as it can produce this output: + + I scanned 1 directories. + +So you rewrite it to read: + + printf("I scanned %g %s.", + $directory_count, + $directory_count == 1 ? + "directory" : "directories", + ); + +...which does the Right Thing. (In case you don't recall, "%g" is for +locale-specific number interpolation, and "%s" is for string +interpolation.) + +But you still have to localize it for all the languages you're +producing this software for, so you pull Locale::gettext off of CPAN +so you can access the C<gettext> C functions you've heard are standard +for localization tasks. + +And you write: + + printf(gettext("I scanned %g %s."), + $dir_scan_count, + $dir_scan_count == 1 ? + gettext("directory") : gettext("directory"), + ); + +But you then read in the gettext manual (Drepper, Miller, and Pinard 1995) +that this is not a good idea, since how a single word like "directory" +or "directories" is translated may depend on context -- and this is +true, since in a case language like German or Russian, you'd may need +these words with a different case ending in the first instance (where the +word is the object of a verb) than in the second instance, which you haven't even +gotten to yet (where the word is the object of a preposition, "in %g +directories") -- assuming these keep the same syntax when translated +into those languages. + +So, on the advice of the gettext manual, you rewrite: + + printf( $dir_scan_count == 1 ? + gettext("I scanned %g directory.") : + gettext("I scanned %g directories."), + $dir_scan_count ); + +So, you email your various translators (the boss decides that the +languages du jour are Chinese, Arabic, Russian, and Italian, so you +have one translator for each), asking for translations for "I scanned +%g directory." and "I scanned %g directories.". When they reply, +you'll put that in the lexicons for gettext to use when it localizes +your software, so that when the user is running under the "zh" +(Chinese) locale, gettext("I scanned %g directory.") will return the +appropriate Chinese text, with a "%g" in there where printf can then +interpolate $dir_scan. + +Your Chinese translator emails right back -- he says both of these +phrases translate to the same thing in Chinese, because, in linguistic +jargon, Chinese "doesn't have number as a grammatical category" -- +whereas English does. That is, English has grammatical rules that +refer to "number", i.e., whether something is grammatically singular +or plural; and one of these rules is the one that forces nouns to take +a plural suffix (generally "s") when in a plural context, as they are when +they follow a number other than "one" (including, oddly enough, "zero"). +Chinese has no such rules, and so has just the one phrase where English +has two. But, no problem, you can have this one Chinese phrase appear +as the translation for the two English phrases in the "zh" gettext +lexicon for your program. + +Emboldened by this, you dive into the second phrase that your software +needs to output: "Your query matched 10 files in 4 directories.". You notice +that if you want to treat phrases as indivisible, as the gettext +manual wisely advises, you need four cases now, instead of two, to +cover the permutations of singular and plural on the two items, +$dir_count and $file_count. So you try this: + + printf( $file_count == 1 ? + ( $directory_count == 1 ? + gettext("Your query matched %g file in %g directory.") : + gettext("Your query matched %g file in %g directories.") ) : + ( $directory_count == 1 ? + gettext("Your query matched %g files in %g directory.") : + gettext("Your query matched %g files in %g directories.") ), + $file_count, $directory_count, + ); + +(The case of "1 file in 2 [or more] directories" could, I suppose, +occur in the case of symlinking or something of the sort.) + +It occurs to you that this is not the prettiest code you've ever +written, but this seems the way to go. You mail off to the +translators asking for translations for these four cases. The +Chinese guy replies with the one phrase that these all translate to in +Chinese, and that phrase has two "%g"s in it, as it should -- but +there's a problem. He translates it word-for-word back: "To your +question, in %g directories you would find %g answers." The "%g" +slots are in an order reverse to what they are in English. You wonder +how you'll get gettext to handle that. + +But you put it aside for the moment, and optimistically hope that the +other translators won't have this problem, and that their languages +will be better behaved -- i.e., that they will be just like English. + +But the Arabic translator is the next to write back. First off, your +code for "I scanned %g directory." or "I scanned %g directories." +assumes there's only singular or plural. But, to use linguistic +jargon again, Arabic has grammatical number, like English (but unlike +Chinese), but it's a three-term category: singular, dual, and plural. +In other words, the way you say "directory" depends on whether there's +one directory, or I<two> of them, or I<more than two> of them. Your +test of C<($directory == 1)> no longer does the job. And it means +that where English's grammatical category of number necessitates +only the two permutations of the first sentence based on "directory +[singular]" and "directories [plural]", Arabic has three -- and, +worse, in the second sentence ("Your query matched %g file in %g +directory."), where English has four, Arabic has nine. You sense +an unwelcome, exponential trend taking shape. + +Your Italian translator emails you back and says that "I searched 0 +directories" (a possible English output of your program) is stilted, +and if you think that's fine English, that's your problem, but that +I<just will not do> in the language of Dante. He insists that where +$directory_count is 0, your program should produce the Italian text +for "I I<didn't> scan I<any> directories.". And ditto for "I didn't +match any files in any directories", although he says the last part +about "in any directories" should probably just be left off. + +You wonder how you'll get gettext to handle this; to accomodate the +ways Arabic, Chinese, and Italian deal with numbers in just these few +very simple phrases, you need to write code that will ask gettext for +different queries depending on whether the numerical values in +question are 1, 2, more than 2, or in some cases 0, and you still haven't +figured out the problem with the different word order in Chinese. + +Then your Russian translator calls on the phone, to I<personally> tell +you the bad news about how really unpleasant your life is about to +become: + +Russian, like German or Latin, is an inflectional language; that is, nouns +and adjectives have to take endings that depend on their case +(i.e., nominative, accusative, genitive, etc...) -- which is roughly a matter of +what role they have in syntax of the sentence -- +as well as on the grammatical gender (i.e., masculine, feminine, neuter) +and number (i.e., singular or plural) of the noun, as well as on the +declension class of the noun. But unlike with most other inflected languages, +putting a number-phrase (like "ten" or "forty-three", or their Arabic +numeral equivalents) in front of noun in Russian can change the case and +number that noun is, and therefore the endings you have to put on it. + +He elaborates: In "I scanned %g directories", you'd I<expect> +"directories" to be in the accusative case (since it is the direct +object in the sentnce) and the plural number, +except where $directory_count is 1, then you'd expect the singular, of +course. Just like Latin or German. I<But!> Where $directory_count % +10 is 1 ("%" for modulo, remember), assuming $directory count is an +integer, and except where $directory_count % 100 is 11, "directories" +is forced to become grammatically singular, which means it gets the +ending for the accusative singular... You begin to visualize the code +it'd take to test for the problem so far, I<and still work for Chinese +and Arabic and Italian>, and how many gettext items that'd take, but +he keeps going... But where $directory_count % 10 is 2, 3, or 4 +(except where $directory_count % 100 is 12, 13, or 14), the word for +"directories" is forced to be genitive singular -- which means another +ending... The room begins to spin around you, slowly at first... But +with I<all other> integer values, since "directory" is an inanimate +noun, when preceded by a number and in the nominative or accusative +cases (as it is here, just your luck!), it does stay plural, but it is +forced into the genitive case -- yet another another ending... And +you never hear him get to the part about how you're going to run into +similar (but maybe subtly different) problems with other Slavic +languages like Polish, because the floor comes up to meet you, and you +fade into unconsciousness. + + +The above cautionary tale relates how an attempt at localization can +lead from programmer consternation, to program obfuscation, to a need +for sedation. But careful evaluation shows that your choice of tools +merely needed further consideration. + +=head2 The Linguistic View + +=over + +"It is more complicated than you think." + +-- The Eighth Networking Truth, from RFC 1925 + +=back + +The field of Linguistics has expended a great deal of effort over the +past century trying to find grammatical patterns which hold across +languages; it's been a constant process +of people making generalizations that should apply to all languages, +only to find out that, all too often, these generalizations fail -- +sometimes failing for just a few languages, sometimes whole classes of +languages, and sometimes nearly every language in the world except +English. Broad statistical trends are evident in what the "average +language" is like as far as what its rules can look like, must look +like, and cannot look like. But the "average language" is just as +unreal a concept as the "average person" -- it runs up against the +fact no language (or person) is, in fact, average. The wisdom of past +experience leads us to believe that any given language can do whatever +it wants, in any order, with appeal to any kind of grammatical +categories wants -- case, number, tense, real or metaphoric +characteristics of the things that words refer to, arbitrary or +predictable classifications of words based on what endings or prefixes +they can take, degree or means of certainty about the truth of +statements expressed, and so on, ad infinitum. + +Mercifully, most localization tasks are a matter of finding ways to +translate whole phrases, generally sentences, where the context is +relatively set, and where the only variation in content is I<usually> +in a number being expressed -- as in the example sentences above. +Translating specific, fully-formed sentences is, in practice, fairly +foolproof -- which is good, because that's what's in the phrasebooks +that so many tourists rely on. Now, a given phrase (whether in a +phrasebook or in a gettext lexicon) in one language I<might> have a +greater or lesser applicability than that phrase's translation into +another language -- for example, strictly speaking, in Arabic, the +"your" in "Your query matched..." would take a different form +depending on whether the user is male or female; so the Arabic +translation "your[feminine] query" is applicable in fewer cases than +the corresponding English phrase, which doesn't distinguish the user's +gender. (In practice, it's not feasable to have a program know the +user's gender, so the masculine "you" in Arabic is usually used, by +default.) + +But in general, such surprises are rare when entire sentences are +being translated, especially when the functional context is restricted +to that of a computer interacting with a user either to convey a fact +or to prompt for a piece of information. So, for purposes of +localization, translation by phrase (generally by sentence) is both the +simplest and the least problematic. + +=head2 Breaking gettext + +=over + +"It Has To Work." + +-- First Networking Truth, RFC 1925 + +=back + +Consider that sentences in a tourist phrasebook are of two types: ones +like "How do I get to the marketplace?" that don't have any blanks to +fill in, and ones like "How much do these ___ cost?", where there's +one or more blanks to fill in (and these are usually linked to a +list of words that you can put in that blank: "fish", "potatoes", +"tomatoes", etc.) The ones with no blanks are no problem, but the +fill-in-the-blank ones may not be really straightforward. If it's a +Swahili phrasebook, for example, the authors probably didn't bother to +tell you the complicated ways that the verb "cost" changes its +inflectional prefix depending on the noun you're putting in the blank. +The trader in the marketplace will still understand what you're saying if +you say "how much do these potatoes cost?" with the wrong +inflectional prefix on "cost". After all, I<you> can't speak proper Swahili, +I<you're> just a tourist. But while tourists can be stupid, computers +are supposed to be smart; the computer should be able to fill in the +blank, and still have the results be grammatical. + +In other words, a phrasebook entry takes some values as parameters +(the things that you fill in the blank or blanks), and provides a value +based on these parameters, where the way you get that final value from +the given values can, properly speaking, involve an arbitrarily +complex series of operations. (In the case of Chinese, it'd be not at +all complex, at least in cases like the examples at the beginning of +this article; whereas in the case of Russian it'd be a rather complex +series of operations. And in some languages, the +complexity could be spread around differently: while the act of +putting a number-expression in front of a noun phrase might not be +complex by itself, it may change how you have to, for example, inflect +a verb elsewhere in the sentence. This is what in syntax is called +"long-distance dependencies".) + +This talk of parameters and arbitrary complexity is just another way +to say that an entry in a phrasebook is what in a programming language +would be called a "function". Just so you don't miss it, this is the +crux of this article: I<A phrase is a function; a phrasebook is a +bunch of functions.> + +The reason that using gettext runs into walls (as in the above +second-person horror story) is that you're trying to use a string (or +worse, a choice among a bunch of strings) to do what you really need a +function for -- which is futile. Preforming (s)printf interpolation +on the strings which you get back from gettext does allow you to do I<some> +common things passably well... sometimes... sort of; but, to paraphrase +what some people say about C<csh> script programming, "it fools you +into thinking you can use it for real things, but you can't, and you +don't discover this until you've already spent too much time trying, +and by then it's too late." + +=head2 Replacing gettext + +So, what needs to replace gettext is a system that supports lexicons +of functions instead of lexicons of strings. An entry in a lexicon +from such a system should I<not> look like this: + + "J'ai trouv\xE9 %g fichiers dans %g r\xE9pertoires" + +[\xE9 is e-acute in Latin-1. Some pod renderers would +scream if I used the actual character here. -- SB] + +but instead like this, bearing in mind that this is just a first stab: + + sub I_found_X1_files_in_X2_directories { + my( $files, $dirs ) = @_[0,1]; + $files = sprintf("%g %s", $files, + $files == 1 ? 'fichier' : 'fichiers'); + $dirs = sprintf("%g %s", $dirs, + $dirs == 1 ? "r\xE9pertoire" : "r\xE9pertoires"); + return "J'ai trouv\xE9 $files dans $dirs."; + } + +Now, there's no particularly obvious way to store anything but strings +in a gettext lexicon; so it looks like we just have to start over and +make something better, from scratch. I call my shot at a +gettext-replacement system "Maketext", or, in CPAN terms, +Locale::Maketext. + +When designing Maketext, I chose to plan its main features in terms of +"buzzword compliance". And here are the buzzwords: + +=head2 Buzzwords: Abstraction and Encapsulation + +The complexity of the language you're trying to output a phrase in is +entirely abstracted inside (and encapsulated within) the Maketext module +for that interface. When you call: + + print $lang->maketext("You have [quant,_1,piece] of new mail.", + scalar(@messages)); + +you don't know (and in fact can't easily find out) whether this will +involve lots of figuring, as in Russian (if $lang is a handle to the +Russian module), or relatively little, as in Chinese. That kind of +abstraction and encapsulation may encourage other pleasant buzzwords +like modularization and stratification, depending on what design +decisions you make. + +=head2 Buzzword: Isomorphism + +"Isomorphism" means "having the same structure or form"; in discussions +of program design, the word takes on the special, specific meaning that +your implementation of a solution to a problem I<has the same +structure> as, say, an informal verbal description of the solution, or +maybe of the problem itself. Isomorphism is, all things considered, +a good thing -- it's what problem-solving (and solution-implementing) +should look like. + +What's wrong the with gettext-using code like this... + + printf( $file_count == 1 ? + ( $directory_count == 1 ? + "Your query matched %g file in %g directory." : + "Your query matched %g file in %g directories." ) : + ( $directory_count == 1 ? + "Your query matched %g files in %g directory." : + "Your query matched %g files in %g directories." ), + $file_count, $directory_count, + ); + +is first off that it's not well abstracted -- these ways of testing +for grammatical number (as in the expressions like C<foo == 1 ? +singular_form : plural_form>) should be abstracted to each language +module, since how you get grammatical number is language-specific. + +But second off, it's not isomorphic -- the "solution" (i.e., the +phrasebook entries) for Chinese maps from these four English phrases to +the one Chinese phrase that fits for all of them. In other words, the +informal solution would be "The way to say what you want in Chinese is +with the one phrase 'For your question, in Y directories you would +find X files'" -- and so the implemented solution should be, +isomorphically, just a straightforward way to spit out that one +phrase, with numerals properly interpolated. It shouldn't have to map +from the complexity of other languages to the simplicity of this one. + +=head2 Buzzword: Inheritance + +There's a great deal of reuse possible for sharing of phrases between +modules for related dialects, or for sharing of auxiliary functions +between related languages. (By "auxiliary functions", I mean +functions that don't produce phrase-text, but which, say, return an +answer to "does this number require a plural noun after it?". Such +auxiliary functions would be used in the internal logic of functions +that actually do produce phrase-text.) + +In the case of sharing phrases, consider that you have an interface +already localized for American English (probably by having been +written with that as the native locale, but that's incidental). +Localizing it for UK English should, in practical terms, be just a +matter of running it past a British person with the instructions to +indicate what few phrases would benefit from a change in spelling or +possibly minor rewording. In that case, you should be able to put in +the UK English localization module I<only> those phrases that are +UK-specific, and for all the rest, I<inherit> from the American +English module. (And I expect this same situation would apply with +Brazilian and Continental Portugese, possbily with some I<very> +closely related languages like Czech and Slovak, and possibly with the +slightly different "versions" of written Mandarin Chinese, as I hear exist in +Taiwan and mainland China.) + +As to sharing of auxiliary functions, consider the problem of Russian +numbers from the beginning of this article; obviously, you'd want to +write only once the hairy code that, given a numeric value, would +return some specification of which case and number a given quanitified +noun should use. But suppose that you discover, while localizing an +interface for, say, Ukranian (a Slavic language related to Russian, +spoken by several million people, many of whom would be relieved to +find that your Web site's or software's interface is available in +their language), that the rules in Ukranian are the same as in Russian +for quantification, and probably for many other grammatical functions. +While there may well be no phrases in common between Russian and +Ukranian, you could still choose to have the Ukranian module inherit +from the Russian module, just for the sake of inheriting all the +various grammatical methods. Or, probably better organizationally, +you could move those functions to a module called C<_E_Slavic> or +something, which Russian and Ukranian could inherit useful functions +from, but which would (presumably) provide no lexicon. + +=head2 Buzzword: Concision + +Okay, concision isn't a buzzword. But it should be, so I decree that +as a new buzzword, "concision" means that simple common things should +be expressible in very few lines (or maybe even just a few characters) +of code -- call it a special case of "making simple things easy and +hard things possible", and see also the role it played in the +MIDI::Simple language, discussed elsewhere in this issue [TPJ#13]. + +Consider our first stab at an entry in our "phrasebook of functions": + + sub I_found_X1_files_in_X2_directories { + my( $files, $dirs ) = @_[0,1]; + $files = sprintf("%g %s", $files, + $files == 1 ? 'fichier' : 'fichiers'); + $dirs = sprintf("%g %s", $dirs, + $dirs == 1 ? "r\xE9pertoire" : "r\xE9pertoires"); + return "J'ai trouv\xE9 $files dans $dirs."; + } + +You may sense that a lexicon (to use a non-committal catch-all term for a +collection of things you know how to say, regardless of whether they're +phrases or words) consisting of functions I<expressed> as above would +make for rather long-winded and repetitive code -- even if you wisely +rewrote this to have quantification (as we call adding a number +expression to a noun phrase) be a function called like: + + sub I_found_X1_files_in_X2_directories { + my( $files, $dirs ) = @_[0,1]; + $files = quant($files, "fichier"); + $dirs = quant($dirs, "r\xE9pertoire"); + return "J'ai trouv\xE9 $files dans $dirs."; + } + +And you may also sense that you do not want to bother your translators +with having to write Perl code -- you'd much rather that they spend +their I<very costly time> on just translation. And this is to say +nothing of the near impossibility of finding a commercial translator +who would know even simple Perl. + +In a first-hack implementation of Maketext, each language-module's +lexicon looked like this: + + %Lexicon = ( + "I found %g files in %g directories" + => sub { + my( $files, $dirs ) = @_[0,1]; + $files = quant($files, "fichier"); + $dirs = quant($dirs, "r\xE9pertoire"); + return "J'ai trouv\xE9 $files dans $dirs."; + }, + ... and so on with other phrase => sub mappings ... + ); + +but I immediately went looking for some more concise way to basically +denote the same phrase-function -- a way that would also serve to +concisely denote I<most> phrase-functions in the lexicon for I<most> +languages. After much time and even some actual thought, I decided on +this system: + +* Where a value in a %Lexicon hash is a contentful string instead of +an anonymous sub (or, conceivably, a coderef), it would be interpreted +as a sort of shorthand expression of what the sub does. When accessed +for the first time in a session, it is parsed, turned into Perl code, +and then eval'd into an anonymous sub; then that sub replaces the +original string in that lexicon. (That way, the work of parsing and +evaling the shorthand form for a given phrase is done no more than +once per session.) + +* Calls to C<maketext> (as Maketext's main function is called) happen +thru a "language session handle", notionally very much like an IO +handle, in that you open one at the start of the session, and use it +for "sending signals" to an object in order to have it return the text +you want. + +So, this: + + $lang->maketext("You have [quant,_1,piece] of new mail.", + scalar(@messages)); + +basically means this: look in the lexicon for $lang (which may inherit +from any number of other lexicons), and find the function that we +happen to associate with the string "You have [quant,_1,piece] of new +mail" (which is, and should be, a functioning "shorthand" for this +function in the native locale -- English in this case). If you find +such a function, call it with $lang as its first parameter (as if it +were a method), and then a copy of scalar(@messages) as its second, +and then return that value. If that function was found, but was in +string shorthand instead of being a fully specified function, parse it +and make it into a function before calling it the first time. + +* The shorthand uses code in brackets to indicate method calls that +should be performed. A full explanation is not in order here, but a +few examples will suffice: + + "You have [quant,_1,piece] of new mail." + +The above code is shorthand for, and will be interpreted as, +this: + + sub { + my $handle = $_[0]; + my(@params) = @_; + return join '', + "You have ", + $handle->quant($params[1], 'piece'), + "of new mail."; + } + +where "quant" is the name of a method you're using to quantify the +noun "piece" with the number $params[0]. + +A string with no brackety calls, like this: + + "Your search expression was malformed." + +is somewhat of a degerate case, and just gets turned into: + + sub { return "Your search expression was malformed." } + +However, not everything you can write in Perl code can be written in +the above shorthand system -- not by a long shot. For example, consider +the Italian translator from the beginning of this article, who wanted +the Italian for "I didn't find any files" as a special case, instead +of "I found 0 files". That couldn't be specified (at least not easily +or simply) in our shorthand system, and it would have to be written +out in full, like this: + + sub { # pretend the English strings are in Italian + my($handle, $files, $dirs) = @_[0,1,2]; + return "I didn't find any files" unless $files; + return join '', + "I found ", + $handle->quant($files, 'file'), + " in ", + $handle->quant($dirs, 'directory'), + "."; + } + +Next to a lexicon full of shorthand code, that sort of sticks out like a +sore thumb -- but this I<is> a special case, after all; and at least +it's possible, if not as concise as usual. + +As to how you'd implement the Russian example from the beginning of +the article, well, There's More Than One Way To Do It, but it could be +something like this (using English words for Russian, just so you know +what's going on): + + "I [quant,_1,directory,accusative] scanned." + +This shifts the burden of complexity off to the quant method. That +method's parameters are: the numeric value it's going to use to +quantify something; the Russian word it's going to quantify; and the +parameter "accusative", which you're using to mean that this +sentence's syntax wants a noun in the accusative case there, although +that quantification method may have to overrule, for grammatical +reasons you may recall from the beginning of this article. + +Now, the Russian quant method here is responsible not only for +implementing the strange logic necessary for figuring out how Russian +number-phrases impose case and number on their noun-phrases, but also +for inflecting the Russian word for "directory". How that inflection +is to be carried out is no small issue, and among the solutions I've +seen, some (like variations on a simple lookup in a hash where all +possible forms are provided for all necessary words) are +straightforward but I<can> become cumbersome when you need to inflect +more than a few dozen words; and other solutions (like using +algorithms to model the inflections, storing only root forms and +irregularities) I<can> involve more overhead than is justifiable for +all but the largest lexicons. + +Mercifully, this design decision becomes crucial only in the hairiest +of inflected languages, of which Russian is by no means the I<worst> case +scenario, but is worse than most. Most languages have simpler +inflection systems; for example, in English or Swahili, there are +generally no more than two possible inflected forms for a given noun +("error/errors"; "kosa/makosa"), and the +rules for producing these forms are fairly simple -- or at least, +simple rules can be formulated that work for most words, and you can +then treat the exceptions as just "irregular", at least relative to +your ad hoc rules. A simpler inflection system (simpler rules, fewer +forms) means that design decisions are less crucial to maintaining +sanity, whereas the same decisions could incur +overhead-versus-scalability problems in languages like Russian. It +may I<also> be likely that code (possibly in Perl, as with +Lingua::EN::Inflect, for English nouns) has already +been written for the language in question, whether simple or complex. + +Moreover, a third possibility may even be simpler than anything +discussed above: "Just require that all possible (or at least +applicable) forms be provided in the call to the given language's quant +method, as in:" + + "I found [quant,_1,file,files]." + +That way, quant just has to chose which form it needs, without having +to look up or generate anything. While possibly not optimal for +Russian, this should work well for most other languages, where +quantification is not as complicated an operation. + +=head2 The Devil in the Details + +There's plenty more to Maketext than described above -- for example, +there's the details of how language tags ("en-US", "x-cree", "fi", +etc.) or locale IDs ("en_US") interact with actual module naming +("BogoQuery/Locale/en_us.pm"), and what magic can ensue; there's the +details of how to record (and possibly negotiate) what character +encoding Maketext will return text in (UTF8? Latin-1? KOI8?). There's +the interesting fact that Maketext is for localization, but nowhere +actually has a "C<use locale;>" anywhere in it. For the curious, +there's the somewhat frightening details of how I actually +implement something like data inheritance so that searches across +modules' %Lexicon hashes can parallel how Perl implements method +inheritance. + +And, most importantly, there's all the practical details of how to +actually go about deriving from Maketext so you can use it for your +interfaces, and the various tools and conventions for starting out and +maintaining individual language modules. + +That is all covered in the documentation for Locale::Maketext and the +modules that come with it, available in CPAN. After having read this +article, which covers the why's of Maketext, the documentation, +which covers the how's of it, should be quite straightfoward. + +=head2 The Proof in the Pudding: Localizing Web Sites + +Maketext and gettext have a notable difference: gettext is in C, +accessible thru C library calls, whereas Maketext is in Perl, and +really can't work without a Perl interpreter (although I suppose +something like it could be written for C). Accidents of history (and +not necessarily lucky ones) have made C++ the most common language for +the implementation of applications like word processors, Web browsers, +and even many in-house applications like custom query systems. Current +conditions make it somewhat unlikely that the next one of any of these +kinds of applications will be written in Perl, albeit clearly more for +reasons of custom and inertia than out of consideration of what is the +right tool for the job. + +However, other accidents of history have made Perl a well-accepted +language for design of server-side programs (generally in CGI form) +for Web site interfaces. Localization of static pages in Web sites is +trivial, feasable either with simple language-negotiation features in +servers like Apache, or with some kind of server-side inclusions of +language-appropriate text into layout templates. However, I think +that the localization of Perl-based search systems (or other kinds of +dynamic content) in Web sites, be they public or access-restricted, +is where Maketext will see the greatest use. + +I presume that it would be only the exceptional Web site that gets +localized for English I<and> Chinese I<and> Italian I<and> Arabic +I<and> Russian, to recall the languages from the beginning of this +article -- to say nothing of German, Spanish, French, Japanese, +Finnish, and Hindi, to name a few languages that benefit from large +numbers of programmers or Web viewers or both. + +However, the ever-increasing internationalization of the Web (whether +measured in terms of amount of content, of numbers of content writers +or programmers, or of size of content audiences) makes it increasingly +likely that the interface to the average Web-based dynamic content +service will be localized for two or maybe three languages. It is my +hope that Maketext will make that task as simple as possible, and will +remove previous barriers to localization for languages dissimilar to +English. + + __END__ + +Sean M. Burke (sburkeE<64>cpan.org) has a Master's in linguistics +from Northwestern University; he specializes in language technology. +Jordan Lachler (lachlerE<64>unm.edu) is a PhD student in the Department of +Linguistics at the University of New Mexico; he specializes in +morphology and pedagogy of North American native languages. + +=head2 References + +Alvestrand, Harald Tveit. 1995. I<RFC 1766: Tags for the +Identification of Languages.> +C<ftp://ftp.isi.edu/in-notes/rfc1766.txt> +[Now see RFC 3066.] + +Callon, Ross, editor. 1996. I<RFC 1925: The Twelve +Networking Truths.> +C<ftp://ftp.isi.edu/in-notes/rfc1925.txt> + +Drepper, Ulrich, Peter Miller, +and FranE<ccedil>ois Pinard. 1995-2001. GNU +C<gettext>. Available in C<ftp://prep.ai.mit.edu/pub/gnu/>, with +extensive docs in the distribution tarball. [Since +I wrote this article in 1998, I now see that the +gettext docs are now trying more to come to terms with +plurality. Whether useful conclusions have come from it +is another question altogether. -- SMB, May 2001] + +Forbes, Nevill. 1964. I<Russian Grammar.> Third Edition, revised +by J. C. Dumbreck. Oxford University Press. + +=cut + +#End + diff --git a/lib/PerlIO.pm b/lib/PerlIO.pm index 148a5a8de7..d52d58b41a 100644 --- a/lib/PerlIO.pm +++ b/lib/PerlIO.pm @@ -39,8 +39,8 @@ PerlIO - On demand loader for PerlIO layers and root of PerlIO::* name space =head1 DESCRIPTION -When an undefined layer 'foo' is encountered in an C<open> or C<binmode> layer -specification then C code performs the equivalent of: +When an undefined layer 'foo' is encountered in an C<open> or +C<binmode> layer specification then C code performs the equivalent of: use PerlIO 'foo'; @@ -99,7 +99,7 @@ and then read it back in. =item raw A pseudo-layer which performs two functions (which is messy, but -necessary to maintain compatibility with non-PerlIO builds of perl +necessary to maintain compatibility with non-PerlIO builds of Perl and their way things have been documented elsewhere). Firstly it forces the file handle to be considered binary at that @@ -118,8 +118,8 @@ which would interfere with binary nature of the stream. =head2 Defaults and how to override them -If the platform is MS-DOS like and normally does CRLF to "\n" translation -for text files then the default layers are : +If the platform is MS-DOS like and normally does CRLF to "\n" +translation for text files then the default layers are : unix crlf diff --git a/lib/Test.pm b/lib/Test.pm index 19a9089978..eef2d38a4d 100644 --- a/lib/Test.pm +++ b/lib/Test.pm @@ -1,18 +1,23 @@ -use strict; package Test; -use Test::Harness 1.1601 (); + +require 5.004; + +use strict; + use Carp; -our($VERSION, @ISA, @EXPORT, @EXPORT_OK, $ntest, $TestLevel); #public-ish -our($TESTOUT, $ONFAIL, %todo, %history, $planned, @FAILDETAIL); #private-ish -$VERSION = '1.15'; +use vars (qw($VERSION @ISA @EXPORT @EXPORT_OK $ntest $TestLevel), #public-ish + qw($TESTOUT $ONFAIL %todo %history $planned @FAILDETAIL)#private-ish + ); + +$VERSION = '1.17'; require Exporter; @ISA=('Exporter'); -@EXPORT=qw(&plan &ok &skip); -@EXPORT_OK=qw($ntest $TESTOUT); + +@EXPORT = qw(&plan &ok &skip); +@EXPORT_OK = qw($ntest $TESTOUT); $TestLevel = 0; # how many extra stack frames to skip $|=1; -#$^W=1; ? $ntest=1; $TESTOUT = *STDOUT{IO}; @@ -20,9 +25,90 @@ $TESTOUT = *STDOUT{IO}; # help test coverage analyzers know which test is running. $ENV{REGRESSION_TEST} = $0; + +=head1 NAME + +Test - provides a simple framework for writing test scripts + +=head1 SYNOPSIS + + use strict; + use Test; + + # use a BEGIN block so we print our plan before MyModule is loaded + BEGIN { plan tests => 14, todo => [3,4] } + + # load your module... + use MyModule; + + ok(0); # failure + ok(1); # success + + ok(0); # ok, expected failure (see todo list, above) + ok(1); # surprise success! + + ok(0,1); # failure: '0' ne '1' + ok('broke','fixed'); # failure: 'broke' ne 'fixed' + ok('fixed','fixed'); # success: 'fixed' eq 'fixed' + ok('fixed',qr/x/); # success: 'fixed' =~ qr/x/ + + ok(sub { 1+1 }, 2); # success: '2' eq '2' + ok(sub { 1+1 }, 3); # failure: '2' ne '3' + ok(0, int(rand(2)); # (just kidding :-) + + my @list = (0,0); + ok @list, 3, "\@list=".join(',',@list); #extra diagnostics + ok 'segmentation fault', '/(?i)success/'; #regex match + + skip($feature_is_missing, ...); #do platform specific test + +=head1 DESCRIPTION + +L<Test::Harness|Test::Harness> expects to see particular output when it +executes tests. This module aims to make writing proper test scripts just +a little bit easier (and less error prone :-). + + +=head2 Functions + +All the following are exported by Test by default. + +=over 4 + +=item B<plan> + + BEGIN { plan %theplan; } + +This should be the first thing you call in your test script. It +declares your testing plan, how many there will be, if any of them +should be allowed to fail, etc... + +Typical usage is just: + + use Test; + BEGIN { plan tests => 23 } + +Things you can put in the plan: + + tests The number of tests in your script. + This means all ok() and skip() calls. + todo A reference to a list of tests which are allowed + to fail. See L</TODO TESTS>. + onfail A subroutine reference to be run at the end of + the test script should any of the tests fail. + See L</ONFAIL>. + +You must call plan() once and only once. + +=cut + sub plan { croak "Test::plan(%args): odd number of arguments" if @_ & 1; croak "Test::plan(): should not be called more than once" if $planned; + + local($\, $,); # guard against -l and other things that screw with + # print + my $max=0; for (my $x=0; $x < @_; $x+=2) { my ($k,$v) = @_[$x,$x+1]; @@ -42,35 +128,119 @@ sub plan { print $TESTOUT "1..$max\n"; } ++$planned; + + # Never used. + return undef; } -sub to_value { + +=begin _private + +=item B<_to_value> + + my $value = _to_value($input); + +Converts an ok parameter to its value. Typically this just means +running it if its a code reference. You should run all inputed +values through this. + +=cut + +sub _to_value { my ($v) = @_; - (ref $v or '') eq 'CODE' ? $v->() : $v; + return (ref $v or '') eq 'CODE' ? $v->() : $v; } +=end _private + +=item B<ok> + + ok(1 + 1 == 2); + ok($have, $expect); + ok($have, $expect, $diagnostics); + +This is the reason for Test's existance. Its the basic function that +handles printing "ok" or "not ok" along with the current test number. + +In its most basic usage, it simply takes an expression. If its true, +the test passes, if false, the test fails. Simp. + + ok( 1 + 1 == 2 ); # ok if 1 + 1 == 2 + ok( $foo =~ /bar/ ); # ok if $foo contains 'bar' + ok( baz($x + $y) eq 'Armondo' ); # ok if baz($x + $y) returns + # 'Armondo' + ok( @a == @b ); # ok if @a and @b are the same length + +The expression is evaluated in scalar context. So the following will +work: + + ok( @stuff ); # ok if @stuff has any elements + ok( !grep !defined $_, @stuff ); # ok if everything in @stuff is + # defined. + +A special case is if the expression is a subroutine reference. In +that case, it is executed and its value (true or false) determines if +the test passes or fails. + +In its two argument form it compares the two values to see if they +equal (with C<eq>). + + ok( "this", "that" ); # not ok, 'this' ne 'that' + +If either is a subroutine reference, that is run and used as a +comparison. + +Should $expect either be a regex reference (ie. qr//) or a string that +looks like a regex (ie. '/foo/') ok() will perform a pattern match +against it rather than using eq. + + ok( 'JaffO', '/Jaff/' ); # ok, 'JaffO' =~ /Jaff/ + ok( 'JaffO', qr/Jaff/ ); # ok, 'JaffO' =~ qr/Jaff/; + ok( 'JaffO', '/(?i)jaff/ ); # ok, 'JaffO' =~ /jaff/i; + +Finally, an optional set of $diagnostics will be printed should the +test fail. This should usually be some useful information about the +test pertaining to why it failed or perhaps a description of the test. +Or both. + + ok( grep($_ eq 'something unique', @stuff), 1, + "Something that should be unique isn't!\n". + '@stuff = '.join ', ', @stuff + ); + +Unfortunately, a diagnostic cannot be used with the single argument +style of ok(). + +All these special cases can cause some problems. See L</BUGS and CAVEATS>. + +=cut + sub ok ($;$$) { croak "ok: plan before you test!" if !$planned; + + local($\,$,); # guard against -l and other things that screw with + # print + my ($pkg,$file,$line) = caller($TestLevel); my $repetition = ++$history{"$file:$line"}; my $context = ("$file at line $line". ($repetition > 1 ? " fail \#$repetition" : '')); my $ok=0; - my $result = to_value(shift); - my ($expected,$diag); + my $result = _to_value(shift); + my ($expected,$diag,$isregex,$regex); if (@_ == 0) { $ok = $result; } else { - $expected = to_value(shift); - my ($regex,$ignore); + $expected = _to_value(shift); if (!defined $expected) { $ok = !defined $result; } elsif (!defined $result) { $ok = 0; } elsif ((ref($expected)||'') eq 'Regexp') { $ok = $result =~ /$expected/; + $regex = $expected; } elsif (($regex) = ($expected =~ m,^ / (.+) / $,sx) or - ($ignore, $regex) = ($expected =~ m,^ m([^\w\s]) (.+) \1 $,sx)) { + (undef, $regex) = ($expected =~ m,^ m([^\w\s]) (.+) \1 $,sx)) { $ok = $result =~ /$regex/; } else { $ok = $result eq $expected; @@ -81,24 +251,24 @@ sub ok ($;$$) { $context .= ' TODO?!' if $todo; print $TESTOUT "ok $ntest # ($context)\n"; } else { - # Issuing two separate print()s causes severe trouble with - # Test::Harness on VMS. The "not "'s for failed tests occur - # on a separate line and would not get counted as failures. - #print $TESTOUT "not " if !$ok; - #print $TESTOUT "ok $ntest\n"; - # Replace with one of a pair of single print()'s as a workaround: - if (!$ok) { - print $TESTOUT "not ok $ntest\n"; + # Issuing two seperate prints() causes problems on VMS. + if (!$ok) { + print $TESTOUT "not ok $ntest\n"; } - else { - print $TESTOUT "ok $ntest\n"; + else { + print $TESTOUT "ok $ntest\n"; } if (!$ok) { my $detail = { 'repetition' => $repetition, 'package' => $pkg, 'result' => $result, 'todo' => $todo }; $$detail{expected} = $expected if defined $expected; - $diag = $$detail{diagnostic} = to_value(shift) if @_; + + # Get the user's diagnostic, protecting against multi-line + # diagnostics. + $diag = $$detail{diagnostic} = _to_value(shift) if @_; + $diag =~ s/\n/\n#/g if defined $diag; + $context .= ' *TODO*' if $todo; if (!defined $expected) { if (!$diag) { @@ -111,9 +281,10 @@ sub ok ($;$$) { print $TESTOUT "# $prefix got: ". (defined $result? "'$result'":'<UNDEF>')." ($context)\n"; $prefix = ' ' x (length($prefix) - 5); - if ((ref($expected)||'') eq 'Regexp') { - $expected = 'qr/'.$expected.'/' - } else { + if (defined $regex) { + $expected = 'qr{'.$regex.'}'; + } + else { $expected = "'$expected'"; } if (!$diag) { @@ -129,19 +300,40 @@ sub ok ($;$$) { $ok; } -sub skip ($$;$$) { - my $whyskip = to_value(shift); - if ($whyskip) { - $whyskip = 'skip' if $whyskip =~ m/^\d+$/; - print $TESTOUT "ok $ntest # $whyskip\n"; - ++ $ntest; - 1; +sub skip ($;$$$) { + local($\, $,); # guard against -l and other things that screw with + # print + + my $whyskip = _to_value(shift); + if (!@_ or $whyskip) { + $whyskip = '' if $whyskip =~ m/^\d+$/; + $whyskip =~ s/^[Ss]kip(?:\s+|$)//; # backwards compatibility, old + # versions required the reason + # to start with 'skip' + # We print in one shot for VMSy reasons. + my $ok = "ok $ntest # skip"; + $ok .= " $whyskip" if length $whyskip; + $ok .= "\n"; + print $TESTOUT $ok; + ++ $ntest; + return 1; } else { + # backwards compatiblity (I think). skip() used to be + # called like ok() and was expected to fail, which is weird. + warn <<WARN if $^W; +This looks like a skip() using the very old interface. Please upgrade to +the documented interface as this has been deprecated. +WARN + local($TestLevel) = $TestLevel+1; #ignore this stack frame - &ok; + return &ok(@_); } } +=back + +=cut + END { $ONFAIL->(\@FAILDETAIL) if @FAILDETAIL && $ONFAIL; } @@ -149,48 +341,6 @@ END { 1; __END__ -=head1 NAME - -Test - provides a simple framework for writing test scripts - -=head1 SYNOPSIS - - use strict; - use Test; - - # use a BEGIN block so we print our plan before MyModule is loaded - BEGIN { plan tests => 14, todo => [3,4] } - - # load your module... - use MyModule; - - ok(0); # failure - ok(1); # success - - ok(0); # ok, expected failure (see todo list, above) - ok(1); # surprise success! - - ok(0,1); # failure: '0' ne '1' - ok('broke','fixed'); # failure: 'broke' ne 'fixed' - ok('fixed','fixed'); # success: 'fixed' eq 'fixed' - ok('fixed',qr/x/); # success: 'fixed' =~ qr/x/ - - ok(sub { 1+1 }, 2); # success: '2' eq '2' - ok(sub { 1+1 }, 3); # failure: '2' ne '3' - ok(0, int(rand(2)); # (just kidding :-) - - my @list = (0,0); - ok @list, 3, "\@list=".join(',',@list); #extra diagnostics - ok 'segmentation fault', '/(?i)success/'; #regex match - - skip($feature_is_missing, ...); #do platform specific test - -=head1 DESCRIPTION - -L<Test::Harness|Test::Harness> expects to see particular output when it -executes tests. This module aims to make writing proper test scripts just -a little bit easier (and less error prone :-). - =head1 TEST TYPES =over 4 @@ -221,11 +371,6 @@ notes or change log. =back -=head1 RETURN VALUE - -Both C<ok> and C<skip> return true if their test succeeds and false -otherwise in a scalar context. - =head1 ONFAIL BEGIN { plan test => 4, onfail => sub { warn "CALL 911!" } } @@ -248,13 +393,55 @@ running. (It is run inside an C<END> block.) Besides, C<onfail> is probably over-kill in most cases. (Your test code should be simpler than the code it is testing, yes?) + +=head1 BUGS and CAVEATS + +ok()'s special handling of subroutine references is an unfortunate +"feature" that can't be removed due to compatibility. + +ok()'s use of string eq can sometimes cause odd problems when comparing +numbers, especially if you're casting a string to a number: + + $foo = "1.0"; + ok( $foo, 1 ); # not ok, "1.0" ne 1 + +Your best bet is to use the single argument form: + + ok( $foo == 1 ); # ok "1.0" == 1 + +ok()'s special handing of strings which look like they might be +regexes can also cause unexpected behavior. An innocent: + + ok( $fileglob, '/path/to/some/*stuff/' ); + +will fail since Test.pm considers the second argument to a regex. +Again, best bet is to use the single argument form: + + ok( $fileglob eq '/path/to/some/*stuff/' ); + + +=head1 TODO + +Add todo(). + +Allow named tests. + +Implement noplan(). + + =head1 SEE ALSO -L<Test::Harness> and, perhaps, test coverage analysis tools. +L<Test::Simple>, L<Test::More>, L<Test::Harness>, L<Devel::Cover> + +L<Test::Unit> is an interesting alternative testing library. + =head1 AUTHOR -Copyright (c) 1998-1999 Joshua Nathaniel Pritikin. All rights reserved. +Copyright (c) 1998-2000 Joshua Nathaniel Pritikin. All rights reserved. +Copyright (c) 2001 Michael G Schwern. + +Current maintainer, Michael G Schwern <schwern@pobox.com> This package is free software and is provided "as is" without express or implied warranty. It may be used, redistributed and/or modified diff --git a/lib/Test/Harness.pm b/lib/Test/Harness.pm index 73f3f79119..556d01a421 100644 --- a/lib/Test/Harness.pm +++ b/lib/Test/Harness.pm @@ -1,16 +1,18 @@ # -*- Mode: cperl; cperl-indent-level: 4 -*- +# $Id: Harness.pm,v 1.11 2001/05/23 18:24:41 schwern Exp $ + package Test::Harness; -use 5.005_64; +require 5.004; use Exporter; use Benchmark; use Config; use strict; -our($VERSION, $Verbose, $Switches, $Have_Devel_Corestack, $Curtest, - $Columns, $verbose, $switches, - @ISA, @EXPORT, @EXPORT_OK - ); +use vars qw($VERSION $Verbose $Switches $Have_Devel_Corestack $Curtest + $Columns $verbose $switches + @ISA @EXPORT @EXPORT_OK + ); # Backwards compatibility for exportable variable names. *verbose = \$Verbose; @@ -18,7 +20,7 @@ our($VERSION, $Verbose, $Switches, $Have_Devel_Corestack, $Curtest, $Have_Devel_Corestack = 0; -$VERSION = "1.1702"; +$VERSION = "1.21"; $ENV{HARNESS_ACTIVE} = 1; @@ -35,21 +37,326 @@ my $Files_In_Dir = $ENV{HARNESS_FILELEAK_IN_DIR}; $Verbose = 0; $Switches = "-w"; $Columns = $ENV{HARNESS_COLUMNS} || $ENV{COLUMNS} || 80; +$Columns--; # Some shells have trouble with a full line of text. + + +=head1 NAME + +Test::Harness - run perl standard test scripts with statistics + +=head1 SYNOPSIS + + use Test::Harness; + + runtests(@test_files); + +=head1 DESCRIPTION +B<STOP!> If all you want to do is write a test script, consider using +Test::Simple. Otherwise, read on. -sub globdir { opendir DIRH, shift; my @f = readdir DIRH; closedir DIRH; @f } +(By using the Test module, you can write test scripts without +knowing the exact output this module expects. However, if you need to +know the specifics, read on!) + +Perl test scripts print to standard output C<"ok N"> for each single +test, where C<N> is an increasing sequence of integers. The first line +output by a standard test script is C<"1..M"> with C<M> being the +number of tests that should be run within the test +script. Test::Harness::runtests(@tests) runs all the testscripts +named as arguments and checks standard output for the expected +C<"ok N"> strings. + +After all tests have been performed, runtests() prints some +performance statistics that are computed by the Benchmark module. + +=head2 The test script output + +The following explains how Test::Harness interprets the output of your +test program. + +=over 4 + +=item B<'1..M'> + +This header tells how many tests there will be. It should be the +first line output by your test program (but its okay if its preceded +by comments). + +In certain instanced, you may not know how many tests you will +ultimately be running. In this case, it is permitted (but not +encouraged) for the 1..M header to appear as the B<last> line output +by your test (again, it can be followed by further comments). But we +strongly encourage you to put it first. + +Under B<no> circumstances should 1..M appear in the middle of your +output or more than once. + + +=item B<'ok', 'not ok'. Ok?> + +Any output from the testscript to standard error is ignored and +bypassed, thus will be seen by the user. Lines written to standard +output containing C</^(not\s+)?ok\b/> are interpreted as feedback for +runtests(). All other lines are discarded. + +C</^not ok/> indicates a failed test. C</^ok/> is a successful test. + + +=item B<test numbers> + +Perl normally expects the 'ok' or 'not ok' to be followed by a test +number. It is tolerated if the test numbers after 'ok' are +omitted. In this case Test::Harness maintains temporarily its own +counter until the script supplies test numbers again. So the following +test script + + print <<END; + 1..6 + not ok + ok + not ok + ok + ok + END + +will generate + + FAILED tests 1, 3, 6 + Failed 3/6 tests, 50.00% okay + + +=item B<$Test::Harness::verbose> + +The global variable $Test::Harness::verbose is exportable and can be +used to let runtests() display the standard output of the script +without altering the behavior otherwise. + +=item B<$Test::Harness::switches> + +The global variable $Test::Harness::switches is exportable and can be +used to set perl command line options used for running the test +script(s). The default value is C<-w>. + +=item B<Skipping tests> + +If the standard output line contains the substring C< # Skip> (with +variations in spacing and case) after C<ok> or C<ok NUMBER>, it is +counted as a skipped test. If the whole testscript succeeds, the +count of skipped tests is included in the generated output. +C<Test::Harness> reports the text after C< # Skip\S*\s+> as a reason +for skipping. + + ok 23 # skip Insufficient flogiston pressure. + +Similarly, one can include a similar explanation in a C<1..0> line +emitted if the test script is skipped completely: + + 1..0 # Skipped: no leverage found + +=item B<Todo tests> + +If the standard output line contains the substring C< # TODO> after +C<not ok> or C<not ok NUMBER>, it is counted as a todo test. The text +afterwards is the thing that has to be done before this test will +succeed. + + not ok 13 # TODO harness the power of the atom + +These tests represent a feature to be implemented or a bug to be fixed +and act as something of an executable "thing to do" list. They are +B<not> expected to succeed. Should a todo test begin succeeding, +Test::Harness will report it as a bonus. This indicates that whatever +you were supposed to do has been done and you should promote this to a +normal test. + +=item B<Bail out!> + +As an emergency measure, a test script can decide that further tests +are useless (e.g. missing dependencies) and testing should stop +immediately. In that case the test script prints the magic words + + Bail out! + +to standard output. Any message after these words will be displayed by +C<Test::Harness> as the reason why testing is stopped. + +=item B<Comments> + +Additional comments may be put into the testing output on their own +lines. Comment lines should begin with a '#', Test::Harness will +ignore them. + + ok 1 + # Life is good, the sun is shining, RAM is cheap. + not ok 2 + # got 'Bush' expected 'Gore' + +=item B<Anything else> + +Any other output Test::Harness sees it will silently ignore B<BUT WE +PLAN TO CHANGE THIS!> If you wish to place additional output in your +test script, please use a comment. + +=back + + +=head2 Failure + +It will happen, your tests will fail. After you mop up your ego, you +can begin examining the summary report: + + t/base..............ok + t/nonumbers.........ok + t/ok................ok + t/test-harness......ok + t/waterloo..........dubious + Test returned status 3 (wstat 768, 0x300) + DIED. FAILED tests 1, 3, 5, 7, 9, 11, 13, 15, 17, 19 + Failed 10/20 tests, 50.00% okay + Failed Test Stat Wstat Total Fail Failed List of Failed + ----------------------------------------------------------------------- + t/waterloo.t 3 768 20 10 50.00% 1 3 5 7 9 11 13 15 17 19 + Failed 1/5 test scripts, 80.00% okay. 10/44 subtests failed, 77.27% okay. + +Everything passed but t/waterloo.t. It failed 10 of 20 tests and +exited with non-zero status indicating something dubious happened. + +The columns in the summary report mean: + +=over 4 + +=item B<Failed Test> + +The test file which failed. + +=item B<Stat> + +If the test exited with non-zero, this is its exit status. + +=item B<Wstat> + +The wait status of the test I<umm, I need a better explanation here>. + +=item B<Total> + +Total number of tests expected to run. + +=item B<Fail> + +Number which failed, either from "not ok" or because they never ran. + +=item B<Failed> + +Percentage of the total tests which failed. + +=item B<List of Failed> + +A list of the tests which failed. Successive failures may be +abbreviated (ie. 15-20 to indicate that tests 15, 16, 17, 18, 19 and +20 failed). + +=back + + +=head2 Functions + +Test::Harness currently only has one function, here it is. + +=over 4 + +=item B<runtests> + + my $allok = runtests(@test_files); + +This runs all the given @test_files and divines whether they passed +or failed based on their output to STDOUT (details above). It prints +out each individual test which failed along with a summary report and +a how long it all took. + +It returns true if everything was ok, false otherwise. + +=for _private +This is just _run_all_tests() plus _show_results() + +=cut sub runtests { my(@tests) = @_; - my($tot, $failedtests) = _runtests(@tests); + local ($\, $,); + + my($tot, $failedtests) = _run_all_tests(@tests); _show_results($tot, $failedtests); - return ($tot->{bad} == 0 && $tot->{max}) ; + my $ok = ($tot->{bad} == 0 && $tot->{max}); + + die q{Assert '$ok xor keys %$failedtests' failed!} + unless $ok xor keys %$failedtests; + + return $ok; +} + +=begin _private + +=item B<_globdir> + + my @files = _globdir $dir; + +Returns all the files in a directory. This is shorthand for backwards +compatibility on systems where glob() doesn't work right. + +=cut + +sub _globdir { + opendir DIRH, shift; + my @f = readdir DIRH; + closedir DIRH; + + return @f; } +=item B<_run_all_tests> + + my($total, $failed) = _run_all_tests(@test_files); + +Runs all the given @test_files (as runtests()) but does it quietly (no +report). $total is a hash ref summary of all the tests run. Its keys +and values are this: + + bonus Number of individual todo tests unexpectedly passed + max Number of individual tests ran + ok Number of individual tests passed + sub_skipped Number of individual tests skipped + + files Number of test files ran + good Number of test files passed + bad Number of test files failed + tests Number of test files originally given + skipped Number of test files skipped + +If $total->{bad} == 0 and $total->{max} > 0, you've got a successful +test. + +$failed is a hash ref of all the test scripts which failed. Each key +is the name of a test script, each value is another hash representing +how that script failed. Its keys are these: + + name Name of the test which failed + estat Script's exit value + wstat Script's wait status + max Number of individual tests + failed Number which failed + percent Percentage of tests which failed + canon List of tests which failed (as string). -sub _runtests { +Needless to say, $failed should be empty if everything passed. + +B<NOTE> Currently this function is still noisy. I'm working on it. + +=cut + +sub _run_all_tests { my(@tests) = @_; local($|) = 1; my(%failedtests); @@ -85,34 +392,19 @@ sub _runtests { local($ENV{'PERL5LIB'}) = $new5lib; - my @dir_files = globdir $Files_In_Dir if defined $Files_In_Dir; + my @dir_files = _globdir $Files_In_Dir if defined $Files_In_Dir; my $t_start = new Benchmark; - foreach my $test (@tests) { - my $te = $test; - chop($te); # XXX chomp? + foreach my $tfile (@tests) { + my($leader, $ml) = _mk_leader($tfile); + print $leader; - if ($^O eq 'VMS') { $te =~ s/^.*\.t\./[.t./s; } - my $blank = (' ' x 77); - my $leader = "$te" . '.' x (20 - length($te)); - my $ml = ""; - $ml = "\r$blank\r$leader" - if -t STDOUT and not $ENV{HARNESS_NOTTY} and not $Verbose; - print $leader; - - my $s = _set_switches($test); - - my $cmd = ($ENV{'HARNESS_COMPILE_TEST'}) - ? "./perl -I../lib ../utils/perlcc $test " - . "-r 2>> ./compilelog |" - : "$^X $s $test|"; - $cmd = "MCR $cmd" if $^O eq 'VMS'; - open(my $fh, $cmd) or print "can't run $test. $!\n"; + my $fh = _open_test($tfile); # state of the current test. my %test = ( ok => 0, - next => 0, + 'next' => 0, max => 0, failed => [], todo => {}, @@ -140,12 +432,14 @@ sub _runtests { my($estatus, $wstatus) = _close_fh($fh); + my $allok = $test{ok} == $test{max} && $test{'next'} == $test{max}+1; + if ($wstatus) { - $failedtests{$test} = _dubious_return(\%test, \%tot, + $failedtests{$tfile} = _dubious_return(\%test, \%tot, $estatus, $wstatus); - $failedtests{$test}{name} = $test; + $failedtests{$tfile}{name} = $tfile; } - elsif ($test{ok} == $test{max} && $test{next} == $test{max}+1) { + elsif ($allok) { if ($test{max} and $test{skipped} + $test{bonus}) { my @msg; push(@msg, "$test{skipped}/$test{max} skipped: $test{skip_reason}") @@ -163,51 +457,55 @@ sub _runtests { $tot{skipped}++; } $tot{good}++; - } elsif ($test{max}) { - if ($test{next} <= $test{max}) { - push @{$test{failed}}, $test{next}..$test{max}; - } - if (@{$test{failed}}) { - my ($txt, $canon) = canonfailed($test{max},$test{skipped}, - @{$test{failed}}); - print "$test{ml}$txt"; - $failedtests{$test} = { canon => $canon, - max => $test{max}, - failed => scalar @{$test{failed}}, - name => $test, - percent => 100*(scalar @{$test{failed}})/$test{max}, - estat => '', - wstat => '', - }; - } else { - print "Don't know which tests failed: got $test{ok} ok, ". - "expected $test{max}\n"; - $failedtests{$test} = { canon => '??', - max => $test{max}, - failed => '??', - name => $test, - percent => undef, - estat => '', - wstat => '', - }; - } - $tot{bad}++; - } elsif ($test{next} == 0) { - print "FAILED before any test output arrived\n"; - $tot{bad}++; - $failedtests{$test} = { canon => '??', - max => '??', - failed => '??', - name => $test, - percent => undef, - estat => '', - wstat => '', - }; } + else { + if ($test{max}) { + if ($test{'next'} <= $test{max}) { + push @{$test{failed}}, $test{'next'}..$test{max}; + } + if (@{$test{failed}}) { + my ($txt, $canon) = canonfailed($test{max},$test{skipped}, + @{$test{failed}}); + print "$test{ml}$txt"; + $failedtests{$tfile} = { canon => $canon, + max => $test{max}, + failed => scalar @{$test{failed}}, + name => $tfile, + percent => 100*(scalar @{$test{failed}})/$test{max}, + estat => '', + wstat => '', + }; + } else { + print "Don't know which tests failed: got $test{ok} ok, ". + "expected $test{max}\n"; + $failedtests{$tfile} = { canon => '??', + max => $test{max}, + failed => '??', + name => $tfile, + percent => undef, + estat => '', + wstat => '', + }; + } + $tot{bad}++; + } elsif ($test{'next'} == 0) { + print "FAILED before any test output arrived\n"; + $tot{bad}++; + $failedtests{$tfile} = { canon => '??', + max => '??', + failed => '??', + name => $tfile, + percent => undef, + estat => '', + wstat => '', + }; + } + } + $tot{sub_skipped} += $test{skipped}; if (defined $Files_In_Dir) { - my @new_dir_files = globdir $Files_In_Dir; + my @new_dir_files = _globdir $Files_In_Dir; if (@new_dir_files != @dir_files) { my %f; @f{@new_dir_files} = (1) x @new_dir_files; @@ -231,6 +529,32 @@ sub _runtests { return(\%tot, \%failedtests); } +=item B<_mk_leader> + + my($leader, $ml) = _mk_leader($test_file); + +Generates the 't/foo........' $leader for the given $test_file as well +as a similar version which will overwrite the current line (by use of +\r and such). $ml may be empty if Test::Harness doesn't think you're +on TTY. + +=cut + +sub _mk_leader { + my $te = shift; + chop($te); # XXX chomp? + + if ($^O eq 'VMS') { $te =~ s/^.*\.t\./\[.t./s; } + my $blank = (' ' x 77); + my $leader = "$te" . '.' x (20 - length($te)); + my $ml = ""; + + $ml = "\r$blank\r$leader" + if -t STDOUT and not $ENV{HARNESS_NOTTY} and not $Verbose; + + return($leader, $ml); +} + sub _show_results { my($tot, $failedtests) = @_; @@ -292,14 +616,14 @@ sub _parse_header { # 1..10 # 1..0 # skip Why? Because I said so! elsif ($line =~ /^1\.\.([0-9]+) - (\s*\#\s*[Ss]kip\S*(?>\s+) (.+))? + (\s*\#\s*[Ss]kip\S*\s* (.+))? /x ) { $test->{max} = $1; $tot->{max} += $test->{max}; $tot->{files}++; - $test->{next} = 1 unless $test->{next}; + $test->{'next'} = 1 unless $test->{'next'}; $test->{skip_reason} = $3 if not $test->{max} and defined $3; $is_header = 1; @@ -312,11 +636,39 @@ sub _parse_header { } +sub _open_test { + my($test) = shift; + + my $s = _set_switches($test); + + # XXX This is WAY too core specific! + my $cmd = ($ENV{'HARNESS_COMPILE_TEST'}) + ? "./perl -I../lib ../utils/perlcc $test " + . "-r 2>> ./compilelog |" + : "$^X $s $test|"; + $cmd = "MCR $cmd" if $^O eq 'VMS'; + + if( open(PERL, $cmd) ) { + return \*PERL; + } + else { + print "can't run $test. $!\n"; + return; + } +} + +sub _run_one_test { + my($test) = @_; + + +} + + sub _parse_test_line { my($line, $test, $tot) = @_; if ($line =~ /^(not\s+)?ok\b/i) { - my $this = $test->{next} || 1; + my $this = $test->{'next'} || 1; # "not ok 23" if ($line =~ /^(not )?ok\s*(\d*)(\s*#.*)?/) { my($not, $tnum, $extra) = ($1, $2, $3); @@ -373,18 +725,18 @@ sub _parse_test_line { next; } - if ($this > $test->{next}) { + if ($this > $test->{'next'}) { # print "Test output counter mismatch [test $this]\n"; # no need to warn probably - push @{$test->{failed}}, $test->{next}..$this-1; + push @{$test->{failed}}, $test->{'next'}..$this-1; } - elsif ($this < $test->{next}) { + elsif ($this < $test->{'next'}) { #we have seen more "ok" lines than the number suggests print "Confused test output: test $this answered after ". - "test ", $test->{next}-1, "\n"; - $test->{next} = $this; + "test ", $test->{'next'}-1, "\n"; + $test->{'next'} = $this; } - $test->{next} = $this + 1; + $test->{'next'} = $this + 1; } elsif ($line =~ /^Bail out!\s*(.*)/i) { # magic words @@ -440,15 +792,16 @@ sub _close_fh { sub _set_switches { my($test) = shift; - open(my $fh, $test) or print "can't open $test. $!\n"; - my $first = <$fh>; + local *TEST; + open(TEST, $test) or print "can't open $test. $!\n"; + my $first = <TEST>; my $s = $Switches; $s .= " $ENV{'HARNESS_PERL_SWITCHES'}" if exists $ENV{'HARNESS_PERL_SWITCHES'}; $s .= join " ", q[ "-T"], map {qq["-I$_"]} @INC if $first =~ /^#!.*\bperl.*-\w*T/; - close($fh) or print "can't close $test. $!\n"; + close(TEST) or print "can't close $test. $!\n"; return $s; } @@ -475,13 +828,13 @@ sub _dubious_return { $tot->{bad}++; if ($test->{max}) { - if ($test->{next} == $test->{max} + 1 and not @{$test->{failed}}) { + if ($test->{'next'} == $test->{max} + 1 and not @{$test->{failed}}) { print "\tafter all the subtests completed successfully\n"; $percent = 0; $failed = 0; # But we do not set $canon! } else { - push @{$test->{failed}}, $test->{next}..$test->{max}; + push @{$test->{failed}}, $test->{'next'}..$test->{max}; $failed = @{$test->{failed}}; (my $txt, $canon) = canonfailed($test->{max},$test->{skipped},@{$test->{failed}}); $percent = 100*(scalar @{$test->{failed}})/$test->{max}; @@ -507,7 +860,7 @@ sub _create_fmts { my($failedtests) = @_; my $failed_str = "Failed Test"; - my $middle_str = " Status Wstat Total Fail Failed "; + my $middle_str = " Stat Wstat Total Fail Failed "; my $list_str = "List of Failed"; # Figure out our longest name string for formatting purposes. @@ -536,7 +889,7 @@ sub _create_fmts { my $fmt = "format STDOUT =\n" . "@" . "<" x ($max_namelen - 1) - . " @>> @>>>> @>>>> @>>> ^##.##% " + . " @>> @>>>> @>>>> @>>> ^##.##% " . "^" . "<" x ($list_len - 1) . "\n" . '{ $Curtest->{name}, $Curtest->{estat},' . ' $Curtest->{wstat}, $Curtest->{max},' @@ -556,18 +909,20 @@ sub _create_fmts { return($fmt_top, $fmt); } +{ + my $tried_devel_corestack; -my $tried_devel_corestack; -sub corestatus { - my($st) = @_; + sub corestatus { + my($st) = @_; - eval {require 'wait.ph'}; - my $ret = defined &WCOREDUMP ? WCOREDUMP($st) : $st & 0200; + eval {require 'wait.ph'}; + my $ret = defined &WCOREDUMP ? WCOREDUMP($st) : $st & 0200; - eval { require Devel::CoreStack; $Have_Devel_Corestack++ } - unless $tried_devel_corestack++; + eval { require Devel::CoreStack; $Have_Devel_Corestack++ } + unless $tried_devel_corestack++; - $ret; + $ret; + } } sub canonfailed ($@) { @@ -594,7 +949,7 @@ sub canonfailed ($@) { } local $" = ", "; push @result, "FAILED tests @canon\n"; - $canon = "@canon"; + $canon = join ' ', @canon; } else { push @result, "FAILED test $last\n"; $canon = $last; @@ -613,159 +968,16 @@ sub canonfailed ($@) { ($txt, $canon); } -1; -__END__ - -=head1 NAME - -Test::Harness - run perl standard test scripts with statistics - -=head1 SYNOPSIS - - use Test::Harness; - - runtests(@test_files); - -=head1 DESCRIPTION - -(By using the Test module, you can write test scripts without -knowing the exact output this module expects. However, if you need to -know the specifics, read on!) - -Perl test scripts print to standard output C<"ok N"> for each single -test, where C<N> is an increasing sequence of integers. The first line -output by a standard test script is C<"1..M"> with C<M> being the -number of tests that should be run within the test -script. Test::Harness::runtests(@tests) runs all the testscripts -named as arguments and checks standard output for the expected -C<"ok N"> strings. - -After all tests have been performed, runtests() prints some -performance statistics that are computed by the Benchmark module. - -=head2 The test script output - -The following explains how Test::Harness interprets the output of your -test program. - -=over 4 - -=item B<'1..M'> - -This header tells how many tests there will be. It should be the -first line output by your test program (but its okay if its preceded -by comments). - -In certain instanced, you may not know how many tests you will -ultimately be running. In this case, it is permitted (but not -encouraged) for the 1..M header to appear as the B<last> line output -by your test (again, it can be followed by further comments). But we -strongly encourage you to put it first. - -Under B<no> circumstances should 1..M appear in the middle of your -output or more than once. - - -=item B<'ok', 'not ok'. Ok?> - -Any output from the testscript to standard error is ignored and -bypassed, thus will be seen by the user. Lines written to standard -output containing C</^(not\s+)?ok\b/> are interpreted as feedback for -runtests(). All other lines are discarded. - -C</^not ok/> indicates a failed test. C</^ok/> is a successful test. - +=end _private -=item B<test numbers> - -Perl normally expects the 'ok' or 'not ok' to be followed by a test -number. It is tolerated if the test numbers after 'ok' are -omitted. In this case Test::Harness maintains temporarily its own -counter until the script supplies test numbers again. So the following -test script - - print <<END; - 1..6 - not ok - ok - not ok - ok - ok - END - -will generate - - FAILED tests 1, 3, 6 - Failed 3/6 tests, 50.00% okay - - -=item B<$Test::Harness::verbose> - -The global variable $Test::Harness::verbose is exportable and can be -used to let runtests() display the standard output of the script -without altering the behavior otherwise. - -=item B<$Test::Harness::switches> - -The global variable $Test::Harness::switches is exportable and can be -used to set perl command line options used for running the test -script(s). The default value is C<-w>. - -=item B<Skipping tests> - -If the standard output line contains the substring C< # Skip> (with -variations in spacing and case) after C<ok> or C<ok NUMBER>, it is -counted as a skipped test. If the whole testscript succeeds, the -count of skipped tests is included in the generated output. -C<Test::Harness> reports the text after C< # Skip\S*\s+> as a reason -for skipping. - - ok 23 # skip Insufficient flogiston pressure. - -Similarly, one can include a similar explanation in a C<1..0> line -emitted if the test script is skipped completely: - - 1..0 # Skipped: no leverage found - -=item B<Todo tests> - -If the standard output line contains the substring C< # TODO> after -C<not ok> or C<not ok NUMBER>, it is counted as a todo test. The text -afterwards is the thing that has to be done before this test will -succeed. - - not ok 13 # TODO harness the power of the atom - -These tests represent a feature to be implemented or a bug to be fixed -and act as something of an executable "thing to do" list. They are -B<not> expected to succeed. Should a todo test begin succeeding, -Test::Harness will report it as a bonus. This indicates that whatever -you were supposed to do has been done and you should promote this to a -normal test. - -=item B<Bail out!> - -As an emergency measure, a test script can decide that further tests -are useless (e.g. missing dependencies) and testing should stop -immediately. In that case the test script prints the magic words - - Bail out! - -to standard output. Any message after these words will be displayed by -C<Test::Harness> as the reason why testing is stopped. +=back -=item B<Comments> +=cut -Additional comments may be put into the testing output on their own -lines. Comment lines should begin with a '#', Test::Harness will -ignore them. - ok 1 - # Life is good, the sun is shining, RAM is cheap. - not ok 2 - # got 'Bush' expected 'Gore' +1; +__END__ -=back =head1 EXPORT @@ -811,11 +1023,11 @@ the script dies with this message. =over 4 -=item C<HARNESS_IGNORE_EXITCODE> +=item C<HARNESS_IGNORE_EXITCODE> Makes harness ignore the exit status of child processes when defined. -=item C<HARNESS_NOTTY> +=item C<HARNESS_NOTTY> When set to a true value, forces it to behave as though STDOUT were not a console. You may need to set this if you don't want harness to @@ -823,12 +1035,15 @@ output more frequent progress messages using carriage returns. Some consoles may not handle carriage returns properly (which results in a somewhat messy output). -=item C<HARNESS_COMPILE_TEST> +=item C<HARNESS_COMPILE_TEST> When true it will make harness attempt to compile the test using C<perlcc> before running it. -=item C<HARNESS_FILELEAK_IN_DIR> +B<NOTE> This currently only works when sitting in the perl source +directory! + +=item C<HARNESS_FILELEAK_IN_DIR> When set to the name of a directory, harness will check after each test whether new files appeared in that directory, and report them as @@ -839,20 +1054,20 @@ If relative, directory name is with respect to the current directory at the moment runtests() was called. Putting absolute path into C<HARNESS_FILELEAK_IN_DIR> may give more predicatable results. -=item C<HARNESS_PERL_SWITCHES> +=item C<HARNESS_PERL_SWITCHES> Its value will be prepended to the switches used to invoke perl on -each test. For example, setting C<HARNESS_PERL_SWITCHES> to "-W" will +each test. For example, setting C<HARNESS_PERL_SWITCHES> to C<-W> will run all tests with all warnings enabled. -=item C<HARNESS_COLUMNS> +=item C<HARNESS_COLUMNS> This value will be used for the width of the terminal. If it is not set then it will default to C<COLUMNS>. If this is not set, it will default to 80. Note that users of Bourne-sh based shells will need to C<export COLUMNS> for this module to use that variable. -=item C<HARNESS_ACTIVE> +=item C<HARNESS_ACTIVE> Harness sets this before executing the individual tests. This allows the tests to determine if they are being executed through the harness @@ -860,21 +1075,61 @@ or by any other means. =back +=head1 EXAMPLE + +Here's how Test::Harness tests itself + + $ cd ~/src/devel/Test-Harness + $ perl -Mblib -e 'use Test::Harness qw(&runtests $verbose); + $verbose=0; runtests @ARGV;' t/*.t + Using /home/schwern/src/devel/Test-Harness/blib + t/base..............ok + t/nonumbers.........ok + t/ok................ok + t/test-harness......ok + All tests successful. + Files=4, Tests=24, 2 wallclock secs ( 0.61 cusr + 0.41 csys = 1.02 CPU) =head1 SEE ALSO -L<Test> for writing test scripts, L<Benchmark> for the underlying -timing routines and L<Devel::Coverage> for test coverage analysis. +L<Test> and L<Test::Simple> for writing test scripts, L<Benchmark> for +the underlying timing routines, L<Devel::CoreStack> to generate core +dumps from failed tests and L<Devel::Cover> for test coverage +analysis. =head1 AUTHORS Either Tim Bunce or Andreas Koenig, we don't know. What we know for sure is, that it was inspired by Larry Wall's TEST script that came with perl distributions for ages. Numerous anonymous contributors -exist. +exist. Andreas Koenig held the torch for many years. + +Current maintainer is Michael G Schwern E<lt>schwern@pobox.comE<gt> + +=head1 TODO + +Provide a way of running tests quietly (ie. no printing) for automated +validation of tests. This will probably take the form of a version +of runtests() which rather than printing its output returns raw data +on the state of the tests. + +Fix HARNESS_COMPILE_TEST without breaking its core usage. + +Figure a way to report test names in the failure summary. -Current maintainers are Andreas Koenig <andreas.koenig@anima.de> and -Michael G Schwern <schwern@pobox.com> +Rework the test summary so long test names are not truncated as badly. + +Merge back into bleadperl. + +Deal with VMS's "not \nok 4\n" mistake. + +Add option for coverage analysis. + +=for _private +Keeping whittling away at _run_all_tests() + +=for _private +Clean up how the summary is printed. Get rid of those damned formats. =head1 BUGS @@ -884,4 +1139,7 @@ portable because $^X is not consistent for shebang scripts across platforms. This is no problem when Test::Harness is run with an absolute path to the perl binary or when $^X can be found in the path. +HARNESS_COMPILE_TEST currently assumes its run from the Perl source +directory. + =cut diff --git a/lib/Time/Local.pm b/lib/Time/Local.pm index 9c81209ad3..35d74ab93b 100644 --- a/lib/Time/Local.pm +++ b/lib/Time/Local.pm @@ -4,7 +4,7 @@ require Exporter; use Carp; use strict; -our $VERSION = '1.00'; +our $VERSION = '1.01'; our @ISA = qw( Exporter ); our @EXPORT = qw( timegm timelocal ); our @EXPORT_OK = qw( timegm_nocheck timelocal_nocheck ); @@ -87,12 +87,14 @@ sub timelocal_nocheck { sub cheat { my($ym, @date) = @_; my($sec, $min, $hour, $day, $month, $year) = @date; + my($md); unless ($Options{no_range_check}) { - croak "Month '$month' out of range 0..11" if $month > 11 || $month < 0; - croak "Day '$day' out of range 1..31" if $day > 31 || $day < 1; - croak "Hour '$hour' out of range 0..23" if $hour > 23 || $hour < 0; - croak "Minute '$min' out of range 0..59" if $min > 59 || $min < 0; - croak "Second '$sec' out of range 0..59" if $sec > 59 || $sec < 0; + croak "Month '$month' out of range 0..11" if $month > 11 || $month < 0; + $md = (31, 29, 31, 30, 31, 30, 31, 30, 30, 31, 30, 31)[$month]; + croak "Day '$day' out of range 1..$md" if $day > $md || $day < 1; + croak "Hour '$hour' out of range 0..23" if $hour > 23 || $hour < 0; + croak "Minute '$min' out of range 0..59" if $min > 59 || $min < 0; + croak "Second '$sec' out of range 0..59" if $sec > 59 || $sec < 0; } my $guess = $^T; my @g = gmtime($guess); @@ -151,8 +153,8 @@ the corresponding time(2) value in seconds since the Epoch (Midnight, January 1, 1970). This value can be positive or negative. It is worth drawing particular attention to the expected ranges for -the values provided. While the day of the month is expected to be in -the range 1..31, the month should be in the range 0..11. +the values provided. The value for the day of the month is the actual day +(ie 1..31), while the month is the number of months since January (0..11). This is consistent with the values returned from localtime() and gmtime(). The timelocal() and timegm() functions perform range checking on the diff --git a/lib/attributes.pm b/lib/attributes.pm index f111645ae1..3c8923f6da 100644 --- a/lib/attributes.pm +++ b/lib/attributes.pm @@ -130,7 +130,8 @@ B<WARNING>: attribute declarations for variables are an I<experimental> feature. The semantics of such declarations could change or be removed in future versions. They are present for purposes of experimentation with what the semantics ought to be. Do not rely on the current -implementation of this feature. +implementation of this feature. Variable attributes are currently +not usable for tieing. There are only a few attributes currently handled by Perl itself (or directly by this module, depending on how you look at it.) However, diff --git a/lib/autouse.pm b/lib/autouse.pm index 5e4f30f649..7fb318be1c 100644 --- a/lib/autouse.pm +++ b/lib/autouse.pm @@ -39,7 +39,7 @@ sub import { my $closure_import_func = $func; # Full name my $closure_func = $func; # Name inside package - my $index = index($func, '::'); + my $index = rindex($func, '::'); if ($index == -1) { $closure_import_func = "${callpkg}::$func"; } else { diff --git a/lib/open.pm b/lib/open.pm index 1aef9040b5..c90181bb1b 100644 --- a/lib/open.pm +++ b/lib/open.pm @@ -59,57 +59,61 @@ open - perl pragma to set default disciplines for input and output =head1 DESCRIPTION -Full-fledged support for I/O disciplines is now implemented provided perl is -configured to use PerlIO as its IO system (which is now the default). +Full-fledged support for I/O disciplines is now implemented provided +Perl is configured to use PerlIO as its IO system (which is now the +default). The C<open> pragma serves as one of the interfaces to declare default "layers" (aka disciplines) for all I/O. The C<open> pragma is used to declare one or more default layers for -I/O operations. Any open(), readpipe() (aka qx//) and similar operators -found within the lexical scope of this pragma will use the declared defaults. +I/O operations. Any open(), readpipe() (aka qx//) and similar +operators found within the lexical scope of this pragma will use the +declared defaults. -When open() is given an explicit list of layers they are appended to the -list declared using this pragma. +When open() is given an explicit list of layers they are appended to +the list declared using this pragma. Directory handles may also support disciplines in future. =head1 NONPERLIO FUNCTIONALITY -If perl is not built to use PerlIO as its IO system then only the two pseudo-disciplines -":raw" and ":crlf" are available. +If Perl is not built to use PerlIO as its IO system then only the two +pseudo-disciplines ":raw" and ":crlf" are available. The ":raw" discipline corresponds to "binary mode" and the ":crlf" discipline corresponds to "text mode" on platforms that distinguish between the two modes when opening files (which is many DOS-like -platforms, including Windows). These two disciplines are -no-ops on platforms where binmode() is a no-op, but perform their -functions everywhere if PerlIO is enabled. +platforms, including Windows). These two disciplines are no-ops on +platforms where binmode() is a no-op, but perform their functions +everywhere if PerlIO is enabled. =head1 IMPLEMENTATION DETAILS -There are two package variables C<%layers> and C<@layers> which -are mainly manipulated by C code in F<perlio.c>, but are visible -to the nosy: +There are two package variables C<%layers> and C<@layers> which are +mainly manipulated by C code in F<perlio.c>, but are visible to the +nosy: print "Have ",join(',',keys %open::layers),"\n"; print "Using ",join(',',@open::layers),"\n"; -The C<%open::layers> hash is a record of the available "layers" that may be pushed -onto a C<PerlIO> stream. The values of the hash are perl objects, of class C<PerlIO::Layer> -which are created by the C code in F<perlio.c>. As yet there is nothing useful you -can do with the objects at the perl level. - -The C<@open::layers> array is the current set of layers and their arguments. -The array consists of layer => argument pairs and I<must> always have even number of -entries and the even entries I<must> be C<PerlIO::Layer> objects or perl will "die" -when it attempts to open a filehandle. In most cases the odd entry will be C<undef>, -but in the case of (say) ":encoding(iso-8859-1)" it will be 'iso-8859-1'. These +The C<%open::layers> hash is a record of the available "layers" that +may be pushed onto a C<PerlIO> stream. The values of the hash are Perl +objects, of class C<PerlIO::Layer> which are created by the C code in +F<perlio.c>. As yet there is nothing useful you can do with the +objects at the perl level. + +The C<@open::layers> array is the current set of layers and their +arguments. The array consists of layer => argument pairs and I<must> +always have even number of entries and the even entries I<must> be +C<PerlIO::Layer> objects or Perl will "die" when it attempts to open a +filehandle. In most cases the odd entry will be C<undef>, but in the +case of (say) ":encoding(iso-8859-1)" it will be 'iso-8859-1'. These argument entries are currently restricted to being strings. -When a new C<PerlIO> stream is opened, the C code looks at the -array to determine the default layers to be pushed. So with care it is possible -to manipulate the default layer "stack": +When a new C<PerlIO> stream is opened, the C code looks at the array +to determine the default layers to be pushed. So with care it is +possible to manipulate the default layer "stack": splice(@PerlIO::layers,-2,2); push(@PerlIO::layers,$PerlIO::layers{'stdio'} => undef); diff --git a/lib/perl5db.pl b/lib/perl5db.pl index 1e1b163b59..ab68bd5eab 100644 --- a/lib/perl5db.pl +++ b/lib/perl5db.pl @@ -1630,7 +1630,8 @@ sub break_on_line { my $pl = ''; die "Line $i$filename_error not breakable.\n" if $dbline[$i] == 0; $had_breakpoints{$filename} |= 1; - $dbline{$i} =~ s/^[^\0]*/$cond/ if $dbline{$i}; + if ($dbline{$i}) { $dbline{$i} =~ s/^[^\0]*/$cond/; } + else { $dbline{$i} = $cond; } } sub cmd_b_line { diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index ab2e15ddec..3ac4ef9b59 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -34,11 +34,16 @@ sub SWASHNEW { } else { $file =~ s#^(Is|To)([A-Z].*)#$1/$2#; } - $list ||= eval { $caller->$type(); } - || do "$file.pl" - || do "$encoding/$file.pl" - || do "$encoding/Is/${type}.pl" - || croak("Can't find $encoding character property \"$type\""); + + { + local $@; + + $list ||= ($caller ne 'main' && eval { $caller->$type(); }) + || do "$file.pl" + || do "$encoding/$file.pl" + || do "$encoding/Is/${type}.pl" + || croak("Can't find $encoding character property \"$type\""); + } $| = 1; @@ -51,7 +51,7 @@ S_save_magic(pTHX_ I32 mgs_ix, SV *sv) SvMAGICAL_off(sv); SvREADONLY_off(sv); - SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK)) >> PRIVSHIFT; + SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_NOK|SVp_POK)) >> PRIVSHIFT; } /* @@ -1331,6 +1331,10 @@ #define Perl_rsignal pPerl->Perl_rsignal #undef rsignal #define rsignal Perl_rsignal +#undef Perl_rsignal_state +#define Perl_rsignal_state pPerl->Perl_rsignal_state +#undef rsignal_state +#define rsignal_state Perl_rsignal_state #if !defined(HAS_RENAME) #endif #undef Perl_savepv @@ -2378,6 +2382,30 @@ #endif #if defined(PERL_OBJECT) #endif +#undef Perl_sv_setsv_flags +#define Perl_sv_setsv_flags pPerl->Perl_sv_setsv_flags +#undef sv_setsv_flags +#define sv_setsv_flags Perl_sv_setsv_flags +#undef Perl_sv_catpvn_flags +#define Perl_sv_catpvn_flags pPerl->Perl_sv_catpvn_flags +#undef sv_catpvn_flags +#define sv_catpvn_flags Perl_sv_catpvn_flags +#undef Perl_sv_catsv_flags +#define Perl_sv_catsv_flags pPerl->Perl_sv_catsv_flags +#undef sv_catsv_flags +#define sv_catsv_flags Perl_sv_catsv_flags +#undef Perl_sv_utf8_upgrade_flags +#define Perl_sv_utf8_upgrade_flags pPerl->Perl_sv_utf8_upgrade_flags +#undef sv_utf8_upgrade_flags +#define sv_utf8_upgrade_flags Perl_sv_utf8_upgrade_flags +#undef Perl_sv_pvn_force_flags +#define Perl_sv_pvn_force_flags pPerl->Perl_sv_pvn_force_flags +#undef sv_pvn_force_flags +#define sv_pvn_force_flags Perl_sv_pvn_force_flags +#undef Perl_sv_2pv_flags +#define Perl_sv_2pv_flags pPerl->Perl_sv_2pv_flags +#undef sv_2pv_flags +#define sv_2pv_flags Perl_sv_2pv_flags #endif /* PERL_CORE && PERL_OBJECT */ #endif /* __objXSUB_h__ */ diff --git a/os2/Makefile.SHs b/os2/Makefile.SHs index b3b472b640..9efc864afa 100644 --- a/os2/Makefile.SHs +++ b/os2/Makefile.SHs @@ -14,6 +14,15 @@ esac dll_post="`echo $perl_fullversion | sum | sed -e 's/^0*//' | awk '{print $1}'`" dll_post="`printf '%x' $dll_post | tr '[a-z]' '[A-Z]'`" +aout_extra_libs='' +aout_extra_sep='' +for xxx in $aout_extra_static_ext; do + aout_extra_dir=`echo "$xxx" | sed -e 's/::/\//g'` + aout_extra_lib="lib/auto/$aout_extra_dir/"`basename "$aout_extra_dir"` + aout_extra_libs="$aout_extra_libs$aout_extra_sep$aout_extra_lib$aout_lib_ext" + aout_extra_sep=' ' +done + $spitshell >>Makefile <<!GROK!THIS! PERL_FULLVERSION = $perl_fullversion @@ -37,6 +46,7 @@ PERL_DLL_BASE = perl$dll_post PERL_DLL = \$(PERL_DLL_BASE)\$(DLSUFFIX) TEST_PERL_DLL = perl_dll_t CONFIG_ARGS = $config_args +AOUT_EXTRA_LIBS = $aout_extra_libs !GROK!THIS! @@ -150,8 +160,9 @@ installcmd : aout_obj = $(addsuffix $(AOUT_OBJ_EXT),$(basename $(obj))) AOUT_DYNALOADER = $(addsuffix $(AOUT_LIB_EXT),$(basename $(DYNALOADER))) -aout_static_ext = $(addsuffix $(AOUT_LIB_EXT),$(basename $(dynamic_ext))) -aout_static_lib = $(addsuffix $(LIB_EXT),$(basename $(dynamic_ext))) +aout_ext = $(dynamic_ext) $(AOUT_EXTRA_LIBS) +aout_static_ext = $(addsuffix $(AOUT_LIB_EXT),$(basename $(aout_ext))) +aout_static_lib = $(addsuffix $(LIB_EXT),$(basename $(aout_ext))) aout_static_ext_dll = $(addsuffix $(AOUT_LIB_EXT),$(basename $(static_ext))) DYNALOADER_OBJ = ext/DynaLoader/DynaLoader$(OBJ_EXT) @@ -2009,21 +2009,31 @@ XS(XS_Cwd_sys_abspath) } typedef APIRET (*PELP)(PSZ path, ULONG type); +/* Kernels after 2000/09/15 understand this too: */ +#ifndef LIBPATHSTRICT +# define LIBPATHSTRICT 3 +#endif + APIRET -ExtLIBPATH(ULONG ord, PSZ path, ULONG type) +ExtLIBPATH(ULONG ord, PSZ path, IV type) { + ULONG what; + loadByOrd("doscalls",ord); /* Guarantied to load or die! */ - return (*(PELP)ExtFCN[ord])(path, type); + if (type > 0) + what = END_LIBPATH; + else if (type == 0) + what = BEGIN_LIBPATH; + else + what = LIBPATHSTRICT; + return (*(PELP)ExtFCN[ord])(path, what); } -#define extLibpath(type) \ - (CheckOSError(ExtLIBPATH(ORD_QUERY_ELP, to, ((type) ? END_LIBPATH \ - : BEGIN_LIBPATH))) \ - ? NULL : to ) +#define extLibpath(to,type) \ + (CheckOSError(ExtLIBPATH(ORD_QUERY_ELP, (to), (type))) ? NULL : (to) ) #define extLibpath_set(p,type) \ - (!CheckOSError(ExtLIBPATH(ORD_SET_ELP, (p), ((type) ? END_LIBPATH \ - : BEGIN_LIBPATH)))) + (!CheckOSError(ExtLIBPATH(ORD_SET_ELP, (p), (type)))) XS(XS_Cwd_extLibpath) { @@ -2031,7 +2041,7 @@ XS(XS_Cwd_extLibpath) if (items < 0 || items > 1) Perl_croak_nocontext("Usage: Cwd::extLibpath(type = 0)"); { - bool type; + IV type; char to[1024]; U32 rc; char * RETVAL; @@ -2039,10 +2049,13 @@ XS(XS_Cwd_extLibpath) if (items < 1) type = 0; else { - type = (int)SvIV(ST(0)); + type = SvIV(ST(0)); } - RETVAL = extLibpath(type); + to[0] = 1; to[1] = 0; /* Sometimes no error reported */ + RETVAL = extLibpath(to, type); + if (RETVAL && RETVAL[0] == 1 && RETVAL[1] == 0) + Perl_croak_nocontext("panic Cwd::extLibpath parameter"); ST(0) = sv_newmortal(); sv_setpv((SV*)ST(0), RETVAL); } @@ -2057,14 +2070,14 @@ XS(XS_Cwd_extLibpath_set) { STRLEN n_a; char * s = (char *)SvPV(ST(0),n_a); - bool type; + IV type; U32 rc; bool RETVAL; if (items < 2) type = 0; else { - type = (int)SvIV(ST(1)); + type = SvIV(ST(1)); } RETVAL = extLibpath_set(s, type); diff --git a/patchlevel.h b/patchlevel.h index faae9a8e04..865e0f8921 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -70,7 +70,7 @@ #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT) static char *local_patches[] = { NULL - ,"DEVEL10209" + ,"DEVEL10297" ,NULL }; @@ -2432,6 +2432,13 @@ Perl_rsignal(pTHXo_ int i, Sighandler_t t) { return ((CPerlObj*)pPerl)->Perl_rsignal(i, t); } + +#undef Perl_rsignal_state +Sighandler_t +Perl_rsignal_state(pTHXo_ int i) +{ + return ((CPerlObj*)pPerl)->Perl_rsignal_state(i); +} #if !defined(HAS_RENAME) #endif @@ -4226,6 +4233,48 @@ Perl_sys_intern_init(pTHXo) #if defined(PERL_OBJECT) #endif +#undef Perl_sv_setsv_flags +void +Perl_sv_setsv_flags(pTHXo_ SV* dsv, SV* ssv, I32 flags) +{ + ((CPerlObj*)pPerl)->Perl_sv_setsv_flags(dsv, ssv, flags); +} + +#undef Perl_sv_catpvn_flags +void +Perl_sv_catpvn_flags(pTHXo_ SV* sv, const char* ptr, STRLEN len, I32 flags) +{ + ((CPerlObj*)pPerl)->Perl_sv_catpvn_flags(sv, ptr, len, flags); +} + +#undef Perl_sv_catsv_flags +void +Perl_sv_catsv_flags(pTHXo_ SV* dsv, SV* ssv, I32 flags) +{ + ((CPerlObj*)pPerl)->Perl_sv_catsv_flags(dsv, ssv, flags); +} + +#undef Perl_sv_utf8_upgrade_flags +STRLEN +Perl_sv_utf8_upgrade_flags(pTHXo_ SV *sv, I32 flags) +{ + return ((CPerlObj*)pPerl)->Perl_sv_utf8_upgrade_flags(sv, flags); +} + +#undef Perl_sv_pvn_force_flags +char* +Perl_sv_pvn_force_flags(pTHXo_ SV* sv, STRLEN* lp, I32 flags) +{ + return ((CPerlObj*)pPerl)->Perl_sv_pvn_force_flags(sv, lp, flags); +} + +#undef Perl_sv_2pv_flags +char* +Perl_sv_2pv_flags(pTHXo_ SV* sv, STRLEN* lp, I32 flags) +{ + return ((CPerlObj*)pPerl)->Perl_sv_2pv_flags(sv, lp, flags); +} + #undef Perl_fprintf_nocontext int Perl_fprintf_nocontext(PerlIO *stream, const char *format, ...) @@ -311,6 +311,13 @@ HV *PerlIO_layer_hv; AV *PerlIO_layer_av; void +PerlIO_cleanup_layers(pTHXo_ void *data) +{ + PerlIO_layer_hv = Nullhv; + PerlIO_layer_av = Nullav; +} + +void PerlIO_cleanup() { dTHX; @@ -3596,6 +3603,8 @@ PerlIO_funcs PerlIO_mmap = { void PerlIO_init(void) { + dTHX; + call_atexit(PerlIO_cleanup_layers, NULL); if (!_perlio) { #ifndef WIN32 @@ -3647,8 +3656,14 @@ char * PerlIO_getname(PerlIO *f, char *buf) { dTHX; + char *name = NULL; +#ifdef VMS + FILE *stdio = PerlIOSelf(f,PerlIOStdio)->stdio; + if (stdio) name = fgetname(stdio, buf); +#else Perl_croak(aTHX_ "Don't know how to get file name"); - return NULL; +#endif + return name; } @@ -237,6 +237,9 @@ extern void PerlIO_releaseFILE (PerlIO *,FILE *); #ifndef PerlIO_read extern SSize_t PerlIO_read (PerlIO *,void *,Size_t); #endif +#ifndef PerlIO_unread +extern SSize_t PerlIO_unread (PerlIO *,const void *,Size_t); +#endif #ifndef PerlIO_write extern SSize_t PerlIO_write (PerlIO *,const void *,Size_t); #endif @@ -326,6 +329,9 @@ extern int PerlIO_apply_layers (pTHX_ PerlIO *f, const char *mode, const char *n #ifndef PerlIO_binmode extern int PerlIO_binmode (pTHX_ PerlIO *f, int iotype, int omode, const char *names); #endif +#ifndef PerlIO_getname +extern char * PerlIO_getname (PerlIO *, char *); +#endif extern void PerlIO_destruct(pTHX); @@ -115,8 +115,6 @@ extern SSize_t PerlIOBase_unread (PerlIO *f, const void *vbuf, Size_t count); extern IV PerlIOBase_eof (PerlIO *f); extern IV PerlIOBase_error (PerlIO *f); extern void PerlIOBase_clearerr (PerlIO *f); -extern IV PerlIOBase_flush (PerlIO *f); -extern IV PerlIOBase_fill (PerlIO *f); extern IV PerlIOBase_close (PerlIO *f); extern void PerlIOBase_setlinebuf(PerlIO *f); extern void PerlIOBase_flush_linebuf(void); diff --git a/perlsdio.h b/perlsdio.h index fd990c06d8..da45c32714 100644 --- a/perlsdio.h +++ b/perlsdio.h @@ -15,6 +15,7 @@ #define PerlIO_stdoutf printf #define PerlIO_vprintf(f,fmt,a) vfprintf(f,fmt,a) #define PerlIO_write(f,buf,count) fwrite1(buf,1,count,f) +#define PerlIO_unread(f,buf,count) (-1) #define PerlIO_open fopen #define PerlIO_fdopen fdopen #define PerlIO_reopen freopen diff --git a/pod/buildtoc.PL b/pod/buildtoc.PL index 6c43035889..a89c4c98eb 100644 --- a/pod/buildtoc.PL +++ b/pod/buildtoc.PL @@ -169,6 +169,7 @@ if (-d "pod") { perlos2 perlos390 perlsolaris + perltru64 perlvmesa perlvms perlvos @@ -190,6 +191,7 @@ if (-d "pod") { perlos2 perlos390 perlsolaris + perltru64 perlvmesa perlvms perlvos diff --git a/pod/perl.pod b/pod/perl.pod index c047d347d5..fd15c1725c 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -123,6 +123,7 @@ For ease of access, the Perl manual has been split up into several sections: perlos2 Perl notes for OS/2 perlos390 Perl notes for OS/390 perlsolaris Perl notes for Solaris + perltru64 Perl notes for Tru64 perlvmesa Perl notes for VM/ESA perlvms Perl notes for VMS perlvos Perl notes for Stratus VOS diff --git a/pod/perl5005delta.pod b/pod/perl5005delta.pod index 78bf90f616..c60d37c3e6 100644 --- a/pod/perl5005delta.pod +++ b/pod/perl5005delta.pod @@ -145,7 +145,7 @@ Most of the Perl documentation was previously under the implicit GNU General Public License or the Artistic License (at the user's choice). Now much of the documentation unambiguously states the terms under which it may be distributed. Those terms are in general much less restrictive -than the GNU GPL. See L<perl> and the individual perl man pages listed +than the GNU GPL. See L<perl> and the individual perl manpages listed therein. =head1 Core Changes diff --git a/pod/perl571delta.pod b/pod/perl571delta.pod index 9a069beebb..df132ccf77 100644 --- a/pod/perl571delta.pod +++ b/pod/perl571delta.pod @@ -62,7 +62,7 @@ natively, ASCII or EBCDIC, unless GLOB_ALPHASORT is specified.) =head1 Core Enhancements -=head1 AUTOLOAD Is Now Lvaluable +=head2 AUTOLOAD Is Now Lvaluable AUTOLOAD is now lvaluable, meaning that you can add the :lvalue attribute to AUTOLOAD subroutines and you can assign to the AUTOLOAD return value. diff --git a/pod/perlapi.pod b/pod/perlapi.pod index ca4d5e6a97..824ec5948e 100644 --- a/pod/perlapi.pod +++ b/pod/perlapi.pod @@ -318,7 +318,7 @@ L<perlsub/"Constant Functions">. SV* cv_const_sv(CV* cv) =for hackers -Found in file op.c +Found in file opmini.c =item dMARK @@ -1100,7 +1100,7 @@ method, similar to C<use Foo::Bar VERSION LIST>. void load_module(U32 flags, SV* name, SV* ver, ...) =for hackers -Found in file op.c +Found in file opmini.c =item looks_like_number @@ -1239,7 +1239,7 @@ eligible for inlining at compile-time. CV* newCONSTSUB(HV* stash, char* name, SV* sv) =for hackers -Found in file op.c +Found in file opmini.c =item newHV @@ -1385,7 +1385,7 @@ Found in file sv.c Used by C<xsubpp> to hook up XSUBs as Perl subs. =for hackers -Found in file op.c +Found in file opmini.c =item newXSproto @@ -2208,7 +2208,7 @@ Found in file sv.h Tells an SV that it is a string and disables all other OK bits, and leaves the UTF8 status as it was. - + void SvPOK_only_UTF8(SV* sv) =for hackers @@ -2608,6 +2608,20 @@ Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>. =for hackers Found in file sv.c +=item sv_catpvn_flags + +Concatenates the string onto the end of the string which is in the SV. The +C<len> indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<dsv> if +appropriate, else not. C<sv_catpvn> and C<sv_catpvn_nomg> are implemented +in terms of this function. + + void sv_catpvn_flags(SV* sv, const char* ptr, STRLEN len, I32 flags) + +=for hackers +Found in file sv.c + =item sv_catpvn_mg Like C<sv_catpvn>, but also handles 'set' magic. @@ -2637,6 +2651,18 @@ not 'set' magic. See C<sv_catsv_mg>. =for hackers Found in file sv.c +=item sv_catsv_flags + +Concatenates the string from SV C<ssv> onto the end of the string in +SV C<dsv>. Modifies C<dsv> but not C<ssv>. If C<flags> has C<SV_GMAGIC> +bit set, will C<mg_get> on the SVs if appropriate, else not. C<sv_catsv> +and C<sv_catsv_nomg> are implemented in terms of this function. + + void sv_catsv_flags(SV* dsv, SV* ssv, I32 flags) + +=for hackers +Found in file sv.c + =item sv_catsv_mg Like C<sv_catsv>, but also handles 'set' magic. @@ -2846,6 +2872,18 @@ Get a sensible string out of the SV somehow. =for hackers Found in file sv.c +=item sv_pvn_force_flags + +Get a sensible string out of the SV somehow. +If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<sv> if +appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are +implemented in terms of this function. + + char* sv_pvn_force_flags(SV* sv, STRLEN* lp, I32 flags) + +=for hackers +Found in file sv.c + =item sv_pvutf8n_force Get a sensible UTF8-encoded string out of the SV somehow. See @@ -3083,6 +3121,19 @@ C<sv_setsv_mg>. =for hackers Found in file sv.c +=item sv_setsv_flags + +Copies the contents of the source SV C<ssv> into the destination SV C<dsv>. +The source SV may be destroyed if it is mortal. Does not handle 'set' +magic. If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<ssv> if +appropriate, else not. C<sv_setsv> and C<sv_setsv_nomg> are implemented +in terms of this function. + + void sv_setsv_flags(SV* dsv, SV* ssv, I32 flags) + +=for hackers +Found in file sv.c + =item sv_setsv_mg Like C<sv_setsv>, but also handles 'set' magic. @@ -3242,6 +3293,20 @@ if all the bytes have hibit clear. =for hackers Found in file sv.c +=item sv_utf8_upgrade_flags + +Convert the PV of an SV to its UTF8-encoded form. +Forces the SV to string form it it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. If C<flags> has C<SV_GMAGIC> bit set, +will C<mg_get> on C<sv> if appropriate, else not. C<sv_utf8_upgrade> and +C<sv_utf8_upgrade_nomg> are implemented in terms of this function. + + STRLEN sv_utf8_upgrade_flags(SV *sv, I32 flags) + +=for hackers +Found in file sv.c + =item sv_vcatpvfn Processes its arguments like C<vsprintf> and appends the formatted output diff --git a/pod/perldebtut.pod b/pod/perldebtut.pod index e11102e567..693e938e94 100644 --- a/pod/perldebtut.pod +++ b/pod/perldebtut.pod @@ -666,7 +666,7 @@ and there's a B<vi> interface too. You don't have to do this all on the command line, though, there are a few GUI options out there. The nice thing about these is you can wave a mouse over a -variable and a dump of it's data will appear in an appropriate window, or in a +variable and a dump of its data will appear in an appropriate window, or in a popup balloon, no more tiresome typing of 'x $varname' :-) In particular have a hunt around for the following: diff --git a/pod/perlebcdic.pod b/pod/perlebcdic.pod index ccfe1392ba..c98b46c6e7 100644 --- a/pod/perlebcdic.pod +++ b/pod/perlebcdic.pod @@ -45,7 +45,7 @@ A particular 8-bit extension to ASCII that includes grave and acute accented Latin characters. Languages that can employ ISO 8859-1 include all the languages covered by ASCII as well as Afrikaans, Albanian, Basque, Catalan, Danish, Faroese, Finnish, Norwegian, -Portugese, Spanish, and Swedish. Dutch is covered albeit without +Portuguese, Spanish, and Swedish. Dutch is covered albeit without the ij ligature. French is covered too but without the oe ligature. German can use ISO 8859-1 but must do so without German-style quotation marks. This set is based on Western European extensions @@ -597,7 +597,7 @@ XPG operability often implies the presence of an I<iconv> utility available from the shell or from the C library. Consult your system's documentation for information on iconv. -On OS/390 or z/OS see the iconv(1) man page. One way to invoke the iconv +On OS/390 or z/OS see the iconv(1) manpage. One way to invoke the iconv shell utility from within perl would be to: # OS/390 or z/OS example @@ -1243,7 +1243,7 @@ translation difficulties. In particular one popular nroff implementation was known to strip accented characters to their unaccented counterparts while attempting to view this document through the B<pod2man> program (for example, you may see a plain C<y> rather than one with a diaeresis -as in E<yuml>). Another nroff truncated the resultant man page at +as in E<yuml>). Another nroff truncated the resultant manpage at the first occurrence of 8 bit characters. Not all shells will allow multiple C<-e> string arguments to perl to diff --git a/pod/perlfaq3.pod b/pod/perlfaq3.pod index fdfa26d277..dabe5687a5 100644 --- a/pod/perlfaq3.pod +++ b/pod/perlfaq3.pod @@ -11,7 +11,7 @@ and programming support. Have you looked at CPAN (see L<perlfaq2>)? The chances are that someone has already written a module that can solve your problem. -Have you read the appropriate man pages? Here's a brief index: +Have you read the appropriate manpages? Here's a brief index: Basics perldata, perlvar, perlsyn, perlop, perlsub Execution perlrun, perldebug @@ -25,12 +25,12 @@ Have you read the appropriate man pages? Here's a brief index: Various http://www.perl.com/CPAN/doc/FMTEYEWTK/index.html (not a man-page but still useful) -A crude table of contents for the Perl man page set is found in L<perltoc>. +A crude table of contents for the Perl manpage set is found in L<perltoc>. =head2 How can I use Perl interactively? The typical approach uses the Perl debugger, described in the -perldebug(1) man page, on an ``empty'' program, like this: +perldebug(1) manpage, on an ``empty'' program, like this: perl -de 42 diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index c2b9f39f66..89123011ca 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -4291,6 +4291,12 @@ loop control operators described in L<perlsyn> or with C<goto>. When C<use locale> is in effect, C<sort LIST> sorts LIST according to the current collation locale. See L<perllocale>. +Perl does B<not> guarantee that sort is stable. (A I<stable> sort +preserves the input order of elements that compare equal.) 5.7 and +5.8 happen to use a stable mergesort, but 5.6 and earlier used quicksort, +which is not stable. Do not assume that future perls will continue to +use a stable sort. + Examples: # sort lexically @@ -5616,6 +5622,13 @@ command if the files already exist: $now = time; utime $now, $now, @ARGV; +If the first two elements of the list are C<undef>, then the utime(2) +function in the C library will be called with a null second argument. +On most systems, this will set the file's access and modification +times to the current time. (i.e. equivalent to the example above.) + + utime undef, undef, @ARGV; + =item values HASH Returns a list consisting of all the values of the named hash. (In a diff --git a/pod/perlguts.pod b/pod/perlguts.pod index 391cf8ae74..44f3640183 100644 --- a/pod/perlguts.pod +++ b/pod/perlguts.pod @@ -1289,7 +1289,7 @@ where C<SP> is the macro that represents the local copy of the stack pointer, and C<num> is the number of elements the stack should be extended by. Now that there is room on the stack, values can be pushed on it using C<PUSHs> -macro. The values pushed will often need to be "mortal" (See L</Reference Counts and Mortality). +macro. The values pushed will often need to be "mortal" (See L</Reference Counts and Mortality>). PUSHs(sv_2mortal(newSViv(an_integer))) PUSHs(sv_2mortal(newSVpv("Some String",0))) diff --git a/pod/perlhack.pod b/pod/perlhack.pod index 94c6dfd96b..6b67e5706b 100644 --- a/pod/perlhack.pod +++ b/pod/perlhack.pod @@ -273,7 +273,7 @@ From http://rsync.samba.org/README.html: "Rsync uses rsh or ssh for communication. It does not need to be setuid and requires no special privileges for installation. It - does not require a inetd entry or a deamon. You must, however, + does not require an inetd entry or a daemon. You must, however, have a working rsh or ssh system. Using ssh is recommended for its security features." @@ -434,7 +434,7 @@ look how others apply the fix. =item Finding the source of misbehaviour When you keep in sync with bleadperl, the pumpking would love to -I<see> that the community efforts realy work. So after each of his +I<see> that the community efforts really work. So after each of his sync points, you are to 'make test' to check if everything is still in working order. If it is, you do 'make ok', which will send an OK report to perlbug@perl.org. (If you do not have access to a mailer @@ -442,7 +442,7 @@ from the system you just finished successfully 'make test', you can do 'make okfile', which creates the file C<perl.ok>, which you can than take to your favourite mailer and mail yourself). -But of course, as always, things will not allways lead to a success +But of course, as always, things will not always lead to a success path, and one or more test do not pass the 'make test'. Before sending in a bug report (using 'make nok' or 'make nokfile'), check the mailing list if someone else has reported the bug already and if @@ -1813,9 +1813,9 @@ In Tru64 the following options are available: =over 4 -=item -p[rodecures] +=item -p[rocedures] -Procecures sorted in descending order by the number of cycles executed +Procedures sorted in descending order by the number of cycles executed in each procedure. Useful for finding the hotspot procedures. (This is the default option.) diff --git a/pod/perlmodlib.pod b/pod/perlmodlib.pod index f63a984d07..1310b14bb0 100644 --- a/pod/perlmodlib.pod +++ b/pod/perlmodlib.pod @@ -374,6 +374,10 @@ Exporter guts Utilities to replace common UNIX commands in Makefiles etc. +=item ExtUtils::Constant + +Generate XS code to import C header constants + =item ExtUtils::Embed Utilities for embedding Perl in C/C++ applications @@ -538,6 +542,14 @@ Process single-character switches with switch clustering Compare 8-bit scalar data according to the current locale +=item I18N::LangTags + +Functions for dealing with RFC3066-style language tags + +=item I18N::LangTags::List + +List of tags for human languages + =item IO Load various IO modules @@ -566,6 +578,14 @@ ISO three letter codes for currency identification (ISO 4217) ISO two letter codes for language identification (ISO 639) +=item Locale::Maketext + +Framework for localization + +=item Locale::Maketext::TPJ13 + +Article about software localization + =item Math::BigFloat Arbitrary length float math package diff --git a/pod/perlnewmod.pod b/pod/perlnewmod.pod index ace8d85130..ccac5dc383 100644 --- a/pod/perlnewmod.pod +++ b/pod/perlnewmod.pod @@ -239,18 +239,21 @@ it connected to the rest of the CPAN, you'll need to tell the modules list about it. The best way to do this is to email them a line in the style of the modules list, like this: - Net::Acme bdpO Interface to Acme Frobnicator servers FOOBAR - ^ ^^^^ ^ ^ - | |||| Module description Your ID - | |||| - | |||\- Interface: (O)OP, (r)eferences, (h)ybrid, (f)unctions - | ||| - | ||\-- Language: (p)ure Perl, C(+)+, (h)ybrid, (C), (o)ther - | || - Module |\--- Support: (d)eveloper, (m)ailing list, (u)senet, (n)one - Name | - \---- Maturity: (i)dea, (c)onstructions, (a)lpha, (b)eta, - (R)eleased, (M)ature, (S)tandard + Net::Acme bdpOP Interface to Acme Frobnicator servers FOOBAR + ^ ^^^^^ ^ ^ + | ||||| Module description Your ID + | ||||| + | ||||\-Public Licence: (p)standard Perl, (g)GPL, (b)BSD, + | |||| (l)LGPL, (a)rtistic, (o)ther + | |||| + | |||\- Interface: (O)OP, (r)eferences, (h)ybrid, (f)unctions + | ||| + | ||\-- Language: (p)ure Perl, C(+)+, (h)ybrid, (C), (o)ther + | || + Module |\--- Support: (d)eveloper, (m)ailing list, (u)senet, (n)one + Name | + \---- Development: (i)dea, (c)onstructions, (a)lpha, (b)eta, + (R)eleased, (M)ature, (S)tandard plus a description of the module and why you think it should be included. If you hear nothing back, that means your module will diff --git a/pod/perltoc.pod b/pod/perltoc.pod index a75f39ea1f..35e70e3c46 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -361,16 +361,16 @@ lock, log EXPR, log, lstat EXPR, lstat, m//, map BLOCK LIST, map EXPR,LIST, mkdir FILENAME,MASK, mkdir FILENAME, msgctl ID,CMD,ARG, msgget KEY,FLAGS, msgrcv ID,VAR,SIZE,TYPE,FLAGS, msgsnd ID,MSG,FLAGS, my EXPR, my EXPR : ATTRIBUTES, next LABEL, next, no Module LIST, oct EXPR, oct, open -FILEHANDLE,MODE,LIST, open FILEHANDLE,EXPR, open FILEHANDLE, opendir -DIRHANDLE,EXPR, ord EXPR, ord, our EXPR, our EXPR : ATTRIBUTES, pack -TEMPLATE,LIST, package NAMESPACE, package, pipe READHANDLE,WRITEHANDLE, pop -ARRAY, pop, pos SCALAR, pos, print FILEHANDLE LIST, print LIST, print, -printf FILEHANDLE FORMAT, LIST, printf FORMAT, LIST, prototype FUNCTION, -push ARRAY,LIST, q/STRING/, qq/STRING/, qr/STRING/, qx/STRING/, qw/STRING/, -quotemeta EXPR, quotemeta, rand EXPR, rand, read -FILEHANDLE,SCALAR,LENGTH,OFFSET, read FILEHANDLE,SCALAR,LENGTH, readdir -DIRHANDLE, readline EXPR, readlink EXPR, readlink, readpipe EXPR, recv -SOCKET,SCALAR,LENGTH,FLAGS, redo LABEL, redo, ref EXPR, ref, rename +FILEHANDLE,EXPR, open FILEHANDLE,MODE,EXPR, open FILEHANDLE,MODE,EXPR,LIST, +open FILEHANDLE, opendir DIRHANDLE,EXPR, ord EXPR, ord, our EXPR, our EXPR +: ATTRIBUTES, pack TEMPLATE,LIST, package NAMESPACE, package, pipe +READHANDLE,WRITEHANDLE, pop ARRAY, pop, pos SCALAR, pos, print FILEHANDLE +LIST, print LIST, print, printf FILEHANDLE FORMAT, LIST, printf FORMAT, +LIST, prototype FUNCTION, push ARRAY,LIST, q/STRING/, qq/STRING/, +qr/STRING/, qx/STRING/, qw/STRING/, quotemeta EXPR, quotemeta, rand EXPR, +rand, read FILEHANDLE,SCALAR,LENGTH,OFFSET, read FILEHANDLE,SCALAR,LENGTH, +readdir DIRHANDLE, readline EXPR, readlink EXPR, readlink, readpipe EXPR, +recv SOCKET,SCALAR,LENGTH,FLAGS, redo LABEL, redo, ref EXPR, ref, rename OLDNAME,NEWNAME, require VERSION, require EXPR, require, reset EXPR, reset, return EXPR, return, reverse LIST, rewinddir DIRHANDLE, rindex STR,SUBSTR,POSITION, rindex STR,SUBSTR, rmdir FILENAME, rmdir, s///, scalar @@ -3903,18 +3903,19 @@ SvROK_on, SvRV, SvSETMAGIC, SvSetSV, SvSetSV_nosteal, SvSTASH, SvTAINT, SvTAINTED, SvTAINTED_off, SvTAINTED_on, SvTRUE, SvTYPE, svtype, SVt_IV, SVt_NV, SVt_PV, SVt_PVAV, SVt_PVCV, SVt_PVHV, SVt_PVMG, SvUOK, SvUPGRADE, SvUTF8, SvUTF8_off, SvUTF8_on, SvUV, SvUVX, sv_2mortal, sv_bless, sv_catpv, -sv_catpvf, sv_catpvf_mg, sv_catpvn, sv_catpvn_mg, sv_catpv_mg, sv_catsv, -sv_catsv_mg, sv_chop, sv_clear, sv_cmp, sv_cmp_locale, sv_dec, -sv_derived_from, sv_eq, sv_free, sv_gets, sv_grow, sv_inc, sv_insert, -sv_isa, sv_isobject, sv_len, sv_len_utf8, sv_magic, sv_mortalcopy, -sv_newmortal, sv_pvn_force, sv_pvutf8n_force, sv_reftype, sv_replace, -sv_rvweaken, sv_setiv, sv_setiv_mg, sv_setnv, sv_setnv_mg, sv_setpv, -sv_setpvf, sv_setpvf_mg, sv_setpviv, sv_setpviv_mg, sv_setpvn, -sv_setpvn_mg, sv_setpv_mg, sv_setref_iv, sv_setref_nv, sv_setref_pv, -sv_setref_pvn, sv_setref_uv, sv_setsv, sv_setsv_mg, sv_setuv, sv_setuv_mg, -sv_true, sv_unmagic, sv_unref, sv_unref_flags, sv_upgrade, sv_usepvn, -sv_usepvn_mg, sv_utf8_decode, sv_utf8_downgrade, sv_utf8_encode, -sv_utf8_upgrade, sv_vcatpvfn, sv_vsetpvfn, THIS, toLOWER, toUPPER, +sv_catpvf, sv_catpvf_mg, sv_catpvn, sv_catpvn_flags, sv_catpvn_mg, +sv_catpv_mg, sv_catsv, sv_catsv_flags, sv_catsv_mg, sv_chop, sv_clear, +sv_cmp, sv_cmp_locale, sv_dec, sv_derived_from, sv_eq, sv_free, sv_gets, +sv_grow, sv_inc, sv_insert, sv_isa, sv_isobject, sv_len, sv_len_utf8, +sv_magic, sv_mortalcopy, sv_newmortal, sv_pvn_force, sv_pvn_force_flags, +sv_pvutf8n_force, sv_reftype, sv_replace, sv_rvweaken, sv_setiv, +sv_setiv_mg, sv_setnv, sv_setnv_mg, sv_setpv, sv_setpvf, sv_setpvf_mg, +sv_setpviv, sv_setpviv_mg, sv_setpvn, sv_setpvn_mg, sv_setpv_mg, +sv_setref_iv, sv_setref_nv, sv_setref_pv, sv_setref_pvn, sv_setref_uv, +sv_setsv, sv_setsv_flags, sv_setsv_mg, sv_setuv, sv_setuv_mg, sv_true, +sv_unmagic, sv_unref, sv_unref_flags, sv_upgrade, sv_usepvn, sv_usepvn_mg, +sv_utf8_decode, sv_utf8_downgrade, sv_utf8_encode, sv_utf8_upgrade, +sv_utf8_upgrade_flags, sv_vcatpvfn, sv_vsetpvfn, THIS, toLOWER, toUPPER, utf8n_to_uvchr, utf8n_to_uvuni, utf8_distance, utf8_hop, utf8_length, utf8_to_bytes, utf8_to_uvchr, utf8_to_uvuni, uvchr_to_utf8, uvuni_to_utf8, warn, XPUSHi, XPUSHn, XPUSHp, XPUSHs, XPUSHu, XS, XSRETURN, XSRETURN_EMPTY, @@ -4076,6 +4077,8 @@ PerlIO_apply_layers(f,mode,layers), PerlIO_binmode(f,ptype,imode,layers), =item use Thread for iThreads +=item make perl_clone optionally clone ops + =item Work out exit/die semantics for threads =item Typed lexicals for compiler @@ -4214,12 +4217,13 @@ PerlIO_apply_layers(f,mode,layers), PerlIO_binmode(f,ptype,imode,layers), =item Rewrite perldoc -=item Install .3p man pages +=item Install .3p manpages =item Unicode tutorial =item Update POSIX.pm for 1003.1-2 -=head2 Retargetable installation + +=item Retargetable installation =item POSIX emulation on non-POSIX systems @@ -4242,8 +4246,10 @@ PerlIO_apply_layers(f,mode,layers), PerlIO_binmode(f,ptype,imode,layers), =item Make tr/// return histogram =item Compile to real threaded code -=head2 Structured types -=head2 Modifiable $1 et al. + +=item Structured types + +=item Modifiable $1 et al. =item Procedural interfaces for IO::*, etc. @@ -4352,10 +4358,12 @@ PerlIO_apply_layers(f,mode,layers), PerlIO_binmode(f,ptype,imode,layers), =item Lexically scoped typeglobs =item format BOTTOM -=head2 report HANDLE + +=item report HANDLE =item Generalised want()/caller()) -=head2 Named prototypes + +=item Named prototypes =item Built-in globbing @@ -4466,9 +4474,13 @@ DEFINES, USE_MULTI = define, #PERL_MALLOC = define, CFG = Debug =item PERL_DESTRUCT_LEVEL +=item Gprof Profiling + +-a, -b, -e routine, -f routine, -s, -z + =item Pixie Profiling --h, -l, -p[rodecures], -h[eavy], -i[nvocations], -l[ines], -testcoverage, +-h, -l, -p[rocedures], -h[eavy], -i[nvocations], -l[ines], -testcoverage, -z[ero] =item CONCLUSION @@ -4535,6 +4547,8 @@ I<The Road goes ever on and on, down from the door where it began.> =item Incompatible Changes +=item Future Deprecations + =item Core Enhancements =item Modules and Pragmata @@ -4635,10 +4649,10 @@ I<The Road goes ever on and on, down from the door where it began.> =item Core Enhancements -=item AUTOLOAD Is Now Lvaluable - =over 4 +=item AUTOLOAD Is Now Lvaluable + =item PerlIO is Now The Default =item Signals Are Now Safe @@ -5676,14 +5690,12 @@ finally close()d =over 4 -=item Prerequisites +=item Build Prerequisites =item Getting the perl source =item Making - remember to use a hefty wad of stack (I use 2000000) - =item Testing =item Installing the built perl @@ -5857,6 +5869,50 @@ Source, Compiled Module Source, Perl Modules/Scripts =back +=head2 perldos - Perl under DOS, W31, W95. + +=over 4 + +=item SYNOPSIS + +=item DESCRIPTION + +=over 4 + +=item Prerequisites + +DJGPP, Pthreads + +=item Shortcomings of Perl under DOS + +=item Building + +=item Testing + +=item Installation + +=back + +=item BUILDING AND INSTALLING MODULES + +=over 4 + +=item Building Prerequisites + +=item Unpacking CPAN Modules + +=item Building Non-XS Modules + +=item Building XS Modules + +=back + +=item AUTHOR + +=item SEE ALSO + +=back + =head2 perlepoc, README.epoc - Perl for EPOC =over 4 @@ -5918,6 +5974,8 @@ Source, Compiled Module Source, Perl Modules/Scripts =item Portability Between PA-RISC Versions +=item Itanium Processor Family + =item Building Dynamic Extensions on HP-UX =item The HP ANSI C Compiler @@ -5934,6 +5992,8 @@ Source, Compiled Module Source, Perl Modules/Scripts =item perl -P and // +=item Kernel parameters (maxdsiz) + =back =item AUTHOR @@ -5983,26 +6043,41 @@ op/lexassign.t, pragma/warnings.t =head2 perlmpeix, README.mpeix - Perl/iX for HP e3000 MPE -=head1 SYNOPSIS - =over 4 +=item SYNOPSIS + +=item NOTE + =item What's New +=item Welcome + =item System Requirements =item How to Obtain Perl/iX =item Distribution Contents Highlights -README, public_html/feedback.cgi, 4, 6 +README, INSTALL, LIBSHP3K, PERL, .cpan/, lib/, man/, +public_html/feedback.cgi, src/perl-5.6.0-mpe + +=item How to Compile Perl/iX + + 4, 6 =item Getting Started with Perl/iX =item MPE/iX Implementation Considerations +=item Known Bugs Under Investigation + +=item To-Do List + =item Change History +=item Author + =back =head2 perlos2 - Perl under OS/2, DOS, Win0.3*, Win0.95 and WinNT. @@ -6383,6 +6458,33 @@ DATAMODEL_NATIVE specified", sh: ar: not found =back +=head2 perltru64, README.tru64 - Perl version 5 on Tru64 (formerly known as +Digital UNIX formerly known as DEC OSF/1) systems + +=over 4 + +=item DESCRIPTION + +=over 4 + +=item Compiling Perl 5 on Tru64 + +=item Using Large Files with Perl on Tru64 + +=item Threaded Perl on Tru64 + +=item 64-bit Perl on Tru64 + +=item Warnings about floating-point overflow when compiling Perl on Tru64 + +=back + +=item Testing Perl on Tru64 + +=item AUTHOR + +=back + =head2 perlvmesa, README.vmesa - building and installing Perl for VM/ESA. =over 4 @@ -6399,21 +6501,10 @@ DATAMODEL_NATIVE specified", sh: ar: not found =item Configure -Don't turn on the compiler optimization flag "-O". There's a bug in the -compiler (APAR PQ18812) that generates some bad code the optimizer is on, -As VM/ESA doesn't fully support the fork() API programs relying on this -call will not work. I've replaced fork()/exec() with spawn() and the -standalone exec() with spawn(). This has a side effect when opening unnamed -pipes in a shell script: there is no child process generated under - =item testing anomalies =item Usage Hints -When using perl on VM/ESA please keep in mind that the EBCDIC and ASCII -character sets are different. Perl builtin functions that may behave -differently under EBCDIC are mentioned in the perlport.pod document. - =back =item AUTHORS @@ -6541,6 +6632,48 @@ LIST, waitpid PID,FLAGS =back +=head2 perlwin32 - Perl under Win32 + +=over 4 + +=item SYNOPSIS + +=item DESCRIPTION + +=over 4 + +=item Setting Up + +Make, Command Shell, Borland C++, Microsoft Visual C++, Mingw32 with GCC + +=item Building + +=item Testing + +=item Installation + +=item Usage Hints + +Environment Variables, File Globbing, Using perl from the command line, +Building Extensions, Command-line Wildcard Expansion, Win32 Specific +Extensions, Running Perl Scripts, Miscellaneous Things + +=back + +=item BUGS AND CAVEATS + +=item AUTHORS + +Gary Ng E<lt>71564.1743@CompuServe.COME<gt>, Gurusamy Sarathy +E<lt>gsar@activestate.comE<gt>, Nick Ing-Simmons +E<lt>nick@ing-simmons.netE<gt> + +=item SEE ALSO + +=item HISTORY + +=back + =head1 PRAGMA DOCUMENTATION =head2 attrs - set/get attributes of a subroutine (deprecated) @@ -8372,13 +8505,9 @@ C<byacc>, C<byteorder> C<c>, C<castflags>, C<cat>, C<cc>, C<cccdlflags>, C<ccdlflags>, C<ccflags>, C<ccflags_uselargefiles>, C<ccname>, C<ccsymbols>, C<ccversion>, C<cf_by>, C<cf_email>, C<cf_time>, C<charsize>, C<chgrp>, C<chmod>, C<chown>, -C<clocktype>, C<comm>, C<compress> - -=item C - -C<CONFIGDOTSH>, C<contains>, C<cp>, C<cpio>, C<cpp>, C<cpp_stuff>, -C<cppccsymbols>, C<cppflags>, C<cpplast>, C<cppminus>, C<cpprun>, -C<cppstdin>, C<cppsymbols>, C<crosscompile>, C<cryptlib>, C<csh> +C<clocktype>, C<comm>, C<compress>, C<contains>, C<cp>, C<cpio>, C<cpp>, +C<cpp_stuff>, C<cppccsymbols>, C<cppflags>, C<cpplast>, C<cppminus>, +C<cpprun>, C<cppstdin>, C<cppsymbols>, C<crosscompile>, C<cryptlib>, C<csh> =item d @@ -9292,6 +9421,45 @@ test_f file =back +=head2 ExtUtils::Constant - generate XS code to import C header constants + +=over 4 + +=item SYNOPSIS + +=item DESCRIPTION + +=item USAGE + +IV, UV, NV, PV, PVN + +=item FUNCTIONS + +=back + +C_stringify NAME + +constant_types + +memEQ_clause NAME, CHECKED_AT, INDENT + +return_clause VALUE, TYPE, INDENT, MACRO + +params WHAT + +C_constant SUBNAME, DEFAULT_TYPE, TYPES, INDENT, NAMELEN, ITEM.., name, +type, value, macro + +XS_constant PACKAGE, TYPES, SUBNAME, C_SUBNAME + +autoload PACKAGE, VERSION + +=over 4 + +=item AUTHOR + +=back + =head2 ExtUtils::Embed - Utilities for embedding Perl in C/C++ applications =over 4 @@ -10728,6 +10896,209 @@ locale =back +=head2 I18N::LangTags - functions for dealing with RFC3066-style language +tags + +=over 4 + +=item SYNOPSIS + +=item DESCRIPTION + +=back + +the function is_language_tag($lang1) + +the function extract_language_tags($whatever) + +the function same_language_tag($lang1, $lang2) + +the function similarity_language_tag($lang1, $lang2) + +the function is_dialect_of($lang1, $lang2) + +the function super_languages($lang1) + +the function locale2language_tag($locale_identifier) + +the function encode_language_tag($lang1) + +the function alternate_language_tags($lang1) + +=over 4 + +=item ABOUT LOWERCASING + +=item ABOUT UNICODE PLAINTEXT LANGUAGE TAGS + +=item SEE ALSO + +=item COPYRIGHT + +=item AUTHOR + +=back + +=head2 I18N::LangTags::List, I18n::LangTags::List -- list of tags for human +languages + +=over 4 + +=item SYNOPSIS + +=item ABOUT LANGUAGE TAGS + +=item LIST OF LANGUAGES + +I<{ab}> : Abkhazian, I<{ace}> : Achinese, I<{ach}> : Acoli, I<{ada}> : +Adangme, I<{aa}> : Afar, I<{afh}> : Afrihili, I<{af}> : Afrikaans, +[I<{afa}> : Afro-Asiatic (Other)], I<{aka}> : Akan, I<{akk}> : Akkadian, +I<{sq}> : Albanian, I<{ale}> : Aleut, [I<{alg}> : Algonquian languages], +[I<{tut}> : Altaic (Other)], I<{am}> : Amharic, I<{i-ami}> : Ami, [I<{apa}> +: Apache languages], I<{ar}> : Arabic, I<{arc}> : Aramaic, I<{arp}> : +Arapaho, I<{arn}> : Araucanian, I<{arw}> : Arawak, I<{hy}> : Armenian, +[I<{art}> : Artificial (Other)], I<{as}> : Assamese, [I<{ath}> : Athapascan +languages], [I<{aus}> : Australian languages], [I<{map}> : Austronesian +(Other)], I<{ava}> : Avaric, I<{ae}> : Avestan, I<{awa}> : Awadhi, I<{ay}> +: Aymara, I<{az}> : Azerbaijani, I<{ban}> : Balinese, [I<{bat}> : Baltic +(Other)], I<{bal}> : Baluchi, I<{bam}> : Bambara, [I<{bai}> : Bamileke +languages], I<{bad}> : Banda, [I<{bnt}> : Bantu (Other)], I<{bas}> : Basa, +I<{ba}> : Bashkir, I<{eu}> : Basque, I<{btk}> : Batak (Indonesia), I<{bej}> +: Beja, I<{be}> : Belarusian, I<{bem}> : Bemba, I<{bn}> : Bengali, +[I<{ber}> : Berber (Other)], I<{bho}> : Bhojpuri, I<{bh}> : Bihari, +I<{bik}> : Bikol, I<{bin}> : Bini, I<{bi}> : Bislama, I<{bs}> : Bosnian, +I<{bra}> : Braj, I<{br}> : Breton, I<{bug}> : Buginese, I<{bg}> : +Bulgarian, I<{i-bnn}> : Bunun, I<{bua}> : Buriat, I<{my}> : Burmese, +I<{cad}> : Caddo, I<{car}> : Carib, I<{ca}> : Catalan, [I<{cau}> : +Caucasian (Other)], I<{ceb}> : Cebuano, [I<{cel}> : Celtic (Other)], +[I<{cai}> : Central American Indian (Other)], I<{chg}> : Chagatai, +[I<{cmc}> : Chamic languages], I<{ch}> : Chamorro, I<{ce}> : Chechen, +I<{chr}> : Cherokee, I<{chy}> : Cheyenne, I<{chb}> : Chibcha, I<{ny}> : +Chichewa, I<{zh}> : Chinese, I<{chn}> : Chinook Jargon, I<{chp}> : +Chipewyan, I<{cho}> : Choctaw, I<{cu}> : Church Slavic, I<{chk}> : +Chuukese, I<{cv}> : Chuvash, I<{cop}> : Coptic, I<{kw}> : Cornish, I<{co}> +: Corsican, I<{cre}> : Cree, I<{mus}> : Creek, [I<{cpe}> : English-based +Creoles and pidgins (Other)], [I<{cpf}> : French-based Creoles and pidgins +(Other)], [I<{cpp}> : Portuguese-based Creoles and pidgins (Other)], +[I<{crp}> : Creoles and pidgins (Other)], I<{hr}> : Croatian, [I<{cus}> : +Cushitic (Other)], I<{cs}> : Czech, I<{dak}> : Dakota, I<{da}> : Danish, +I<{day}> : Dayak, I<{i-default}> : Default (Fallthru) Language, I<{del}> : +Delaware, I<{din}> : Dinka, I<{div}> : Divehi, I<{doi}> : Dogri, I<{dgr}> : +Dogrib, [I<{dra}> : Dravidian (Other)], I<{dua}> : Duala, I<{nl}> : Dutch, +I<{dum}> : Middle Dutch (ca.1050-1350), I<{dyu}> : Dyula, I<{dz}> : +Dzongkha, I<{efi}> : Efik, I<{egy}> : Ancient Egyptian, I<{eka}> : Ekajuk, +I<{elx}> : Elamite, I<{en}> : English, I<{enm}> : Old English (1100-1500), +I<{ang}> : Old English (ca.450-1100), I<{eo}> : Esperanto, I<{et}> : +Estonian, I<{ewe}> : Ewe, I<{ewo}> : Ewondo, I<{fan}> : Fang, I<{fat}> : +Fanti, I<{fo}> : Faroese, I<{fj}> : Fijian, I<{fi}> : Finnish, [I<{fiu}> : +Finno-Ugrian (Other)], I<{fon}> : Fon, I<{fr}> : French, I<{frm}> : Middle +French (ca.1400-1600), I<{fro}> : Old French (842-ca.1400), I<{fy}> : +Frisian, I<{fur}> : Friulian, I<{ful}> : Fulah, I<{gaa}> : Ga, I<{gd}> : +Scots Gaelic, I<{gl}> : Gallegan, I<{lug}> : Ganda, I<{gay}> : Gayo, +I<{gba}> : Gbaya, I<{gez}> : Geez, I<{ka}> : Georgian, I<{de}> : German, +I<{gmh}> : Middle High German (ca.1050-1500), I<{goh}> : Old High German +(ca.750-1050), [I<{gem}> : Germanic (Other)], I<{gil}> : Gilbertese, +I<{gon}> : Gondi, I<{gor}> : Gorontalo, I<{got}> : Gothic, I<{grb}> : +Grebo, I<{grc}> : Ancient Greek (to 1453), I<{el}> : Modern Greek (1453-), +I<{gn}> : Guarani, I<{gu}> : Gujarati, I<{gwi}> : Gwich'in, I<{hai}> : +Haida, I<{ha}> : Hausa, I<{haw}> : Hawaiian, I<{he}> : Hebrew, I<{hz}> : +Herero, I<{hil}> : Hiligaynon, I<{him}> : Himachali, I<{hi}> : Hindi, +I<{ho}> : Hiri Motu, I<{hit}> : Hittite, I<{hmn}> : Hmong, I<{hu}> : +Hungarian, I<{hup}> : Hupa, I<{iba}> : Iban, I<{is}> : Icelandic, I<{ibo}> +: Igbo, I<{ijo}> : Ijo, I<{ilo}> : Iloko, [I<{inc}> : Indic (Other)], +[I<{ine}> : Indo-European (Other)], I<{id}> : Indonesian, I<{ia}> : +Interlingua (International Auxiliary Language Association), I<{ie}> : +Interlingue, I<{iu}> : Inuktitut, I<{ik}> : Inupiaq, [I<{ira}> : Iranian +(Other)], I<{ga}> : Irish, I<{mga}> : Middle Irish (900-1200), I<{sga}> : +Old Irish (to 900), [I<{iro}> : Iroquoian languages], I<{it}> : Italian, +I<{ja}> : Japanese, I<{jw}> : Javanese, I<{jrb}> : Judeo-Arabic, I<{jpr}> : +Judeo-Persian, I<{kab}> : Kabyle, I<{kac}> : Kachin, I<{kl}> : Kalaallisut, +I<{kam}> : Kamba, I<{kn}> : Kannada, I<{kau}> : Kanuri, I<{kaa}> : +Kara-Kalpak, I<{kar}> : Karen, I<{ks}> : Kashmiri, I<{kaw}> : Kawi, I<{kk}> +: Kazakh, I<{kha}> : Khasi, I<{km}> : Khmer, [I<{khi}> : Khoisan (Other)], +I<{kho}> : Khotanese, I<{ki}> : Kikuyu, I<{kmb}> : Kimbundu, I<{rw}> : +Kinyarwanda, I<{ky}> : Kirghiz, I<{i-klingon}> : Klingon, I<{kv}> : Komi, +I<{kon}> : Kongo, I<{kok}> : Konkani, I<{ko}> : Korean, I<{kos}> : +Kosraean, I<{kpe}> : Kpelle, I<{kro}> : Kru, I<{kj}> : Kuanyama, I<{kum}> : +Kumyk, I<{ku}> : Kurdish, I<{kru}> : Kurukh, I<{kut}> : Kutenai, I<{lad}> : +Ladino, I<{lah}> : Lahnda, I<{lam}> : Lamba, I<{lo}> : Lao, I<{la}> : +Latin, I<{lv}> : Latvian, I<{lb}> : Letzeburgesch, I<{lez}> : Lezghian, +I<{ln}> : Lingala, I<{lt}> : Lithuanian, I<{nds}> : Low German, I<{loz}> : +Lozi, I<{lub}> : Luba-Katanga, I<{lua}> : Luba-Lulua, I<{lui}> : Luiseno, +I<{lun}> : Lunda, I<{luo}> : Luo (Kenya and Tanzania), I<{lus}> : Lushai, +I<{mk}> : Macedonian, I<{mad}> : Madurese, I<{mag}> : Magahi, I<{mai}> : +Maithili, I<{mak}> : Makasar, I<{mg}> : Malagasy, I<{ms}> : Malay, I<{ml}> +: Malayalam, I<{mt}> : Maltese, I<{mnc}> : Manchu, I<{mdr}> : Mandar, +I<{man}> : Mandingo, I<{mni}> : Manipuri, [I<{mno}> : Manobo languages], +I<{gv}> : Manx, I<{mi}> : Maori, I<{mr}> : Marathi, I<{chm}> : Mari, +I<{mh}> : Marshall, I<{mwr}> : Marwari, I<{mas}> : Masai, [I<{myn}> : Mayan +languages], I<{men}> : Mende, I<{mic}> : Micmac, I<{min}> : Minangkabau, +I<{i-mingo}> : Mingo, [I<{mis}> : Miscellaneous languages], I<{moh}> : +Mohawk, I<{mo}> : Moldavian, [I<{mkh}> : Mon-Khmer (Other)], I<{lol}> : +Mongo, I<{mn}> : Mongolian, I<{mos}> : Mossi, [I<{mul}> : Multiple +languages], [I<{mun}> : Munda languages], I<{nah}> : Nahuatl, I<{na}> : +Nauru, I<{nv}> : Navajo, I<{nd}> : North Ndebele, I<{nr}> : South Ndebele, +I<{ng}> : Ndonga, I<{ne}> : Nepali, I<{new}> : Newari, I<{nia}> : Nias, +[I<{nic}> : Niger-Kordofanian (Other)], [I<{ssa}> : Nilo-Saharan (Other)], +I<{niu}> : Niuean, I<{non}> : Old Norse, [I<{nai}> : North American +Indian], I<{se}> : Northern Sami, I<{no}> : Norwegian, I<{nb}> : Norwegian +BokmE<aring>l, I<{nn}> : Norwegian Nynorsk, [I<{nub}> : Nubian languages], +I<{nym}> : Nyamwezi, I<{nyn}> : Nyankole, I<{nyo}> : Nyoro, I<{nzi}> : +Nzima, I<{oc}> : Occitan (post 1500), I<{oji}> : Ojibwa, I<{or}> : Oriya, +I<{om}> : Oromo, I<{osa}> : Osage, I<{os}> : Ossetian; Ossetic, [I<{oto}> : +Otomian languages], I<{pal}> : Pahlavi, I<{i-pwn}> : Paiwan, I<{pau}> : +Palauan, I<{pi}> : Pali, I<{pam}> : Pampanga, I<{pag}> : Pangasinan, +I<{pa}> : Panjabi, I<{pap}> : Papiamento, [I<{paa}> : Papuan (Other)], +I<{fa}> : Persian, I<{peo}> : Old Persian (ca.600-400 B.C.), [I<{phi}> : +Philippine (Other)], I<{phn}> : Phoenician, I<{pon}> : Pohnpeian, I<{pl}> : +Polish, I<{pt}> : Portuguese, [I<{pra}> : Prakrit languages], I<{pro}> : +Old ProvenE<ccedil>al (to 1500), I<{ps}> : Pushto, I<{qu}> : Quechua, +I<{rm}> : Raeto-Romance, I<{raj}> : Rajasthani, I<{rap}> : Rapanui, +I<{rar}> : Rarotongan, [I<{qaa}>-I<qtz> : Reserved for local use.], +[I<{roa}> : Romance (Other)], I<{ro}> : Romanian, I<{rom}> : Romany, +I<{rn}> : Rundi, I<{ru}> : Russian, [I<{sal}> : Salishan languages], +I<{sam}> : Samaritan Aramaic, [I<{smi}> : Sami languages (Other)], I<{sm}> +: Samoan, I<{sad}> : Sandawe, I<{sg}> : Sango, I<{sa}> : Sanskrit, I<{sat}> +: Santali, I<{sc}> : Sardinian, I<{sas}> : Sasak, I<{sco}> : Scots, +I<{sel}> : Selkup, [I<{sem}> : Semitic (Other)], I<{sr}> : Serbian, +I<{srr}> : Serer, I<{shn}> : Shan, I<{sn}> : Shona, I<{sid}> : Sidamo, +I<{sgn-...}> : Sign Languages, I<{bla}> : Siksika, I<{sd}> : Sindhi, +I<{si}> : Sinhalese, [I<{sit}> : Sino-Tibetan (Other)], [I<{sio}> : Siouan +languages], I<{den}> : Slave (Athapascan), [I<{sla}> : Slavic (Other)], +I<{sk}> : Slovak, I<{sl}> : Slovenian, I<{sog}> : Sogdian, I<{so}> : +Somali, I<{son}> : Songhai, I<{snk}> : Soninke, I<{wen}> : Sorbian +languages, I<{nso}> : Northern Sotho, I<{st}> : Southern Sotho, [I<{sai}> : +South American Indian (Other)], I<{es}> : Spanish, I<{suk}> : Sukuma, +I<{sux}> : Sumerian, I<{su}> : Sundanese, I<{sus}> : Susu, I<{sw}> : +Swahili, I<{ss}> : Swati, I<{sv}> : Swedish, I<{syr}> : Syriac, I<{tl}> : +Tagalog, I<{ty}> : Tahitian, [I<{tai}> : Tai (Other)], I<{tg}> : Tajik, +I<{tmh}> : Tamashek, I<{ta}> : Tamil, I<{i-tao}> : Tao, I<{tt}> : Tatar, +I<{i-tay}> : Tayal, I<{te}> : Telugu, I<{ter}> : Tereno, I<{tet}> : Tetum, +I<{th}> : Thai, I<{bo}> : Tibetan, I<{tig}> : Tigre, I<{ti}> : Tigrinya, +I<{tem}> : Timne, I<{tiv}> : Tiv, I<{tli}> : Tlingit, I<{tpi}> : Tok Pisin, +I<{tkl}> : Tokelau, I<{tog}> : Tonga (Nyasa), I<{to}> : Tonga (Tonga +Islands), I<{tsi}> : Tsimshian, I<{ts}> : Tsonga, I<{i-tsu}> : Tsou, +I<{tn}> : Tswana, I<{tum}> : Tumbuka, I<{tr}> : Turkish, I<{ota}> : Ottoman +Turkish (1500-1928), I<{tk}> : Turkmen, I<{tvl}> : Tuvalu, I<{tyv}> : +Tuvinian, I<{tw}> : Twi, I<{uga}> : Ugaritic, I<{ug}> : Uighur, I<{uk}> : +Ukrainian, I<{umb}> : Umbundu, I<{und}> : Undetermined, I<{ur}> : Urdu, +I<{uz}> : Uzbek, I<{vai}> : Vai, I<{ven}> : Venda, I<{vi}> : Vietnamese, +I<{vo}> : VolapE<uuml>k, I<{vot}> : Votic, [I<{wak}> : Wakashan languages], +I<{wal}> : Walamo, I<{war}> : Waray, I<{was}> : Washo, I<{cy}> : Welsh, +I<{wo}> : Wolof, I<{x-...}> : Unregistered (Private Use), I<{xh}> : Xhosa, +I<{sah}> : Yakut, I<{yao}> : Yao, I<{yap}> : Yapese, I<{yi}> : Yiddish, +I<{yo}> : Yoruba, [I<{ypk}> : Yupik languages], I<{znd}> : Zande, [I<{zap}> +: Zapotec], I<{zen}> : Zenaga, I<{za}> : Zhuang, I<{zu}> : Zulu, I<{zun}> : +Zuni + +=item SEE ALSO + +=item COPYRIGHT AND DISCLAIMER + +=item AUTHOR + +=back + =head2 IO - load various IO modules =over 4 @@ -11585,6 +11956,101 @@ http://lcweb.loc.gov/standards/iso639-2/langhome.html =back +=head2 Locale::Maketext -- framework for localization + +=over 4 + +=item SYNOPSIS + +=item DESCRIPTION + +=item QUICK OVERVIEW + +=item METHODS + +=over 4 + +=item Construction Methods + +$lh = YourProjClass->get_handle( ...langtags... ) || die "lg-handle?";, $lh += YourProjClass->get_handleB<()> || die "lg-handle?";, $lh = +YourProjClass::langname->new();, $lh->init();, +YourProjClass->fallback_languages(), +YourProjClass->fallback_language_classes() + +=item The "maketext" Method + +$lh->fail_with I<or> $lh->fail_with(I<PARAM>), $lh->failure_handler_auto + +=item Utility Methods + +$language->quant($number, $singular), $language->quant($number, $singular, +$plural), $language->quant($number, $singular, $plural, $negative), +$language->numf($number), $language->sprintf($format, @items), +$language->language_tag(), $language->encoding() + +=item Language Handle Attributes and Internals + +=back + +=item LANGUAGE CLASS HIERARCHIES + +=item ENTRIES IN EACH LEXICON + +=item BRACKET NOTATION + +=item AUTO LEXICONS + +=item CONTROLLING LOOKUP FAILURE + +=item HOW TO USE MAKETEXT + +=item SEE ALSO + +=item COPYRIGHT AND DISCLAIMER + +=item AUTHOR + +=back + +=head2 Locale::Maketext::TPJ13 -- article about software localization + +=over 4 + +=item SYNOPSIS + +=item DESCRIPTION + +=item Localization and Perl: gettext breaks, Maketext fixes + +=over 4 + +=item A Localization Horror Story: It Could Happen To You + +=item The Linguistic View + +=item Breaking gettext + +=item Replacing gettext + +=item Buzzwords: Abstraction and Encapsulation + +=item Buzzword: Isomorphism + +=item Buzzword: Inheritance + +=item Buzzword: Concision + +=item The Devil in the Details + +=item The Proof in the Pudding: Localizing Web Sites + +=item References + +=back + +=back + =head2 MIME::Base64 - Encoding and decoding of base64 strings =over 4 @@ -14125,6 +14591,13 @@ getitimer ( $which ) =item Week Number +=item strftime method + +%%, %a, %A, %b, %B, %c, %C, %d, %D, %e, %h, %H, %I, %j, %m, %M, %n, %p, %r, +%R, %S, %t, %T, %u, %U, %V, %w, %W, %x, %y, %Y, %Z + +=item strptime function + =item Global Overriding =back diff --git a/pod/perltodo.pod b/pod/perltodo.pod index 8c94a29c9a..3882498750 100644 --- a/pod/perltodo.pod +++ b/pod/perltodo.pod @@ -68,6 +68,11 @@ They have some tricks Perl doesn't yet implement. Artur Bergman's C<iThreads> module is a start on this, but needs to be more mature. +=head2 make perl_clone optionally clone ops + +So that pseudoforking, mod_perl, iThreads and nvi will work properly +(but not as efficiently) until the regex engine is fixed to be threadsafe. + =head2 Work out exit/die semantics for threads =head2 Typed lexicals for compiler @@ -418,9 +423,9 @@ There are a few suggestions for what to do with C<perldoc>: maybe a full-text search, an index function, locating pages on a particular high-level subject, and so on. -=head2 Install .3p man pages +=head2 Install .3p manpages -This is a bone of contention; we can create C<.3p> man pages for each +This is a bone of contention; we can create C<.3p> manpages for each built-in function, but should we install them by default? Tcl does this, and it clutters up C<apropos>. @@ -429,6 +434,7 @@ and it clutters up C<apropos>. Simon Cozens promises to do this before he gets old. =head2 Update POSIX.pm for 1003.1-2 + =head2 Retargetable installation Allow C<@INC> to be changed after Perl is built. @@ -468,7 +474,9 @@ code. There is a patch for this, but it may require Unicodification. =head2 Compile to real threaded code + =head2 Structured types + =head2 Modifiable $1 et al. ($x = "elephant") =~ /e(ph)/; @@ -690,11 +698,13 @@ This would break old code; use C<do{{ }}> instead. Not needed now we have lexical IO handles. =head2 format BOTTOM + =head2 report HANDLE Damian Conway's text formatting modules seem to be the Way To Go. =head2 Generalised want()/caller()) + =head2 Named prototypes These both seem to be delayed until Perl 6. diff --git a/pod/perlutil.pod b/pod/perlutil.pod index 45341d9e85..f299f2d13f 100644 --- a/pod/perlutil.pod +++ b/pod/perlutil.pod @@ -29,7 +29,7 @@ described in this document. If it's run from a terminal, F<perldoc> will usually call F<pod2man> to translate POD (Plain Old Documentation - see L<perlpod> for an -explanation) into a man page, and then run F<man> to display it; if +explanation) into a manpage, and then run F<man> to display it; if F<man> isn't available, F<pod2text> will be used instead and the output piped through your favourite pager. diff --git a/pod/perlxstut.pod b/pod/perlxstut.pod index f06e166326..ad648ebf37 100644 --- a/pod/perlxstut.pod +++ b/pod/perlxstut.pod @@ -915,9 +915,9 @@ way to store and load your extra subroutines. There is absolutely no excuse for not documenting your extension. Documentation belongs in the .pm file. This file will be fed to pod2man, -and the embedded documentation will be converted to the man page format, -then placed in the blib directory. It will be copied to Perl's man -page directory when the extension is installed. +and the embedded documentation will be converted to the manpage format, +then placed in the blib directory. It will be copied to Perl's +manpage directory when the extension is installed. You may intersperse documentation and Perl code within the .pm file. In fact, if you want to use method autoloading, you must do this, @@ -344,6 +344,7 @@ See C<PUSHu>. if (PL_amagic_generation) { \ SV* tmpsv; \ SV* arg= sp[shift]; \ + if(0) goto am_again; /* shut up unused warning */ \ am_again: \ if ((SvAMAGIC(arg))&&\ (tmpsv=AMG_CALLun(arg,meth))) {\ @@ -1434,10 +1434,6 @@ Perl_die_where(pTHX_ char *message, STRLEN msglen) } else { sv_setpvn(ERRSV, message, msglen); - if (PL_hints & HINT_UTF8) - SvUTF8_on(ERRSV); - else - SvUTF8_off(ERRSV); } } else @@ -142,51 +142,56 @@ PP(pp_concat) dSP; dATARGET; tryAMAGICbin(concat,opASSIGN); { dPOPTOPssrl; - SV* rcopy = Nullsv; - - if (SvGMAGICAL(left)) - mg_get(left); - if (TARG == right && SvGMAGICAL(right)) - mg_get(right); - - if (TARG == right && left != right) - /* Clone since otherwise we cannot prepend. */ - rcopy = sv_2mortal(newSVsv(right)); - - if (TARG != left) - sv_setsv(TARG, left); + STRLEN llen; + char* lpv; + bool lbyte; + STRLEN rlen; + char* rpv = SvPV(right, rlen); /* mg_get(right) happens here */ + bool rbyte = !SvUTF8(right); + + if (TARG == right && right != left) { + right = sv_2mortal(newSVpvn(rpv, rlen)); + rpv = SvPV(right, rlen); /* no point setting UTF8 here */ + } + + if (TARG != left) { + lpv = SvPV(left, llen); /* mg_get(left) may happen here */ + lbyte = !SvUTF8(left); + sv_setpvn(TARG, lpv, llen); + if (!lbyte) + SvUTF8_on(TARG); + else + SvUTF8_off(TARG); + } + else { /* TARG == left */ + if (SvGMAGICAL(left)) + mg_get(left); /* or mg_get(left) may happen here */ + if (!SvOK(TARG)) + sv_setpv(left, ""); + lpv = SvPV_nomg(left, llen); + lbyte = !SvUTF8(left); + } #if defined(PERL_Y2KWARN) if ((SvIOK(right) || SvNOK(right)) && ckWARN(WARN_Y2K) && SvOK(TARG)) { - STRLEN n; - char *s = SvPV(TARG,n); - if (n >= 2 && s[n-2] == '1' && s[n-1] == '9' - && (n == 2 || !isDIGIT(s[n-3]))) - { - Perl_warner(aTHX_ WARN_Y2K, "Possible Y2K bug: %s", - "about to append an integer to '19'"); - } + if (llen >= 2 && lpv[llen - 2] == '1' && lpv[llen - 1] == '9' + && (llen == 2 || !isDIGIT(lpv[llen - 3]))) + { + Perl_warner(aTHX_ WARN_Y2K, "Possible Y2K bug: %s", + "about to append an integer to '19'"); + } } #endif - if (TARG == right) { - if (left == right) { - /* $right = $right . $right; */ - STRLEN rlen; - char *rpv = SvPV(right, rlen); - - sv_catpvn(TARG, rpv, rlen); + if (lbyte != rbyte) { + if (lbyte) + sv_utf8_upgrade_nomg(TARG); + else { + sv_utf8_upgrade_nomg(right); + rpv = SvPV(right, rlen); } - else /* $right = $left . $right; */ - sv_catsv(TARG, rcopy); - } - else { - if (!SvOK(TARG)) /* Avoid warning when concatenating to undef. */ - sv_setpv(TARG, ""); - /* $other = $left . $right; */ - /* $left = $left . $right; */ - sv_catsv(TARG, right); } + sv_catpvn_nomg(TARG, rpv, rlen); SETTARG; RETURN; @@ -1306,3 +1306,9 @@ STATIC void S_xstat(pTHX_ int); #if defined(PERL_OBJECT) }; #endif +PERL_CALLCONV void Perl_sv_setsv_flags(pTHX_ SV* dsv, SV* ssv, I32 flags); +PERL_CALLCONV void Perl_sv_catpvn_flags(pTHX_ SV* sv, const char* ptr, STRLEN len, I32 flags); +PERL_CALLCONV void Perl_sv_catsv_flags(pTHX_ SV* dsv, SV* ssv, I32 flags); +PERL_CALLCONV STRLEN Perl_sv_utf8_upgrade_flags(pTHX_ SV *sv, I32 flags); +PERL_CALLCONV char* Perl_sv_pvn_force_flags(pTHX_ SV* sv, STRLEN* lp, I32 flags); +PERL_CALLCONV char* Perl_sv_2pv_flags(pTHX_ SV* sv, STRLEN* lp, I32 flags); @@ -2211,6 +2211,7 @@ S_regmatch(pTHX_ regnode *prog) if (!nextchr) sayNO; if (do_utf8) { + LOAD_UTF8_CHARCLASS(alnum,"a"); if (!(OP(scan) == ALNUM ? swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8) : isALNUM_LC_utf8((U8*)locinput))) @@ -203,7 +203,7 @@ S_save_scalar_at(pTHX_ SV **sptr) mg->mg_obj = osv; } SvFLAGS(osv) |= (SvFLAGS(osv) & - (SVp_IOK|SVp_NOK|SVp_POK)) >> PRIVSHIFT; + (SVp_NOK|SVp_POK)) >> PRIVSHIFT; PL_tainted = oldtainted; } SvMAGIC(sv) = SvMAGIC(osv); @@ -699,7 +699,7 @@ Perl_leave_scope(pTHX_ I32 base) SvTYPE(value) != SVt_PVGV) { SvFLAGS(value) |= (SvFLAGS(value) & - (SVp_IOK|SVp_NOK|SVp_POK)) >> PRIVSHIFT; + (SVp_NOK|SVp_POK)) >> PRIVSHIFT; SvMAGICAL_off(value); /* XXX this is a leak when we get here because the * mg_get() in save_scalar_at() croaked */ @@ -2306,7 +2306,10 @@ Perl_sv_2nv(pTHX_ register SV *sv) } else if (SvTYPE(sv) < SVt_PVNV) sv_upgrade(sv, SVt_PVNV); - if (SvIOKp(sv) && + if (SvNOKp(sv) && !(SvIOK(sv) || SvPOK(sv))) { + SvNOK_on(sv); + } + else if (SvIOKp(sv) && (!SvPOKp(sv) || !strchr(SvPVX(sv),'.') || !looks_like_number(sv))) { SvNVX(sv) = SvIsUV(sv) ? (NV)SvUVX(sv) : (NV)SvIVX(sv); @@ -2643,6 +2646,12 @@ uiv_2buf(char *buf, IV iv, UV uv, int is_uv, char **peob) char * Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) { + return sv_2pv_flags(sv, lp, SV_GMAGIC); +} + +char * +Perl_sv_2pv_flags(pTHX_ register SV *sv, STRLEN *lp, I32 flags) +{ register char *s; int olderrno; SV *tsv; @@ -2654,7 +2663,8 @@ Perl_sv_2pv(pTHX_ register SV *sv, STRLEN *lp) return ""; } if (SvGMAGICAL(sv)) { - mg_get(sv); + if (flags & SV_GMAGIC) + mg_get(sv); if (SvPOKp(sv)) { *lp = SvCUR(sv); return SvPVX(sv); @@ -2965,6 +2975,25 @@ if all the bytes have hibit clear. STRLEN Perl_sv_utf8_upgrade(pTHX_ register SV *sv) { + return sv_utf8_upgrade_flags(sv, SV_GMAGIC); +} + +/* +=for apidoc sv_utf8_upgrade_flags + +Convert the PV of an SV to its UTF8-encoded form. +Forces the SV to string form it it is not already. +Always sets the SvUTF8 flag to avoid future validity checks even +if all the bytes have hibit clear. If C<flags> has C<SV_GMAGIC> bit set, +will C<mg_get> on C<sv> if appropriate, else not. C<sv_utf8_upgrade> and +C<sv_utf8_upgrade_nomg> are implemented in terms of this function. + +=cut +*/ + +STRLEN +Perl_sv_utf8_upgrade_flags(pTHX_ register SV *sv, I32 flags) +{ U8 *s, *t, *e; int hibit = 0; @@ -2973,7 +3002,7 @@ Perl_sv_utf8_upgrade(pTHX_ register SV *sv) if (!SvPOK(sv)) { STRLEN len = 0; - (void) sv_2pv(sv,&len); + (void) sv_2pv_flags(sv,&len, flags); if (!SvPOK(sv)) return len; } @@ -3149,9 +3178,30 @@ C<sv_setsv_mg>. =cut */ +/* sv_setsv() is aliased to Perl_sv_setsv_macro; this function provided + for binary compatibility only +*/ void Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) { + sv_setsv_flags(dstr, sstr, SV_GMAGIC); +} + +/* +=for apidoc sv_setsv_flags + +Copies the contents of the source SV C<ssv> into the destination SV C<dsv>. +The source SV may be destroyed if it is mortal. Does not handle 'set' +magic. If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<ssv> if +appropriate, else not. C<sv_setsv> and C<sv_setsv_nomg> are implemented +in terms of this function. + +=cut +*/ + +void +Perl_sv_setsv_flags(pTHX_ SV *dstr, register SV *sstr, I32 flags) +{ register U32 sflags; register int dtype; register int stype; @@ -3305,7 +3355,7 @@ Perl_sv_setsv(pTHX_ SV *dstr, register SV *sstr) /* FALL THROUGH */ default: - if (SvGMAGICAL(sstr)) { + if (SvGMAGICAL(sstr) && (flags & SV_GMAGIC)) { mg_get(sstr); if (SvTYPE(sstr) != stype) { stype = SvTYPE(sstr); @@ -3833,21 +3883,43 @@ Handles 'get' magic, but not 'set' magic. See C<sv_catpvn_mg>. =cut */ +/* sv_catpvn() is aliased to Perl_sv_catpvn_macro; this function provided + for binary compatibility only +*/ void -Perl_sv_catpvn(pTHX_ register SV *sv, register const char *ptr, register STRLEN len) +Perl_sv_catpvn(pTHX_ SV *dsv, const char* sstr, STRLEN slen) { - STRLEN tlen; - char *junk; + sv_catpvn_flags(dsv, sstr, slen, SV_GMAGIC); +} - junk = SvPV_force(sv, tlen); - SvGROW(sv, tlen + len + 1); - if (ptr == junk) - ptr = SvPVX(sv); - Move(ptr,SvPVX(sv)+tlen,len,char); - SvCUR(sv) += len; - *SvEND(sv) = '\0'; - (void)SvPOK_only_UTF8(sv); /* validate pointer */ - SvTAINT(sv); +/* +=for apidoc sv_catpvn_flags + +Concatenates the string onto the end of the string which is in the SV. The +C<len> indicates number of bytes to copy. If the SV has the UTF8 +status set, then the bytes appended should be valid UTF8. +If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<dsv> if +appropriate, else not. C<sv_catpvn> and C<sv_catpvn_nomg> are implemented +in terms of this function. + +=cut +*/ + +void +Perl_sv_catpvn_flags(pTHX_ register SV *dsv, register const char *sstr, register STRLEN slen, I32 flags) +{ + STRLEN dlen; + char *dstr; + + dstr = SvPV_force_flags(dsv, dlen, flags); + SvGROW(dsv, dlen + slen + 1); + if (sstr == dstr) + sstr = SvPVX(dsv); + Move(sstr, SvPVX(dsv) + dlen, slen, char); + SvCUR(dsv) += slen; + *SvEND(dsv) = '\0'; + (void)SvPOK_only_UTF8(dsv); /* validate pointer */ + SvTAINT(dsv); } /* @@ -3874,36 +3946,52 @@ not 'set' magic. See C<sv_catsv_mg>. =cut */ +/* sv_catsv() is aliased to Perl_sv_catsv_macro; this function provided + for binary compatibility only +*/ void -Perl_sv_catsv(pTHX_ SV *dsv, register SV *ssv) +Perl_sv_catsv(pTHX_ SV *dstr, register SV *sstr) +{ + sv_catsv_flags(dstr, sstr, SV_GMAGIC); +} + +/* +=for apidoc sv_catsv_flags + +Concatenates the string from SV C<ssv> onto the end of the string in +SV C<dsv>. Modifies C<dsv> but not C<ssv>. If C<flags> has C<SV_GMAGIC> +bit set, will C<mg_get> on the SVs if appropriate, else not. C<sv_catsv> +and C<sv_catsv_nomg> are implemented in terms of this function. + +=cut */ + +void +Perl_sv_catsv_flags(pTHX_ SV *dsv, register SV *ssv, I32 flags) { char *spv; STRLEN slen; if (!ssv) return; if ((spv = SvPV(ssv, slen))) { - bool dutf8 = DO_UTF8(dsv); bool sutf8 = DO_UTF8(ssv); + bool dutf8; - if (dutf8 == sutf8) - sv_catpvn(dsv,spv,slen); - else { + if (SvGMAGICAL(dsv) && (flags & SV_GMAGIC)) + mg_get(dsv); + dutf8 = DO_UTF8(dsv); + + if (dutf8 != sutf8) { if (dutf8) { /* Not modifying source SV, so taking a temporary copy. */ - SV* csv = sv_2mortal(newSVsv(ssv)); - char *cpv; - STRLEN clen; + SV* csv = sv_2mortal(newSVpvn(spv, slen)); sv_utf8_upgrade(csv); - cpv = SvPV(csv,clen); - sv_catpvn(dsv,cpv,clen); - } - else { - sv_utf8_upgrade(dsv); - sv_catpvn(dsv,spv,slen); - SvUTF8_on(dsv); /* If dsv has no wide characters. */ + spv = SvPV(csv, slen); } + else + sv_utf8_upgrade_nomg(dsv); } + sv_catpvn_nomg(dsv, spv, slen); } } @@ -4206,7 +4294,7 @@ Perl_sv_unmagic(pTHX_ SV *sv, int type) } if (!SvMAGIC(sv)) { SvMAGICAL_off(sv); - SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_IOK|SVp_NOK|SVp_POK)) >> PRIVSHIFT; + SvFLAGS(sv) |= (SvFLAGS(sv) & (SVp_NOK|SVp_POK)) >> PRIVSHIFT; } return 0; @@ -6168,6 +6256,23 @@ Get a sensible string out of the SV somehow. char * Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) { + return sv_pvn_force_flags(sv, lp, SV_GMAGIC); +} + +/* +=for apidoc sv_pvn_force_flags + +Get a sensible string out of the SV somehow. +If C<flags> has C<SV_GMAGIC> bit set, will C<mg_get> on C<sv> if +appropriate, else not. C<sv_pvn_force> and C<sv_pvn_force_nomg> are +implemented in terms of this function. + +=cut +*/ + +char * +Perl_sv_pvn_force_flags(pTHX_ SV *sv, STRLEN *lp, I32 flags) +{ char *s; if (SvTHINKFIRST(sv) && !SvROK(sv)) @@ -6182,7 +6287,7 @@ Perl_sv_pvn_force(pTHX_ SV *sv, STRLEN *lp) PL_op_name[PL_op->op_type]); } else - s = sv_2pv(sv, lp); + s = sv_2pv_flags(sv, lp, flags); if (s != SvPVX(sv)) { /* Almost, but not quite, sv_setpvn() */ STRLEN len = *lp; @@ -7980,9 +8085,9 @@ Perl_sv_dup(pTHX_ SV *sstr) break; case SVt_RV: SvANY(dstr) = new_XRV(); - SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + SvRV(dstr) = SvRV(sstr) && SvWEAKREF(SvRV(sstr)) + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); break; case SVt_PV: SvANY(dstr) = new_XPV(); @@ -7990,8 +8095,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvLEN(dstr) = SvLEN(sstr); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -8004,8 +8109,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvIVX(dstr) = SvIVX(sstr); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -8019,8 +8124,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvNVX(dstr) = SvNVX(sstr); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -8036,8 +8141,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -8053,8 +8158,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -8073,8 +8178,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -8106,8 +8211,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -8129,8 +8234,8 @@ Perl_sv_dup(pTHX_ SV *sstr) SvSTASH(dstr) = hv_dup_inc(SvSTASH(sstr)); if (SvROK(sstr)) SvRV(dstr) = SvWEAKREF(SvRV(sstr)) - ? sv_dup_inc(SvRV(sstr)) - : sv_dup(SvRV(sstr)); + ? sv_dup(SvRV(sstr)) + : sv_dup_inc(SvRV(sstr)); else if (SvPVX(sstr) && SvLEN(sstr)) SvPVX(dstr) = SAVEPVN(SvPVX(sstr), SvLEN(sstr)-1); else @@ -594,7 +594,7 @@ Unsets the UTF8 status of an SV. =for apidoc Am|void|SvPOK_only_UTF8|SV* sv Tells an SV that it is a string and disables all other OK bits, and leaves the UTF8 status as it was. - + =cut */ @@ -890,16 +890,51 @@ false, defined or undefined. Does not handle 'get' magic. #undef SvNV #define SvNV(sv) (SvNOK(sv) ? SvNVX(sv) : sv_2nv(sv)) +/* flag values for sv_*_flags functions */ +#define SV_IMMEDIATE_UNREF 1 +#define SV_GMAGIC 2 + +#define sv_setsv_macro(dsv, ssv) sv_setsv_flags(dsv, ssv, SV_GMAGIC) +#define sv_setsv_nomg(dsv, ssv) sv_setsv_flags(dsv, ssv, 0) +#define sv_catsv_macro(dsv, ssv) sv_catsv_flags(dsv, ssv, SV_GMAGIC) +#define sv_catsv_nomg(dsv, ssv) sv_catsv_flags(dsv, ssv, 0) +#define sv_catpvn_macro(dsv, sstr, slen) sv_catpvn_flags(dsv, sstr, slen, SV_GMAGIC) +#define sv_catpvn_nomg(dsv, sstr, slen) sv_catpvn_flags(dsv, sstr, slen, 0) +#define sv_2pv_macro(sv, lp) sv_2pv_flags(sv, lp, SV_GMAGIC) +#define sv_2pv_nomg(sv, lp) sv_2pv_flags(sv, lp, 0) +#define sv_pvn_force_macro(sv, lp) sv_pvn_force_flags(sv, lp, SV_GMAGIC) +#define sv_pvn_force_nomg(sv, lp) sv_pvn_force_flags(sv, lp, 0) +#define sv_utf8_upgrade_macro(sv) sv_utf8_upgrade_flags(sv, SV_GMAGIC) +#define sv_utf8_upgrade_nomg(sv) sv_utf8_upgrade_flags(sv, 0) + +/* function style also available for bincompat */ +#define sv_setsv(dsv, ssv) sv_setsv_macro(dsv, ssv) +#define sv_catsv(dsv, ssv) sv_catsv_macro(dsv, ssv) +#define sv_catpvn(dsv, sstr, slen) sv_catpvn_macro(dsv, sstr, slen) +#define sv_2pv(sv, lp) sv_2pv_macro(sv, lp) +#define sv_pvn_force(sv, lp) sv_pvn_force_macro(sv, lp) +#define sv_utf8_upgrade(sv) sv_utf8_upgrade_macro(sv) + #undef SvPV -#define SvPV(sv, lp) \ - ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ - ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pv(sv, &lp)) +#define SvPV(sv, lp) SvPV_flags(sv, lp, SV_GMAGIC) +#undef SvPV_nomg +#define SvPV_nomg(sv, lp) SvPV_flags(sv, lp, 0) + +#undef SvPV_flags +#define SvPV_flags(sv, lp, flags) \ + ((SvFLAGS(sv) & (SVf_POK)) == SVf_POK \ + ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_2pv_flags(sv, &lp, flags)) #undef SvPV_force -#define SvPV_force(sv, lp) \ +#define SvPV_force(sv, lp) SvPV_force_flags(sv, lp, SV_GMAGIC) +#undef SvPV_force_nomg +#define SvPV_force_nomg(sv, lp) SvPV_force_flags(sv, lp, 0) + +#undef SvPV_force_flags +#define SvPV_force_flags(sv, lp, flags) \ ((SvFLAGS(sv) & (SVf_POK|SVf_THINKFIRST)) == SVf_POK \ - ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_pvn_force(sv, &lp)) + ? ((lp = SvCUR(sv)), SvPVX(sv)) : sv_pvn_force_flags(sv, &lp, flags)) #undef SvPV_nolen #define SvPV_nolen(sv) \ @@ -1108,8 +1143,6 @@ Returns a pointer to the character buffer. #define SvGROW(sv,len) (SvLEN(sv) < (len) ? sv_grow(sv,len) : SvPVX(sv)) #define Sv_Grow sv_grow -#define SV_IMMEDIATE_UNREF 1 - #define CLONEf_COPY_STACKS 1 #define CLONEf_KEEP_PTR_TABLE 2 @@ -44,12 +44,34 @@ $ENV{PERL_DESTRUCT_LEVEL} = 2 unless exists $ENV{PERL_DESTRUCT_LEVEL}; $ENV{EMXSHELL} = 'sh'; # For OS/2 -if ($#ARGV == -1) { - @ARGV = split(/[ \n]/, - `echo base/*.t comp/*.t cmd/*.t run/*.t io/*.t; echo op/*.t pragma/*.t lib/*.t pod/*.t`); + +# Roll your own File::Find! +use TestInit; +use File::Spec; +my $curdir = File::Spec->curdir; +my $updir = File::Spec->updir; + +sub _find_tests { + my($dir) = @_; + opendir DIR, $dir || die "Trouble opening $dir: $!"; + foreach my $f (readdir DIR) { + next if $f eq $curdir or $f eq $updir; + + my $fullpath = File::Spec->catdir($dir, $f); + + _find_tests($fullpath) if -d $fullpath; + push @ARGV, $fullpath if $f =~ /\.t$/; + } +} + +unless (@ARGV) { + foreach my $dir (qw(base comp cmd run io op pragma lib pod)) { + _find_tests($dir); + } } # %infinite = ( 'comp/require.t', 1, 'op/bop.t', 1, 'lib/hostname.t', 1 ); +%infinite = (); if ($deparse) { _testprogs('deparse', @ARGV); @@ -170,8 +192,9 @@ EOT print $_; } unless (/^#/) { - if (/^1\.\.([0-9]+)/) { + if (/^1\.\.([0-9]+)( todo ([\d ]+))?/) { $max = $1; + %todo = map { $_ => 1 } split / /, $3 if $3; $totmax += $max; $files += 1; $next = 1; @@ -183,6 +206,7 @@ EOT { my($not, $num, $extra) = ($1, $2, $3); my($istodo) = $extra =~ /^\s*#\s*TODO/ if $extra; + $istodo = 1 if $todo{$num}; if( $not && !$istodo ) { $ok = 0; diff --git a/t/io/utf8.t b/t/io/utf8.t index ac5cde7a6e..fee0fe6ace 100755 --- a/t/io/utf8.t +++ b/t/io/utf8.t @@ -9,6 +9,8 @@ BEGIN { } } +no utf8; # so that the naked 8-bit chars won't gripe under use utf8 + $| = 1; my $total_tests = 25; if (ord('A') == 193) { $total_tests = 24; } # EBCDIC platforms do not warn on UTF-8 diff --git a/t/lib/1_compile.t b/t/lib/1_compile.t index 2190e35321..eb2d70bc7e 100644 --- a/t/lib/1_compile.t +++ b/t/lib/1_compile.t @@ -106,6 +106,8 @@ sub compile_module { return scalar `$^X "-Ilib" t/lib/compmod.pl $module` =~ /^ok/; } +# Add here modules that have their own test scripts and therefore +# need not be test-compiled by 1_compile.t. __DATA__ AnyDBM_File AutoLoader @@ -119,6 +121,7 @@ CGI CGI::Pretty CGI::Util Carp +Carp::Heavy Class::ISA Class::Struct CPAN @@ -138,6 +141,8 @@ Env Errno Exporter Exporter::Heavy +ExtUtils::Constant +ExtUtils::MakeMaker Fatal Fcntl File::Basename @@ -150,6 +155,7 @@ File::Path File::Spec File::Spec::Functions File::Temp +File::stat FileCache FileHandle Filter::Simple @@ -158,7 +164,8 @@ FindBin GDBM_File Getopt::Long Getopt::Std -I18N:Collate +I18N::LangTags +I18N::Collate IO::Dir IO::File IO::Handle @@ -177,6 +184,7 @@ Locale::Constants Locale::Country Locale::Currency Locale::Language +Locale::Maketext MIME::Base64 MIME::QuotedPrint Math::BigFloat @@ -186,8 +194,12 @@ Math::Trig NDBM_File NEXT Net::hostent +Net::netent +Net::protoent +Net::servent ODBM_File Opcode +PerlIO POSIX Pod::Checker Pod::Find @@ -225,9 +237,15 @@ Tie::SubstrHash Time::HiRes Time::Local Time::Piece +Time::gmtime +Time::localtime +Time::tm UNIVERSAL +User::grent +User::pwent XS::Typemap attrs +autouse base bytes charnames diff --git a/t/lib/Test/fail.t b/t/lib/Test/fail.t new file mode 100644 index 0000000000..b431502b8a --- /dev/null +++ b/t/lib/Test/fail.t @@ -0,0 +1,93 @@ +# -*-perl-*- +use strict; +use vars qw($Expect); +use Test qw($TESTOUT $ntest ok skip plan); +plan tests => 14; + +open F, ">fails"; +$TESTOUT = *F{IO}; + +my $r=0; +{ + # Shut up deprecated usage warning. + local $^W = 0; + $r |= skip(0,0); +} +$r |= ok(0); +$r |= ok(0,1); +$r |= ok(sub { 1+1 }, 3); +$r |= ok(sub { 1+1 }, sub { 2 * 0}); + +my @list = (0,0); +$r |= ok @list, 1, "\@list=".join(',',@list); +$r |= ok @list, 1, sub { "\@list=".join ',',@list }; +$r |= ok 'segmentation fault', '/bongo/'; + +for (1..2) { $r |= ok(0); } + +$r |= ok(1, undef); +$r |= ok(undef, 1); + +ok($r); # (failure==success :-) + +close F; +$TESTOUT = *STDOUT{IO}; +$ntest = 1; + +open F, "fails"; +my $O; +while (<F>) { $O .= $_; } +close F; +unlink "fails"; + +ok join(' ', map { m/(\d+)/; $1 } grep /^not ok/, split /\n+/, $O), + join(' ', 1..13); + +my @got = split /not ok \d+\n/, $O; +shift @got; + +$Expect =~ s/\n+$//; +my @expect = split /\n\n/, $Expect; + +for (my $x=0; $x < @got; $x++) { + ok $got[$x], $expect[$x]."\n"; +} + + +BEGIN { + $Expect = <<"EXPECT"; +# Failed test 1 in $0 at line 14 + +# Failed test 2 in $0 at line 16 + +# Test 3 got: '0' ($0 at line 17) +# Expected: '1' + +# Test 4 got: '2' ($0 at line 18) +# Expected: '3' + +# Test 5 got: '2' ($0 at line 19) +# Expected: '0' + +# Test 6 got: '2' ($0 at line 22) +# Expected: '1' (\@list=0,0) + +# Test 7 got: '2' ($0 at line 23) +# Expected: '1' (\@list=0,0) + +# Test 8 got: 'segmentation fault' ($0 at line 24) +# Expected: qr{bongo} + +# Failed test 9 in $0 at line 26 + +# Failed test 10 in $0 at line 26 fail #2 + +# Failed test 11 in $0 at line 28 + +# Test 12 got: <UNDEF> ($0 at line 29) +# Expected: '1' + +# Failed test 13 in $0 at line 31 +EXPECT + +} diff --git a/t/lib/Test/mix.t b/t/lib/Test/mix.t new file mode 100644 index 0000000000..d911689845 --- /dev/null +++ b/t/lib/Test/mix.t @@ -0,0 +1,17 @@ +# -*-perl-*- +use strict; +use Test; +BEGIN { plan tests => 4, todo => [2,3] } + +ok(sub { + my $r = 0; + for (my $x=0; $x < 10; $x++) { + $r += $x*($r+1); + } + $r + }, 3628799); + +ok(0); +ok(1); + +skip(1,0); diff --git a/t/lib/Test/onfail.t b/t/lib/Test/onfail.t new file mode 100644 index 0000000000..dce4373401 --- /dev/null +++ b/t/lib/Test/onfail.t @@ -0,0 +1,31 @@ +# -*-perl-*- + +use strict; +use Test qw($ntest plan ok $TESTOUT); +use vars qw($mycnt); + +BEGIN { plan test => 6, onfail => \&myfail } + +$mycnt = 0; + +my $why = "zero != one"; +# sneak in a test that Test::Harness wont see +open J, ">junk"; +$TESTOUT = *J{IO}; +ok(0, 1, $why); +$TESTOUT = *STDOUT{IO}; +close J; +unlink "junk"; +$ntest = 1; + +sub myfail { + my ($f) = @_; + ok(@$f, 1); + + my $t = $$f[0]; + ok($$t{diagnostic}, $why); + ok($$t{'package'}, 'main'); + ok($$t{repetition}, 1); + ok($$t{result}, 0); + ok($$t{expected}, 1); +} diff --git a/t/lib/Test/qr.t b/t/lib/Test/qr.t new file mode 100644 index 0000000000..ea40f87308 --- /dev/null +++ b/t/lib/Test/qr.t @@ -0,0 +1,13 @@ +#!./perl -w + +use strict; +BEGIN { + if ($] < 5.005) { + print "1..0\n"; + print "ok 1 # skipped; this test requires at least perl 5.005\n"; + exit; + } +} +use Test; plan tests => 1; + +ok 'abc', qr/b/; diff --git a/t/lib/Test/skip.t b/t/lib/Test/skip.t new file mode 100644 index 0000000000..7db35e65dc --- /dev/null +++ b/t/lib/Test/skip.t @@ -0,0 +1,40 @@ +# -*-perl-*- +use strict; +use Test qw($TESTOUT $ntest plan ok skip); plan tests => 6; + +open F, ">skips" or die "open skips: $!"; +$TESTOUT = *F{IO}; + +skip(1, 0); #should skip + +my $skipped=1; +skip('hop', sub { $skipped = 0 }); +skip(sub {'jump'}, sub { $skipped = 0 }); +skip('skipping stones is more fun', sub { $skipped = 0 }); + +close F; + +$TESTOUT = *STDOUT{IO}; +$ntest = 1; +open F, "skips" or die "open skips: $!"; + +ok $skipped, 1, 'not skipped?'; + +my @T = <F>; +chop @T; +my @expect = split /\n+/, join('',<DATA>); +ok @T, 4; +for (my $x=0; $x < @T; $x++) { + ok $T[$x], $expect[$x]; +} + +END { close F; unlink "skips" } + +__DATA__ +ok 1 # skip + +ok 2 # skip hop + +ok 3 # skip jump + +ok 4 # skip skipping stones is more fun diff --git a/t/lib/Test/success.t b/t/lib/Test/success.t new file mode 100644 index 0000000000..a580f0a567 --- /dev/null +++ b/t/lib/Test/success.t @@ -0,0 +1,11 @@ +# -*-perl-*- +use strict; +use Test; +BEGIN { plan tests => 11 } + +ok(ok(1)); +ok(ok('fixed', 'fixed')); +ok(skip(1,0)); +ok(undef, undef); +ok(ok 'the brown fox jumped over the lazy dog', '/lazy/'); +ok(ok 'the brown fox jumped over the lazy dog', 'm,fox,'); diff --git a/t/lib/Test/todo.t b/t/lib/Test/todo.t new file mode 100644 index 0000000000..ae02a04f6b --- /dev/null +++ b/t/lib/Test/todo.t @@ -0,0 +1,13 @@ +# -*-perl-*- +use strict; +use Test; +BEGIN { + my $tests = 5; + plan tests => $tests, todo => [1..$tests]; +} + +ok(0); +ok(1); +ok(0,1); +ok(0,1,"need more tuits"); +ok(1,1); diff --git a/t/lib/carp.t b/t/lib/carp.t new file mode 100644 index 0000000000..a318c19751 --- /dev/null +++ b/t/lib/carp.t @@ -0,0 +1,53 @@ +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use Carp qw(carp cluck croak confess); + +print "1..7\n"; + +print "ok 1\n"; + +$SIG{__WARN__} = sub { + print "ok $1\n" + if $_[0] =~ m!ok (\d+)$! }; + +carp "ok 2\n"; + +$SIG{__WARN__} = sub { + print "ok $1\n" + if $_[0] =~ m!(\d+) at .+\b(?i:carp\.t) line \d+$! }; + +carp 3; + +sub sub_4 { + +$SIG{__WARN__} = sub { + print "ok $1\n" + if $_[0] =~ m!^(\d+) at .+\b(?i:carp\.t) line \d+\n\tmain::sub_4\(\) called at .+\b(?i:carp\.t) line \d+$! }; + +cluck 4; + +} + +sub_4; + +$SIG{__DIE__} = sub { + print "ok $1\n" + if $_[0] =~ m!^(\d+) at .+\b(?i:carp\.t) line \d+\n\teval \Q{...}\E called at .+\b(?i:carp\.t) line \d+$! }; + +eval { croak 5 }; + +sub sub_6 { + $SIG{__DIE__} = sub { + print "ok $1\n" + if $_[0] =~ m!^(\d+) at .+\b(?i:carp\.t) line \d+\n\teval \Q{...}\E called at .+\b(?i:carp\.t) line \d+\n\tmain::sub_6\(\) called at .+\b(?i:carp\.t) line \d+$! }; + + eval { confess 6 }; +} + +sub_6; + +print "ok 7\n"; + diff --git a/t/lib/charnames.t b/t/lib/charnames.t index 07c91e6682..124dad0971 100644 --- a/t/lib/charnames.t +++ b/t/lib/charnames.t @@ -117,6 +117,8 @@ sub to_bytes { { # 20001114.001 + no utf8; # so that the naked 8-bit character won't gripe under use utf8 + if (ord("Ä") == 0xc4) { # Try to do this only on Latin-1. use charnames ':full'; my $text = "\N{LATIN CAPITAL LETTER A WITH DIAERESIS}"; diff --git a/t/lib/extutils.t b/t/lib/extutils.t new file mode 100644 index 0000000000..cc34740b42 --- /dev/null +++ b/t/lib/extutils.t @@ -0,0 +1,229 @@ +#!./perl -w + +print "1..10\n"; + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use warnings; +use strict; +use ExtUtils::MakeMaker; +use ExtUtils::Constant qw (constant_types C_constant XS_constant autoload); +use Config; + +my $runperl = $^X; + +$| = 1; + +my $dir = "ext-$$"; +my @files; + +print "# $dir being created...\n"; +mkdir $dir, 0777 or die "mkdir: $!\n"; + +use File::Spec::Functions; + +END { + use File::Path; + print "# $dir being removed...\n"; + rmtree($dir); +} + +my @names = ("THREE", {name=>"OK4", type=>"PV",}, + {name=>"OK5", type=>"PVN", + value=>['"not ok 5\\n\\0ok 5\\n"', 15]}, + {name => "FARTHING", type=>"NV"}, + {name => "NOT_ZERO", type=>"UV", value=>"~(UV)0"}); + +my @names_only = map {(ref $_) ? $_->{name} : $_} @names; + +my $package = "ExtTest"; +################ Header +my $header = catfile($dir, "test.h"); +push @files, "test.h"; +open FH, ">$header" or die "open >$header: $!\n"; +print FH <<'EOT'; +#define THREE 3 +#define OK4 "ok 4\n" +#define OK5 1 +#define FARTHING 0.25 +#define NOT_ZERO 1 +EOT +close FH or die "close $header: $!\n"; + +################ XS +my $xs = catfile($dir, "$package.xs"); +push @files, "$package.xs"; +open FH, ">$xs" or die "open >$xs: $!\n"; + +print FH <<'EOT'; +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +EOT + +print FH "#include \"test.h\"\n\n"; +print FH constant_types(); # macro defs +my $types = {}; +foreach (C_constant (undef, "IV", $types, undef, undef, @names) ) { + print FH $_, "\n"; # C constant subs +} +print FH "MODULE = $package PACKAGE = $package\n"; +print FH "PROTOTYPES: ENABLE\n"; +print FH XS_constant ($package, $types); # XS for ExtTest::constant +close FH or die "close $xs: $!\n"; + +################ PM +my $pm = catfile($dir, "$package.pm"); +push @files, "$package.pm"; +open FH, ">$pm" or die "open >$pm: $!\n"; +print FH "package $package;\n"; +print FH "use $];\n"; + +print FH <<'EOT'; + +use strict; +use warnings; +use Carp; + +require Exporter; +require DynaLoader; +use AutoLoader; +use vars qw ($VERSION @ISA @EXPORT_OK); + +$VERSION = '0.01'; +@ISA = qw(Exporter DynaLoader); +@EXPORT_OK = qw( +EOT + +print FH "\t$_\n" foreach (@names_only); +print FH ");\n"; +print FH autoload ($package, $]); +print FH "bootstrap $package \$VERSION;\n1;\n__END__\n"; +close FH or die "close $pm: $!\n"; + +################ test.pl +my $testpl = catfile($dir, "test.pl"); +push @files, "test.pl"; +open FH, ">$testpl" or die "open >$testpl: $!\n"; + +print FH "use $package qw(@names_only);\n"; +print FH <<'EOT'; + +my $three = THREE; +if ($three == 3) { + print "ok 3\n"; +} else { + print "not ok 3 # $three\n"; +} + +print OK4; + +$_ = OK5; +s/.*\0//s; +print; + +my $farthing = FARTHING; +if ($farthing == 0.25) { + print "ok 6\n"; +} else { + print "not ok 6 # $farthing\n"; +} + +my $not_zero = NOT_ZERO; +if ($not_zero > 0 && $not_zero == ~0) { + print "ok 7\n"; +} else { + print "not ok 7 # \$not_zero=$not_zero ~0=" . (~0) . "\n"; +} + + +EOT + +close FH or die "close $testpl: $!\n"; + +################ dummy Makefile.PL +# Keep the dependancy in the Makefile happy +my $makefilePL = catfile($dir, "Makefile.PL"); +push @files, "Makefile.PL"; +open FH, ">$makefilePL" or die "open >$makefilePL: $!\n"; +close FH or die "close $makefilePL: $!\n"; + +chdir $dir or die $!; push @INC, '../../lib'; +END {chdir ".." or warn $!}; + +# Grr. MakeMaker hardwired to write its message to STDOUT. +print "# "; +WriteMakefile( + 'NAME' => $package, + 'VERSION_FROM' => "$package.pm", # finds $VERSION + ($] >= 5.005 ? + (#ABSTRACT_FROM => "$package.pm", # XXX add this + AUTHOR => $0) : ()) + ); +if (-f "Makefile") { + print "ok 1\n"; +} else { + print "not ok 1\n"; +} +push @files, "Makefile.old"; # Renamed by make clean + +my $make = $Config{make}; + +$make = $ENV{MAKE} if exists $ENV{MAKE}; + +my $makeout; + +print "# make = '$make'\n"; +$makeout = `$make`; +if ($?) { + print "not ok 2 # $make failed: $?\n"; + exit($?); +} else { + print "ok 2\n"; +} + +my $maketest = "$make test"; +print "# make = '$maketest'\n"; +$makeout = `$maketest`; +if ($?) { + print "not ok 8 # $make failed: $?\n"; +} else { + # Perl babblings + $makeout =~ s/^\s*PERL_DL_NONLAZY=.+?\n//m; + + # GNU make babblings + $makeout =~ s/^\w*?make.+?(?:entering|leaving) directory.+?\n//mig; + + print $makeout; + print "ok 8\n"; +} + +my $makeclean = "$make clean"; +print "# make = '$makeclean'\n"; +$makeout = `$makeclean`; +if ($?) { + print "not ok 9 # $make failed: $?\n"; +} else { + print "ok 9\n"; +} + +foreach (@files) { + unlink $_ or warn "unlink $_: $!"; +} + +my $fail; +opendir DIR, "." or die "opendir '.': $!"; +while (defined (my $entry = readdir DIR)) { + next if $entry =~ /^\.\.?$/; + print "# Extra file '$entry'\n"; + $fail = 1; +} +closedir DIR or warn "closedir '.': $!"; +if ($fail) { + print "not ok 10\n"; +} else { + print "ok 10\n"; +} diff --git a/t/lib/filestat.t b/t/lib/filestat.t new file mode 100644 index 0000000000..ac6d95f745 --- /dev/null +++ b/t/lib/filestat.t @@ -0,0 +1,70 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $hasst; + eval { my @n = stat "TEST" }; + $hasst = 1 unless $@ && $@ =~ /unimplemented/; + unless ($hasst) { print "1..0 # Skip: no stat\n"; exit 0 } + use Config; + $hasst = 0 unless $Config{'i_sysstat'} eq 'define'; + unless ($hasst) { print "1..0 # Skip: no sys/stat.h\n"; exit 0 } +} + +BEGIN { + our @stat = stat "TEST"; # This is the function stat. + unless (@stat) { print "1..0 # Skip: no file TEST\n"; exit 0 } +} + +print "1..14\n"; + +use File::stat; + +print "ok 1\n"; + +my $stat = stat "TEST"; # This is the OO stat. + +print "not " unless $stat->dev == $stat[ 0]; +print "ok 2\n"; + +print "not " unless $stat->ino == $stat[ 1]; +print "ok 3\n"; + +print "not " unless $stat->mode == $stat[ 2]; +print "ok 4\n"; + +print "not " unless $stat->nlink == $stat[ 3]; +print "ok 5\n"; + +print "not " unless $stat->uid == $stat[ 4]; +print "ok 6\n"; + +print "not " unless $stat->gid == $stat[ 5]; +print "ok 7\n"; + +print "not " unless $stat->rdev == $stat[ 6]; +print "ok 8\n"; + +print "not " unless $stat->size == $stat[ 7]; +print "ok 9\n"; + +print "not " unless $stat->atime == $stat[ 8]; +print "ok 10\n"; + +print "not " unless $stat->mtime == $stat[ 9]; +print "ok 11\n"; + +print "not " unless $stat->ctime == $stat[10]; +print "ok 12\n"; + +print "not " unless $stat->blksize == $stat[11]; +print "ok 13\n"; + +print "not " unless $stat->blocks == $stat[12]; +print "ok 14\n"; + +# Testing pretty much anything else is unportable. diff --git a/t/lib/i18n-langtags.t b/t/lib/i18n-langtags.t new file mode 100644 index 0000000000..06c178ef27 --- /dev/null +++ b/t/lib/i18n-langtags.t @@ -0,0 +1,45 @@ +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +######################### We start with some black magic to print on failure. +require 5; + +use strict; +use Test; +BEGIN { plan tests => 23 }; +BEGIN { ok 1 } +use I18N::LangTags qw(is_language_tag same_language_tag + extract_language_tags super_languages + similarity_language_tag is_dialect_of + locale2language_tag alternate_language_tags + encode_language_tag + ); + +ok !is_language_tag(''); +ok is_language_tag('fr'); +ok is_language_tag('fr-ca'); +ok is_language_tag('fr-CA'); +ok !is_language_tag('fr-CA-'); +ok !is_language_tag('fr_CA'); +ok is_language_tag('fr-ca-joual'); +ok !is_language_tag('frca'); +ok is_language_tag('nav'); +ok is_language_tag('nav-shiprock'); +ok !is_language_tag('nav-ceremonial'); # subtag too long +ok !is_language_tag('x'); +ok !is_language_tag('i'); +ok is_language_tag('i-borg'); # NB: fictitious tag +ok is_language_tag('x-borg'); +ok is_language_tag('x-borg-prot5123'); +ok same_language_tag('x-borg-prot5123', 'i-BORG-Prot5123' ); +ok !same_language_tag('en', 'en-us' ); + +ok 0 == similarity_language_tag('en-ca', 'fr-ca'); +ok 1 == similarity_language_tag('en-ca', 'en-us'); +ok 2 == similarity_language_tag('en-us-southern', 'en-us-western'); +ok 2 == similarity_language_tag('en-us-southern', 'en-us'); + +# print "So there!\n"; + diff --git a/t/lib/lc-language.t b/t/lib/lc-language.t index 6a70b79ef9..9facd3509d 100644 --- a/t/lib/lc-language.t +++ b/t/lib/lc-language.t @@ -10,6 +10,8 @@ BEGIN { use Locale::Language; +no utf8; # so that the naked 8-bit characters won't gripe under use utf8 + #----------------------------------------------------------------------- # This is an array of tests. Each test is eval'd as an expression. # If it evaluates to FALSE, then "not ok N" is printed for the test, diff --git a/t/lib/lc-maketext.t b/t/lib/lc-maketext.t new file mode 100644 index 0000000000..743d8eecbd --- /dev/null +++ b/t/lib/lc-maketext.t @@ -0,0 +1,37 @@ +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { $| = 1; print "1..3\n"; } +END {print "not ok 1\n" unless $loaded;} +use Locale::Maketext 1.01; +print "# Perl v$], Locale::Maketext v$Locale::Maketext::VERSION\n"; +$loaded = 1; +print "ok 1\n"; +{ + package Woozle; + @ISA = ('Locale::Maketext'); + sub dubbil { return $_[1] * 2 } +} +{ + package Woozle::elx; + @ISA = ('Woozle'); + %Lexicon = ( + 'd2' => 'hum [dubbil,_1]', + ); +} + +$lh = Woozle->get_handle('elx'); +if($lh) { + print "ok 2\n"; + my $x = $lh->maketext('d2', 7); + if($x eq "hum 14") { + print "ok 3\n"; + } else { + print "not ok 3\n (got \"$x\")\n"; + } +} else { + print "not ok 2\n"; +} +#Shazam! diff --git a/t/lib/net-nent.t b/t/lib/net-nent.t new file mode 100644 index 0000000000..e73122ccc4 --- /dev/null +++ b/t/lib/net-nent.t @@ -0,0 +1,36 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $hasne; + eval { my @n = getnetbyname "loopback" }; + $hasne = 1 unless $@ && $@ =~ /unimplemented/; + unless ($hasne) { print "1..0 # Skip: no getnetbyname\n"; exit 0 } + use Config; + $hasne = 0 unless $Config{'i_netdb'} eq 'define'; + unless ($hasne) { print "1..0 # Skip: no netdb.h\n"; exit 0 } +} + +BEGIN { + our @netent = getnetbyname "loopback"; # This is the function getnetbyname. + unless (@netent) { print "1..0 # Skip: no loopback net\n"; exit 0 } +} + +print "1..2\n"; + +use Net::netent; + +print "ok 1\n"; + +my $netent = getnetbyname "loopback"; # This is the OO getnetbyname. + +print "not " unless $netent->name eq $netent[0]; +print "ok 2\n"; + +# Testing pretty much anything else is unportable; +# e.g. the canonical name of the "loopback" net may be "loop". + diff --git a/t/lib/net-pent.t b/t/lib/net-pent.t new file mode 100644 index 0000000000..6c5a1547b3 --- /dev/null +++ b/t/lib/net-pent.t @@ -0,0 +1,38 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $haspe; + eval { my @n = getprotobyname "tcp" }; + $haspe = 1 unless $@ && $@ =~ /unimplemented/; + unless ($haspe) { print "1..0 # Skip: no getprotobyname\n"; exit 0 } + use Config; + $haspe = 0 unless $Config{'i_netdb'} eq 'define'; + unless ($haspe) { print "1..0 # Skip: no netdb.h\n"; exit 0 } +} + +BEGIN { + our @protoent = getprotobyname "tcp"; # This is the function getprotobyname. + unless (@protoent) { print "1..0 # Skip: no tcp protocol\n"; exit 0 } +} + +print "1..3\n"; + +use Net::protoent; + +print "ok 1\n"; + +my $protoent = getprotobyname "tcp"; # This is the OO getprotobyname. + +print "not " unless $protoent->name eq $protoent[0]; +print "ok 2\n"; + +print "not " unless $protoent->proto == $protoent[2]; +print "ok 3\n"; + +# Testing pretty much anything else is unportable. + diff --git a/t/lib/net-sent.t b/t/lib/net-sent.t new file mode 100644 index 0000000000..ef4a04dee8 --- /dev/null +++ b/t/lib/net-sent.t @@ -0,0 +1,38 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $hasse; + eval { my @n = getservbyname "echo", "tcp" }; + $hasse = 1 unless $@ && $@ =~ /unimplemented/; + unless ($hasse) { print "1..0 # Skip: no getservbyname\n"; exit 0 } + use Config; + $hasse = 0 unless $Config{'i_netdb'} eq 'define'; + unless ($hasse) { print "1..0 # Skip: no netdb.h\n"; exit 0 } +} + +BEGIN { + our @servent = getservbyname "echo", "tcp"; # This is the function getservbyname. + unless (@servent) { print "1..0 # Skip: no echo service\n"; exit 0 } +} + +print "1..3\n"; + +use Net::servent; + +print "ok 1\n"; + +my $servent = getservbyname "echo", "tcp"; # This is the OO getservbyname. + +print "not " unless $servent->name eq $servent[0]; +print "ok 2\n"; + +print "not " unless $servent->port == $servent[2]; +print "ok 3\n"; + +# Testing pretty much anything else is unportable. + diff --git a/t/lib/perlio.t b/t/lib/perlio.t new file mode 100644 index 0000000000..d71ab8ec4f --- /dev/null +++ b/t/lib/perlio.t @@ -0,0 +1,90 @@ +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; import Config; + if ($Config{'extensions'} !~ /\bPerlIO\b/) { + print "1..0 # Skip: PerlIO was not built\n"; + exit 0; + } +} + +use PerlIO; + +print "1..19\n"; + +print "ok 1\n"; + +my $txt = "txt$$"; +my $bin = "bin$$"; +my $utf = "utf$$"; + +my $txtfh; +my $binfh; +my $utffh; + +print "not " unless open($txtfh, ">:crlf", $txt); +print "ok 2\n"; + +print "not " unless open($binfh, ">:raw", $bin); +print "ok 3\n"; + +print "not " unless open($utffh, ">:utf8", $utf); +print "ok 4\n"; + +print $txtfh "foo\n"; +print $txtfh "bar\n"; +print "not " unless close($txtfh); +print "ok 5\n"; + +print $binfh "foo\n"; +print $binfh "bar\n"; +print "not " unless close($binfh); +print "ok 6\n"; + +print $utffh "foo\x{ff}\n"; +print $utffh "bar\x{abcd}\n"; +print "not " unless close($utffh); +print "ok 7\n"; + +print "not " unless open($txtfh, "<:crlf", $txt); +print "ok 8\n"; + +print "not " unless open($binfh, "<:raw", $bin); +print "ok 9\n"; + +print "not " unless open($utffh, "<:utf8", $utf); +print "ok 10\n"; + +print "not " unless <$txtfh> eq "foo\n" && <$txtfh> eq "bar\n"; +print "ok 11\n"; + +print "not " unless <$binfh> eq "foo\n" && <$binfh> eq "bar\n"; +print "ok 12\n"; + +print "not " unless <$utffh> eq "foo\x{ff}\n" && <$utffh> eq "bar\x{abcd}\n"; +print "ok 13\n"; + +print "not " unless eof($txtfh); +print "ok 14\n"; + +print "not " unless eof($binfh); +print "ok 15\n"; + +print "not " unless eof($utffh); +print "ok 16\n"; + +print "not " unless close($txtfh); +print "ok 17\n"; + +print "not " unless close($binfh); +print "ok 18\n"; + +print "not " unless close($utffh); +print "ok 19\n"; + +END { + 1 while unlink $txt; + 1 while unlink $bin; + 1 while unlink $utf; +} + diff --git a/t/lib/sigaction.t b/t/lib/sigaction.t index 8b0a907e44..1815b19510 100644 --- a/t/lib/sigaction.t +++ b/t/lib/sigaction.t @@ -44,7 +44,7 @@ my $oldaction=POSIX::SigAction->new('::bar', new POSIX::SigSet(), 0); } if($oldaction->{HANDLER} eq 'DEFAULT' || - (! -t STDIN && $oldaction->{HANDLER} eq 'IGNORE')) + $oldaction->{HANDLER} eq 'IGNORE') { print "ok 2\n" } else { print "not ok 2 # ", $oldaction->{HANDLER}, "\n"} print $SIG{HUP} eq '::foo' ? "ok 3\n" : "not ok 3\n"; diff --git a/t/lib/test-harness.t b/t/lib/test-harness.t index 4ce6e1774a..a4c423ddd3 100644 --- a/t/lib/test-harness.t +++ b/t/lib/test-harness.t @@ -1,15 +1,16 @@ -#!./perl +#!perl BEGIN { chdir 't' if -d 't'; @INC = '../lib'; } +use strict; # For shutting up Test::Harness. package My::Dev::Null; use Tie::Handle; -@ISA = qw(Tie::StdHandle); +@My::Dev::Null::ISA = qw(Tie::StdHandle); sub WRITE { } @@ -41,6 +42,7 @@ sub eqhash { return $ok; } +use vars qw($Total_tests %samples); my $loaded; BEGIN { $| = 1; $^W = 1; } @@ -56,7 +58,7 @@ BEGIN { simple => { bonus => 0, max => 5, - ok => 5, + 'ok' => 5, files => 1, bad => 0, good => 1, @@ -67,7 +69,7 @@ BEGIN { simple_fail => { bonus => 0, max => 5, - ok => 3, + 'ok' => 3, files => 1, bad => 1, good => 0, @@ -78,7 +80,7 @@ BEGIN { descriptive => { bonus => 0, max => 5, - ok => 5, + 'ok' => 5, files => 1, bad => 0, good => 1, @@ -89,7 +91,7 @@ BEGIN { no_nums => { bonus => 0, max => 5, - ok => 4, + 'ok' => 4, files => 1, bad => 1, good => 0, @@ -100,7 +102,7 @@ BEGIN { todo => { bonus => 1, max => 5, - ok => 5, + 'ok' => 5, files => 1, bad => 0, good => 1, @@ -111,7 +113,7 @@ BEGIN { skip => { bonus => 0, max => 5, - ok => 5, + 'ok' => 5, files => 1, bad => 0, good => 1, @@ -123,7 +125,7 @@ BEGIN { combined => { bonus => 1, max => 10, - ok => 8, + 'ok' => 8, files => 1, bad => 1, good => 0, @@ -134,7 +136,7 @@ BEGIN { duplicates => { bonus => 0, max => 10, - ok => 11, + 'ok' => 11, files => 1, bad => 1, good => 0, @@ -145,7 +147,7 @@ BEGIN { header_at_end => { bonus => 0, max => 4, - ok => 4, + 'ok' => 4, files => 1, bad => 0, good => 1, @@ -156,7 +158,7 @@ BEGIN { skip_all => { bonus => 0, max => 0, - ok => 0, + 'ok' => 0, files => 1, bad => 0, good => 1, @@ -167,7 +169,7 @@ BEGIN { with_comments => { bonus => 2, max => 5, - ok => 5, + 'ok' => 5, files => 1, bad => 0, good => 1, @@ -183,12 +185,12 @@ BEGIN { tie *NULL, 'My::Dev::Null' or die $!; while (my($test, $expect) = each %samples) { - # _runtests() runs the tests but skips the formatting. + # _run_all_tests() runs the tests but skips the formatting. my($totals, $failed); eval { - select NULL; # _runtests() isn't as quiet as it should be. + select NULL; # _run_all_tests() isn't as quiet as it should be. ($totals, $failed) = - Test::Harness::_runtests("lib/sample-tests/$test"); + Test::Harness::_run_all_tests("lib/sample-tests/$test"); }; select STDOUT; diff --git a/t/lib/time-gmtime.t b/t/lib/time-gmtime.t new file mode 100644 index 0000000000..853ec3b6e3 --- /dev/null +++ b/t/lib/time-gmtime.t @@ -0,0 +1,57 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $hasgm; + eval { my $n = gmtime 0 }; + $hasgm = 1 unless $@ && $@ =~ /unimplemented/; + unless ($hasgm) { print "1..0 # Skip: no gmtime\n"; exit 0 } +} + +BEGIN { + our @gmtime = gmtime 0; # This is the function gmtime. + unless (@gmtime) { print "1..0 # Skip: gmtime failed\n"; exit 0 } +} + +print "1..10\n"; + +use Time::gmtime; + +print "ok 1\n"; + +my $gmtime = gmtime 0 ; # This is the OO gmtime. + +print "not " unless $gmtime->sec == $gmtime[0]; +print "ok 2\n"; + +print "not " unless $gmtime->min == $gmtime[1]; +print "ok 3\n"; + +print "not " unless $gmtime->hour == $gmtime[2]; +print "ok 4\n"; + +print "not " unless $gmtime->mday == $gmtime[3]; +print "ok 5\n"; + +print "not " unless $gmtime->mon == $gmtime[4]; +print "ok 6\n"; + +print "not " unless $gmtime->year == $gmtime[5]; +print "ok 7\n"; + +print "not " unless $gmtime->wday == $gmtime[6]; +print "ok 8\n"; + +print "not " unless $gmtime->yday == $gmtime[7]; +print "ok 9\n"; + +print "not " unless $gmtime->isdst == $gmtime[8]; +print "ok 10\n"; + + + + diff --git a/t/lib/time-localtime.t b/t/lib/time-localtime.t new file mode 100644 index 0000000000..357615c780 --- /dev/null +++ b/t/lib/time-localtime.t @@ -0,0 +1,57 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $haslocal; + eval { my $n = localtime 0 }; + $haslocal = 1 unless $@ && $@ =~ /unimplemented/; + unless ($haslocal) { print "1..0 # Skip: no localtime\n"; exit 0 } +} + +BEGIN { + our @localtime = localtime 0; # This is the function localtime. + unless (@localtime) { print "1..0 # Skip: localtime failed\n"; exit 0 } +} + +print "1..10\n"; + +use Time::localtime; + +print "ok 1\n"; + +my $localtime = localtime 0 ; # This is the OO localtime. + +print "not " unless $localtime->sec == $localtime[0]; +print "ok 2\n"; + +print "not " unless $localtime->min == $localtime[1]; +print "ok 3\n"; + +print "not " unless $localtime->hour == $localtime[2]; +print "ok 4\n"; + +print "not " unless $localtime->mday == $localtime[3]; +print "ok 5\n"; + +print "not " unless $localtime->mon == $localtime[4]; +print "ok 6\n"; + +print "not " unless $localtime->year == $localtime[5]; +print "ok 7\n"; + +print "not " unless $localtime->wday == $localtime[6]; +print "ok 8\n"; + +print "not " unless $localtime->yday == $localtime[7]; +print "ok 9\n"; + +print "not " unless $localtime->isdst == $localtime[8]; +print "ok 10\n"; + + + + diff --git a/t/lib/time-piece.t b/t/lib/time-piece.t index bf41a7ddd3..c62e36d95e 100644 --- a/t/lib/time-piece.t +++ b/t/lib/time-piece.t @@ -314,7 +314,9 @@ print "ok 84\n"; print "not " unless Time::Piece::_is_leap_year(1904); print "ok 85\n"; -my %T = $t->strptime("%T", "12:34:56"); +use Time::Piece 'strptime'; + +my %T = strptime("%T", "12:34:56"); print "not " unless keys %T == 3 && $T{H} == 12 && $T{M} == 34 && $T{S} == 56; print "ok 86\n"; diff --git a/t/lib/user-grent.t b/t/lib/user-grent.t new file mode 100644 index 0000000000..760b814d54 --- /dev/null +++ b/t/lib/user-grent.t @@ -0,0 +1,44 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $hasgr; + eval { my @n = getgrgid 0 }; + $hasgr = 1 unless $@ && $@ =~ /unimplemented/; + unless ($hasgr) { print "1..0 # Skip: no getgrgid\n"; exit 0 } + use Config; + $hasgr = 0 unless $Config{'i_grp'} eq 'define'; + unless ($hasgr) { print "1..0 # Skip: no grp.h\n"; exit 0 } +} + +BEGIN { + our @grent = getgrgid 0; # This is the function getgrgid. + unless (@grent) { print "1..0 # Skip: no gid 0\n"; exit 0 } +} + +print "1..5\n"; + +use User::grent; + +print "ok 1\n"; + +my $grent = getgrgid 0; # This is the OO getgrgid. + +print "not " unless $grent->gid == 0; +print "ok 2\n"; + +print "not " unless $grent->name == $grent[0]; +print "ok 3\n"; + +print "not " unless $grent->passwd eq $grent[1]; +print "ok 4\n"; + +print "not " unless $grent->gid == $grent[2]; +print "ok 5\n"; + +# Testing pretty much anything else is unportable. + diff --git a/t/lib/user-pwent.t b/t/lib/user-pwent.t new file mode 100644 index 0000000000..e274265bd1 --- /dev/null +++ b/t/lib/user-pwent.t @@ -0,0 +1,63 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +BEGIN { + our $haspw; + eval { my @n = getpwuid 0 }; + $haspw = 1 unless $@ && $@ =~ /unimplemented/; + unless ($haspw) { print "1..0 # Skip: no getpwuid\n"; exit 0 } + use Config; + $haspw = 0 unless $Config{'i_pwd'} eq 'define'; + unless ($haspw) { print "1..0 # Skip: no pwd.h\n"; exit 0 } +} + +BEGIN { + our @pwent = getpwuid 0; # This is the function getpwuid. + unless (@pwent) { print "1..0 # Skip: no uid 0\n"; exit 0 } +} + +print "1..9\n"; + +use User::pwent; + +print "ok 1\n"; + +my $pwent = getpwuid 0; # This is the OO getpwuid. + +print "not " unless $pwent->uid == 0; +print "ok 2\n"; + +print "not " unless $pwent->name == $pwent[0]; +print "ok 3\n"; + +print "not " unless $pwent->passwd eq $pwent[1]; +print "ok 4\n"; + +print "not " unless $pwent->uid == $pwent[2]; +print "ok 5\n"; + +print "not " unless $pwent->gid == $pwent[3]; +print "ok 6\n"; + +# The quota and comment fields are unportable. + +print "not " unless $pwent->gecos eq $pwent[6]; +print "ok 7\n"; + +print "not " unless $pwent->dir eq $pwent[7]; +print "ok 8\n"; + +print "not " unless $pwent->shell eq $pwent[8]; +print "ok 9\n"; + +# The expire field is unportable. + +# Testing pretty much anything else is unportable: +# there maybe more than one username with uid 0; +# uid 0's home directory may be "/" or "/root' or something else, +# and so on. + diff --git a/t/op/gmagic.t b/t/op/gmagic.t new file mode 100644 index 0000000000..ab6d2ee3e6 --- /dev/null +++ b/t/op/gmagic.t @@ -0,0 +1,83 @@ +#!./perl -w + +BEGIN { + $| = 1; + chdir 't' if -d 't'; + @INC = '../lib'; +} + +print "1..18\n"; + +my $t = 1; +tie my $c => 'Tie::Monitor'; + +sub ok { + my($ok, $got, $exp, $rexp, $wexp) = @_; + my($rgot, $wgot) = (tied $c)->init(0); + print $ok ? "ok $t\n" : "# expected $exp, got $got\nnot ok $t\n"; + ++$t; + if ($rexp == $rgot && $wexp == $wgot) { + print "ok $t\n"; + } else { + print "# read $rgot expecting $rexp\n" if $rgot != $rexp; + print "# wrote $wgot expecting $wexp\n" if $wgot != $wexp; + print "not ok $t\n"; + } + ++$t; +} + +sub ok_undef { ok(!defined($_[0]), shift, "undef", @_) } +sub ok_numeric { ok($_[0] == $_[1], @_) } +sub ok_string { ok($_[0] eq $_[1], @_) } + +my($r, $s); +# the thing itself +ok_numeric($r = $c + 0, 0, 1, 0); +ok_string($r = "$c", '0', 1, 0); + +# concat +ok_string($c . 'x', '0x', 1, 0); +ok_string('x' . $c, 'x0', 1, 0); +$s = $c . $c; +ok_string($s, '00', 2, 0); +$r = 'x'; +$s = $c = $r . 'y'; +ok_string($s, 'xy', 1, 1); +$s = $c = $c . 'x'; +ok_string($s, '0x', 2, 1); +$s = $c = 'x' . $c; +ok_string($s, 'x0', 2, 1); +$s = $c = $c . $c; +ok_string($s, '00', 3, 1); + +# adapted from Tie::Counter by Abigail +package Tie::Monitor; + +sub TIESCALAR { + my($class, $value) = @_; + bless { + read => 0, + write => 0, + values => [ 0 ], + }; +} + +sub FETCH { + my $self = shift; + ++$self->{read}; + $self->{values}[$#{ $self->{values} }]; +} + +sub STORE { + my($self, $value) = @_; + ++$self->{write}; + push @{ $self->{values} }, $value; +} + +sub init { + my $self = shift; + my @results = ($self->{read}, $self->{write}); + $self->{read} = $self->{write} = 0; + $self->{values} = [ 0 ]; + @results; +} diff --git a/t/op/misc.t b/t/op/misc.t index e3927a3716..90df19a420 100755 --- a/t/op/misc.t +++ b/t/op/misc.t @@ -682,3 +682,15 @@ OK "abcd\x{1234}" =~ /(a)(b[c])(d+)?/i and print "ok\n"; EXPECT ok +######## +# Bug 20010422.005 +{s//${}/; //} +EXPECT +syntax error at - line 2, near "${}" +Execution of - aborted due to compilation errors. +######## +# Bug 20010528.007 +"\x{" +EXPECT +Missing right brace on \x{} at - line 2, within string +Execution of - aborted due to compilation errors. diff --git a/t/op/regexp.t b/t/op/regexp.t index 4a4d42fd98..0751559964 100755 --- a/t/op/regexp.t +++ b/t/op/regexp.t @@ -70,8 +70,8 @@ while (<TESTS>) { $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/; $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//)); # Certain tests don't work with utf8 (the re_test should be in UTF8) - $skip = 1, $reason = 'utf8' - if ($^H &= ~0x00000008) && $pat =~ /\[:\^(alnum|print|word|ascii|xdigit):\]/; +# $skip = 1, $reason = 'utf8' +# if ($^H &= ~0x00000008) && $pat =~ /\[:\^(alnum|print|word|ascii|xdigit):\]/; $result =~ s/B//i unless $skip; for $study ('', 'study \$subject') { $c = $iters; diff --git a/t/op/splice.t b/t/op/splice.t index 06e350988d..3b4229a031 100755 --- a/t/op/splice.t +++ b/t/op/splice.t @@ -1,6 +1,6 @@ #!./perl -print "1..9\n"; +print "1..10\n"; @a = (1..10); @@ -32,3 +32,9 @@ print "ok 8\n"; print "not " unless j(splice(@a,-3,-2,2)) eq j(7) && j(@a) eq j(1,2,7,3); print "ok 9\n"; + +# Bug 20000223.001 - no test for splice(@array). Destructive test! +print "not " unless j(splice(@a)) eq j(1,2,7,3) && j(@a) eq ''; +print "ok 10\n"; + + diff --git a/t/op/taint.t b/t/op/taint.t index 46b9aab3fb..0d1e747daf 100755 --- a/t/op/taint.t +++ b/t/op/taint.t @@ -106,7 +106,7 @@ print PROG 'print "@ARGV\n"', "\n"; close PROG; my $echo = "$Invoke_Perl $ECHO"; -print "1..173\n"; +print "1..174\n"; # First, let's make sure that Perl is checking the dangerous # environment variables. Maybe they aren't set yet, so we'll @@ -811,3 +811,22 @@ else { } } +{ + # bug 20010526.004 + + use warnings; + + $SIG{__WARN__} = sub { print "not " }; + + sub fmi { + my $divnum = shift()/1; + sprintf("%1.1f\n", $divnum); + } + + fmi(21 . $TAINT); + fmi(37); + fmi(248); + + print "ok 174\n"; +} + diff --git a/t/op/ver.t b/t/op/ver.t index 0fe7fd1bbb..05bd854b24 100755 --- a/t/op/ver.t +++ b/t/op/ver.t @@ -5,7 +5,7 @@ BEGIN { @INC = '../lib'; } -print "1..33\n"; +print "1..37\n"; my $test = 1; @@ -222,3 +222,17 @@ okeq(v5.6.0 lt v5.7.0,1,"v5.6.0 lt v5.7.0 fails"); # floating point too messy # my $v = ord($^V)+ord(substr($^V,1,1))/1000+ord(substr($^V,2,1))/1000000; # okeq($v,$],"\$^V and \$] do not match"); + +# 34..37: part of 20000323.059 +print "not " unless v200 eq chr(200); +print "ok 34\n"; + +print "not " unless v200 eq +v200; +print "ok 35\n"; + +print "not " unless v200 eq eval "v200"; +print "ok 36\n"; + +print "not " unless v200 eq eval "+v200"; +print "ok 37\n"; + diff --git a/t/pragma/autouse.t b/t/pragma/autouse.t new file mode 100644 index 0000000000..0120ed0899 --- /dev/null +++ b/t/pragma/autouse.t @@ -0,0 +1,53 @@ +#!./perl + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +use Test; +BEGIN { plan tests => 9; } + +BEGIN { + require autouse; + eval { + "autouse"->import('List::Util' => 'List::Util::first'); + }; + ok( $@, qr/^autouse into different package attempted/ ); + + "autouse"->import('List::Util' => qw(max first(&@))); +} + +my @a = (1,2,3,4,5.5); +ok( max(@a), 5.5); + + +# first() has a prototype of &@. Make sure that's preserved. +ok( (first { $_ > 3 } @a), 4); + + +# Example from the docs. +use autouse 'Carp' => qw(carp croak); + +{ + my @warning; + local $SIG{__WARN__} = sub { push @warning, @_ }; + carp "this carp was predeclared and autoused\n"; + ok( scalar @warning, 1 ); + ok( $warning[0], "this carp was predeclared and autoused\n" ); + + eval { croak "It is but a scratch!" }; + ok( $@, qr/^It is but a scratch!/); +} + + +# Test that autouse's lazy module loading works. We assume that nothing +# involved in this test uses Test::Soundex, which is pretty safe. +use File::Spec; +use autouse 'Text::Soundex' => qw(soundex); + +my $mod_file = File::Spec->catfile(qw(Text Soundex.pm)); +ok( !exists $INC{$mod_file} ); +ok( soundex('Basset'), 'B230' ); +ok( exists $INC{$mod_file} ); + diff --git a/t/pragma/locale.t b/t/pragma/locale.t index 068fedeac8..000203b3c4 100755 --- a/t/pragma/locale.t +++ b/t/pragma/locale.t @@ -515,16 +515,15 @@ foreach $Locale (@Locale) { # Test \w. if (utf8locale($Locale)) { - # Until the polymorphic regexen arrive. + # utf8 and locales do not mix. debug "# skipping UTF-8 locale '$Locale'\n"; } else { my $word = join('', @Neoalpha); $word =~ /^(\w+)$/; - + tryneoalpha($Locale, 99, $1 eq $word); } - # Cross-check the whole 8-bit character set. for (map { chr } 0..255) { @@ -697,29 +696,32 @@ foreach $Locale (@Locale) { # Does lc of an UPPER (if different from the UPPER) match # case-insensitively the UPPER, and does the UPPER match # case-insensitively the lc of the UPPER. And vice versa. - if (utf8locale($Locale)) { - # Until the polymorphic regexen arrive. - debug "# skipping UTF-8 locale '$Locale'\n"; - } else { - use locale; - - my @f = (); - foreach my $x (keys %UPPER) { - my $y = lc $x; - next unless uc $y eq $x; - push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; - } - foreach my $x (keys %lower) { - my $y = uc $x; - next unless lc $y eq $x; - push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; - } - tryneoalpha($Locale, 116, @f == 0); - if (@f) { - print "# failed 116 locale '$Locale' characters @f\n" + { + if (utf8locale($Locale)) { + # utf8 and locales do not mix. + debug "# skipping UTF-8 locale '$Locale'\n"; + } else { + use locale; + use locale; + no utf8; # so that the native 8-bit characters work + + my @f = (); + foreach my $x (keys %UPPER) { + my $y = lc $x; + next unless uc $y eq $x; + push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; + } + foreach my $x (keys %lower) { + my $y = uc $x; + next unless lc $y eq $x; + push @f, $x unless $x =~ /$y/i && $y =~ /$x/i; + } + tryneoalpha($Locale, 116, @f == 0); + if (@f) { + print "# failed 116 locale '$Locale' characters @f\n" + } } } - } # Recount the errors. diff --git a/t/pragma/warn/pp_hot b/t/pragma/warn/pp_hot index 3ee853f6e2..c5a3790587 100644 --- a/t/pragma/warn/pp_hot +++ b/t/pragma/warn/pp_hot @@ -211,6 +211,21 @@ $b = sub EXPECT ######## # pp_hot.c [pp_concat] +use warnings 'uninitialized'; +my($x, $y); +sub a { shift } +a($x . "x"); # should warn once +a($x . $y); # should warn twice +$x .= $y; # should warn once +$y .= $y; # should warn once +EXPECT +Use of uninitialized value in concatenation (.) or string at - line 5. +Use of uninitialized value in concatenation (.) or string at - line 6. +Use of uninitialized value in concatenation (.) or string at - line 6. +Use of uninitialized value in concatenation (.) or string at - line 7. +Use of uninitialized value in concatenation (.) or string at - line 8. +######## +# pp_hot.c [pp_concat] use warnings 'y2k'; use Config; BEGIN { @@ -1043,6 +1043,7 @@ S_sublex_push(pTHX) SAVEI32(PL_lex_inwhat); SAVECOPLINE(PL_curcop); SAVEPPTR(PL_bufptr); + SAVEPPTR(PL_bufend); SAVEPPTR(PL_oldbufptr); SAVEPPTR(PL_oldoldbufptr); SAVEPPTR(PL_last_lop); @@ -1438,14 +1439,14 @@ S_scan_const(pTHX_ char *start) ++s; if (*s == '{') { char* e = strchr(s, '}'); + STRLEN len = 1; /* allow underscores */ + if (!e) { yyerror("Missing right brace on \\x{}"); - e = s; - } - else { - STRLEN len = 1; /* allow underscores */ - uv = (UV)scan_hex(s + 1, e - s - 1, &len); + ++s; + continue; } + uv = (UV)scan_hex(s + 1, e - s - 1, &len); s = e + 1; } else { @@ -1634,7 +1635,7 @@ S_scan_const(pTHX_ char *start) *d = '\0'; SvCUR_set(sv, d - SvPVX(sv)); if (SvCUR(sv) >= SvLEN(sv)) - Perl_croak(aTHX_ "panic:constant overflowed allocated space"); + Perl_croak(aTHX_ "panic: constant overflowed allocated space"); SvPOK_on(sv); if (has_utf8) { @@ -3227,8 +3228,16 @@ Perl_yylex(pTHX) else PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR; s = skipspace(s); - if (*s == '}') + if (*s == '}') { + if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) { + PL_expect = XTERM; + /* This hack is to get the ${} in the message. */ + PL_bufptr = s+1; + yyerror("syntax error"); + break; + } OPERATOR(HASHBRACK); + } /* This hack serves to disambiguate a pair of curlies * as being a block or an anon hash. Normally, expectation * determines that, but in cases where we're not in a @@ -3773,7 +3782,7 @@ Perl_yylex(pTHX) TERM(THING); } /* avoid v123abc() or $h{v1}, allow C<print v10;> */ - else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF)) { + else if (!isALPHA(*start) && (PL_expect == XTERM || PL_expect == XREF || PL_expect == XSTATE)) { char c = *start; GV *gv; *start = '\0'; @@ -981,7 +981,7 @@ * preprocessor can make decisions based on it. */ #define INTSIZE 1 /**/ -#define LONGSIZE 1 /**/ +#define LONGSIZE 4 /**/ #define SHORTSIZE 1 /**/ /* MULTIARCH: @@ -1038,7 +1038,13 @@ * by Configure. You shouldn't rely on it too much; the specific * feature tests from Configure are generally more reliable. */ +/* OSVERS: + * This symbol contains the version of the operating system, as determined + * by Configure. You shouldn't rely on it too much; the specific + * feature tests from Configure are generally more reliable. + */ #define OSNAME "unknown" /**/ +#define OSVERS "" /**/ /* MEM_ALIGNBYTES: * This symbol contains the number of bytes required to align a @@ -1146,7 +1152,7 @@ # define BYTEORDER 0x4321 # endif #else -#define BYTEORDER 0x12 /* large digits for MSB */ +#define BYTEORDER 0x1234 /* large digits for MSB */ #endif /* NeXT */ /* CAT2: @@ -2324,7 +2330,7 @@ * This symbol contains the size of a double, so that the C preprocessor * can make decisions based on it. */ -#define DOUBLESIZE 1 /**/ +#define DOUBLESIZE 8 /**/ /* EBCDIC: * This symbol, if defined, indicates that this system uses @@ -2927,7 +2933,7 @@ * the compiler supports (void *); otherwise it will be * sizeof(char *). */ -#define PTRSIZE 1 /**/ +#define PTRSIZE 4 /**/ /* Drand01: * This macro is to be used to generate uniformly distributed diff --git a/uconfig.sh b/uconfig.sh index ed31a3daf3..d5254f1f38 100755 --- a/uconfig.sh +++ b/uconfig.sh @@ -9,7 +9,7 @@ archlibexp='/usr/local/lib/perl5/5.7/unknown' archname='unknown' bin='/usr/local/bin' bincompat5005='define' -byteorder='12' +byteorder='1234' castflags='0' charsize='1' clocktype='clock_t' @@ -342,7 +342,7 @@ db_hashtype='u_int32_t' db_prefixtype='size_t' defvoidused=1 direntrytype='struct dirent' -doublesize=1 +doublesize='8' drand01="((rand() & 0x7FFF) / (double) ((unsigned long)1 << 15))" eagain='EAGAIN' ebcdic='undef' @@ -458,7 +458,7 @@ ivtype='long' lib_ext='.a' longdblsize=1 longlongsize=1 -longsize=1 +longsize='4' lseeksize=1 lseektype=int malloctype='int*' @@ -490,7 +490,7 @@ pm_apiversion='5.005' privlib='/usr/local/lib/perl5/5.7' privlibexp='/usr/local/lib/perl5/5.7' prototype='undef' -ptrsize=1 +ptrsize='4' quadkind='4' quadtype='int64_t' randbits='48' @@ -1240,10 +1240,15 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none) SV* tokenbufsv = sv_2mortal(NEWSV(0,0)); dSP; HV *stash = gv_stashpvn(pkg, strlen(pkg), FALSE); + SV* errsv_save; if (!gv_fetchmeth(stash, "SWASHNEW", 8, -1)) { /* demand load utf8 */ ENTER; + errsv_save = newSVsv(ERRSV); Perl_load_module(aTHX_ PERL_LOADMOD_NOIMPORT, newSVpv(pkg,0), Nullsv); + if (!SvTRUE(ERRSV)) + sv_setsv(ERRSV, errsv_save); + SvREFCNT_dec(errsv_save); LEAVE; } SPAGAIN; @@ -1263,10 +1268,14 @@ Perl_swash_init(pTHX_ char* pkg, char* name, SV *listsv, I32 minbits, I32 none) if (PL_curcop == &PL_compiling) /* XXX ought to be handled by lex_start */ sv_setpv(tokenbufsv, PL_tokenbuf); + errsv_save = newSVsv(ERRSV); if (call_method("SWASHNEW", G_SCALAR)) retval = newSVsv(*PL_stack_sp--); else retval = &PL_sv_undef; + if (!SvTRUE(ERRSV)) + sv_setsv(ERRSV, errsv_save); + SvREFCNT_dec(errsv_save); LEAVE; POPSTACK; if (PL_curcop == &PL_compiling) { @@ -1350,6 +1359,7 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8) Unicode tables, not a native character number. */ UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXLEN, NULL, 0); + SV *errsv_save; ENTER; SAVETMPS; save_re_context(); @@ -1362,10 +1372,14 @@ Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8) (code_point & ~(needents - 1)) : 0))); PUSHs(sv_2mortal(newSViv(needents))); PUTBACK; + errsv_save = newSVsv(ERRSV); if (call_method("SWASHGET", G_SCALAR)) retval = newSVsv(*PL_stack_sp--); else retval = &PL_sv_undef; + if (!SvTRUE(ERRSV)) + sv_setsv(ERRSV, errsv_save); + SvREFCNT_dec(errsv_save); POPSTACK; FREETMPS; LEAVE; diff --git a/utils/dprofpp.PL b/utils/dprofpp.PL index b1379bf7e3..8f6afe46df 100644 --- a/utils/dprofpp.PL +++ b/utils/dprofpp.PL @@ -693,7 +693,7 @@ sub parsestack { chop; if (/^&/) { ($dir, $id, $pack, $name) = split; - if ($opt_R and ($name =~ /::(__ANON_|END)$/)) { + if ($opt_R and ($name =~ /(?:::)?(__ANON__|END)$/)) { $name .= "($id)"; } $cv_hash{$id} = "$pack\::$name"; @@ -830,7 +830,7 @@ sub exitstamp { die "Garbled profile, missing an enter time stamp"; } if( $x->[0] ne $name and $opt_G and ($name =~ /$opt_G/)){ - if ($x->[0] =~ /::AUTOLOAD$/) { + if ($x->[0] =~ /(?:::)?AUTOLOAD$/) { if ($opt_A) { $name = $x->[0]; } diff --git a/utils/h2xs.PL b/utils/h2xs.PL index 4333c0fd88..ef31a2e8a9 100644 --- a/utils/h2xs.PL +++ b/utils/h2xs.PL @@ -116,6 +116,18 @@ two methods are constructed for the structure type itself, C<_to_ptr> which returns a Ptr type pointing to the same structure, and a C<new> method to construct and return a new structure, initialised to zeroes. +=item B<-b> I<version> + +Generates a .pm file which is backwards compatible with the specified +perl version. + +For versions < 5.6.0, the changes are. + - no use of 'our' (uses 'use vars' instead) + - no 'use warnings' + +Specifying a compatibility version higher than the version of perl you +are using to run h2xs will have no effect. + =item B<-c> Omit C<constant()> from the .xs file and corresponding specialised @@ -178,6 +190,13 @@ with the constant() subroutine. These macros are assumed to have a return type of B<char *>, e.g., S<-s sec_rgy_wildcard_name,sec_rgy_wildcard_sid>. +=item B<-t> I<type> + +Specify the internal type that the constant() mechanism uses for macros. +The default is IV (signed integer). Currently all macros found during the +header scanning process will be assumed to have this type. Future versions +of C<h2xs> may gain the ability to make educated guesses. + =item B<-v> I<version> Specify a version number for this extension. This version number is added @@ -198,18 +217,6 @@ hand-editing. Such may be objects which cannot be converted from/to a pointer (like C<long long>), pointers to functions, or arrays. See also the section on L<LIMITATIONS of B<-x>>. -=item B<-b> I<version> - -Generates a .pm file which is backwards compatible with the specified -perl version. - -For versions < 5.6.0, the changes are. - - no use of 'our' (uses 'use vars' instead) - - no 'use warnings' - -Specifying a compatibility version higher than the version of perl you -are using to run h2xs will have no effect. - =back =head1 EXAMPLES @@ -417,6 +424,10 @@ my $compat_version = $]; use Getopt::Std; use Config; +use Text::Wrap; +$Text::Wrap::huge = 'overflow'; +$Text::Wrap::columns = 80; +use ExtUtils::Constant qw (constant_types C_constant XS_constant autoload); sub usage { warn "@_\n" if @_; @@ -444,6 +455,7 @@ version: $H2XS_VERSION -v Specify a version number for this extension. -x Autogenerate XSUBs using C::Scan. -b Specify a perl version to be backwards compatibile with + -t Default type for autoloaded constants extra_libraries are any libraries that might be needed for loading the extension, e.g. -lm would try to link in the math library. @@ -451,10 +463,10 @@ EOFUSAGE } -getopts("ACF:M:OPXacdfhkmn:o:p:s:v:xb:") || usage; -use vars qw($opt_A $opt_C $opt_F $opt_M $opt_O $opt_P $opt_X $opt_a $opt_c $opt_d - $opt_f $opt_h $opt_k $opt_m $opt_n $opt_o $opt_p $opt_s $opt_v $opt_x - $opt_b); +getopts("ACF:M:OPXacdfhkmn:o:p:s:v:xb:t:") || usage; +use vars qw($opt_A $opt_C $opt_F $opt_M $opt_O $opt_P $opt_X $opt_a $opt_c + $opt_d $opt_f $opt_h $opt_k $opt_m $opt_n $opt_o $opt_p $opt_s + $opt_v $opt_x $opt_b $opt_t); usage if $opt_h; @@ -896,41 +908,7 @@ if (@vdecls) { } -$tmp = ( $compat_version < 5.006 ? "" : "our \$AUTOLOAD;" ); -print PM <<"END" unless $opt_c or $opt_X; -sub AUTOLOAD { - # This AUTOLOAD is used to 'autoload' constants from the constant() - # XS function. If a constant is not found then control is passed - # to the AUTOLOAD in AutoLoader. - - my \$constname; - $tmp - (\$constname = \$AUTOLOAD) =~ s/.*:://; - croak "&${module}::constant not defined" if \$constname eq 'constant'; - my \$val = constant(\$constname, \@_ ? \$_[0] : 0); - if (\$! != 0) { - if (\$! =~ /Invalid/ || \$!{EINVAL}) { - \$AutoLoader::AUTOLOAD = \$AUTOLOAD; - goto &AutoLoader::AUTOLOAD; - } - else { - croak "Your vendor has not defined $module macro \$constname"; - } - } - { - no strict 'refs'; - # Fixed between 5.005_53 and 5.005_61 - if (\$] >= 5.00561) { - *\$AUTOLOAD = sub () { \$val }; - } - else { - *\$AUTOLOAD = sub { \$val }; - } - } - goto &\$AUTOLOAD; -} - -END +print PM autoload ($module, $compat_version) unless $opt_c or $opt_X; if( ! $opt_X ){ # print bootstrap, unless XS is disabled print PM <<"END"; @@ -1152,186 +1130,15 @@ sub td_is_struct { return ($struct_typedefs{$otype} = $out); } -# Some macros will bomb if you try to return them from a double-returning func. -# Say, ((char *)0), or strlen (if somebody #define STRLEN strlen). -# Fortunately, we can detect both these cases... -sub protect_convert_to_double { - my $in = shift; - my $val; - return '' unless defined ($val = $seen_define{$in}); - return '(IV)' if $known_fnames{$val}; - # OUT_t of ((OUT_t)-1): - return '' unless $val =~ /^\s*(\(\s*)?\(\s*([^()]*?)\s*\)/; - td_is_pointer($2) ? '(IV)' : ''; -} - -# For each of the generated functions, length($pref) leading -# letters are already checked. Moreover, it is recommended that -# the generated functions uses switch on letter at offset at least -# $off + length($pref). -# -# The given list has length($pref) chars removed at front, it is -# guarantied that $off leading chars in the rest are the same for all -# elts of the list. -# -# Returns: how at which offset it was decided to make a switch, or -1 if none. - -sub write_const; - -sub write_const { - my ($fh, $pref, $off, $list) = (shift,shift,shift,shift); - my %leading; - my $offarg = length $pref; - - if (@$list == 0) { # Can happen on the initial iteration only - print $fh <<"END"; -static NV -constant(char *name, int len, int arg) -{ - errno = EINVAL; - return 0; -} -END - return -1; - } - - if (@$list == 1) { # Can happen on the initial iteration only - my $protect = protect_convert_to_double("$pref$list->[0]"); - - print $fh <<"END"; -static NV -constant(char *name, int len, int arg) -{ - errno = 0; - if (strEQ(name + $offarg, "$list->[0]")) { /* \"$pref\" removed */ -#ifdef $pref$list->[0] - return $protect$pref$list->[0]; -#else - errno = ENOENT; - return 0; -#endif - } - errno = EINVAL; - return 0; -} -END - return -1; - } - - for my $n (@$list) { - my $c = substr $n, $off, 1; - $leading{$c} = [] unless exists $leading{$c}; - push @{$leading{$c}}, $off < length $n ? substr $n, $off + 1 : $n - } - - if (keys(%leading) == 1) { - return 1 + write_const $fh, $pref, $off + 1, $list; - } - - my $leader = substr $list->[0], 0, $off; - foreach my $letter (keys %leading) { - write_const $fh, "$pref$leader$letter", 0, $leading{$letter} - if @{$leading{$letter}} > 1; - } - - my $npref = "_$pref"; - $npref = '' if $pref eq ''; - - print $fh <<"END"; -static NV -constant$npref(char *name, int len, int arg) -{ -END - - print $fh <<"END" if $npref eq ''; - errno = 0; -END - - if ($off) { - my $null = 0; - - foreach my $letter (keys %leading) { - if ($letter eq '') { - $null = 1; - last; - } - } - - my $cmp = $null ? '>' : '>='; - - print $fh <<"END" - if ($offarg + $off $cmp len ) { - errno = EINVAL; - return 0; - } -END - } - - print $fh <<"END"; - switch (name[$offarg + $off]) { -END - - foreach my $letter (sort keys %leading) { - my $let = $letter; - $let = '\0' if $letter eq ''; - - print $fh <<EOP; - case '$let': -EOP - if (@{$leading{$letter}} > 1) { - # It makes sense to call a function - if ($off) { - print $fh <<EOP; - if (!strnEQ(name + $offarg,"$leader", $off)) - break; -EOP - } - print $fh <<EOP; - return constant_$pref$leader$letter(name, len, arg); -EOP - } - else { - # Do it ourselves - my $protect - = protect_convert_to_double("$pref$leader$letter$leading{$letter}[0]"); - - print $fh <<EOP; - if (strEQ(name + $offarg, "$leader$letter$leading{$letter}[0]")) { /* \"$pref\" removed */ -#ifdef $pref$leader$letter$leading{$letter}[0] - return $protect$pref$leader$letter$leading{$letter}[0]; -#else - goto not_there; -#endif - } -EOP - } - } - print $fh <<"END"; - } - errno = EINVAL; - return 0; - -not_there: - errno = ENOENT; - return 0; -} - -END - -} +my $types = {}; +# Important. Passing an undef scalar doesn't cause the +# autovivified hashref to appear back out in this scope. if( ! $opt_c ) { - print XS <<"END"; -static int -not_here(char *s) -{ - croak("${module}::%s not implemented on this architecture", s); - return -1; -} - -END - - write_const(\*XS, '', 0, \@const_names); + print XS constant_types(), "\n"; + foreach (C_constant (undef, $opt_t, $types, undef, undef, @const_names)) { + print XS $_, "\n"; + } } print_tievar_subs(\*XS, $_, $vdecl_hash{$_}) for @vdecls; @@ -1365,22 +1172,8 @@ END # If a constant() function was written then output a corresponding # XS declaration: -print XS <<"END" unless $opt_c; - -NV -constant(sv,arg) - PREINIT: - STRLEN len; - INPUT: - SV * sv - char * s = SvPV(sv, len); - int arg - CODE: - RETVAL = constant(s,len,arg); - OUTPUT: - RETVAL - -END +# XXX IVs +print XS XS_constant ($module, $types) unless $opt_c; my %seen_decl; my %typemap; @@ -1872,10 +1665,14 @@ ok(1); # If we made it this far, we're ok. _END_ if (@const_names) { my $const_names = join " ", @const_names; - print EX <<_END_; + print EX <<'_END_'; -my \$fail; -foreach my \$constname qw($const_names) { +my $fail; +foreach my $constname (qw( +_END_ + print EX wrap ("\t", "\t", $const_names); + print EX (")) {\n"); + print EX <<_END_; next if (eval "my \\\$a = \$constname; 1"); if (\$\@ =~ /^Your vendor has not defined $module macro \$constname/) { print "# pass: \$\@"; diff --git a/vms/ext/Stdio/Stdio.xs b/vms/ext/Stdio/Stdio.xs index d82b17dbfa..9b61c590c0 100644 --- a/vms/ext/Stdio/Stdio.xs +++ b/vms/ext/Stdio/Stdio.xs @@ -81,7 +81,7 @@ IV *pval; static SV * -newFH(FILE *fp, char type) { +newFH(PerlIO *fp, char type) { SV *rv; GV **stashp, *gv = (GV *)NEWSV(0,0); HV *stash; @@ -129,15 +129,15 @@ binmode(fh) PROTOTYPE: $ CODE: IO *io = sv_2io(fh); - FILE *fp = io ? IoOFP(io) : NULL; + PerlIO *fp = io ? IoOFP(io) : NULL; char iotype = io ? IoTYPE(io) : '\0'; char filespec[NAM$C_MAXRSS], *acmode, *s, *colon, *dirend = Nullch; int ret = 0, saverrno = errno, savevmserrno = vaxc$errno; - fpos_t pos; + SV pos; if (fp == NULL || strchr(">was+-|",iotype) == Nullch) { set_errno(EBADF); set_vaxc_errno(SS$_IVCHAN); XSRETURN_UNDEF; } - if (!fgetname(fp,filespec)) XSRETURN_UNDEF; + if (!PerlIO_getname(fp,filespec)) XSRETURN_UNDEF; for (s = filespec; *s; s++) { if (*s == ':') colon = s; else if (*s == ']' || *s == '>') dirend = s; @@ -149,7 +149,7 @@ binmode(fh) /* If we've got a non-file-structured device, clip off the trailing * junk, and don't lose sleep if we can't get a stream position. */ if (dirend == Nullch) *(colon+1) = '\0'; - if (iotype != '-' && (ret = fgetpos(fp, &pos)) == -1 && dirend) + if (iotype != '-' && (ret = PerlIO_getpos(fp, &pos)) == -1 && dirend) XSRETURN_UNDEF; switch (iotype) { case '<': case 'r': acmode = "rb"; break; @@ -158,7 +158,7 @@ binmode(fh) fsetpos below will take care of restoring file position */ case 'a': acmode = "ab"; break; case '+': case 's': acmode = "rb+"; break; - case '-': acmode = fileno(fp) ? "ab" : "rb"; break; + case '-': acmode = PerlIO_fileno(fp) ? "ab" : "rb"; break; /* iotype'll be null for the SYS$INPUT:/SYS$OUTPUT:/SYS$ERROR: files */ /* since we didn't really open them and can't really */ /* reopen them */ @@ -168,35 +168,41 @@ binmode(fh) iotype, filespec); acmode = "rb+"; } - if (freopen(filespec,acmode,fp) == NULL) XSRETURN_UNDEF; - if (iotype != '-' && ret != -1 && fsetpos(fp,&pos) == -1) XSRETURN_UNDEF; + /* appearances to the contrary, this is an freopen substitute */ + SV *name = sv_2mortal(newSVpvn(filespec,strlen(filespec))); + if (PerlIO_openn(Nullch,acmode,-1,0,0,fp,1,&name) == Nullfp) XSRETURN_UNDEF; + if (iotype != '-' && ret != -1 && PerlIO_setpos(fp,&pos) == -1) XSRETURN_UNDEF; if (ret == -1) { set_errno(saverrno); set_vaxc_errno(savevmserrno); } XSRETURN_YES; void flush(fp) - FILE * fp + PerlIO * fp PROTOTYPE: $ CODE: - if (fflush(fp)) { ST(0) = &PL_sv_undef; } - else { clearerr(fp); ST(0) = &PL_sv_yes; } + FILE *stdio = PerlIO_exportFILE(fp,0); + if (fflush(stdio)) { ST(0) = &PL_sv_undef; } + else { clearerr(stdio); ST(0) = &PL_sv_yes; } + PerlIO_releaseFILE(fp,stdio); char * getname(fp) - FILE * fp + PerlIO * fp PROTOTYPE: $ CODE: char fname[NAM$C_MAXRSS+1]; ST(0) = sv_newmortal(); - if (fgetname(fp,fname) != NULL) sv_setpv(ST(0),fname); + if (PerlIO_getname(fp,fname) != NULL) sv_setpv(ST(0),fname); void rewind(fp) - FILE * fp + PerlIO * fp PROTOTYPE: $ CODE: - ST(0) = rewind(fp) ? &PL_sv_undef : &PL_sv_yes; + FILE *stdio = PerlIO_exportFILE(fp,0); + ST(0) = rewind(stdio) ? &PL_sv_undef : &PL_sv_yes; + PerlIO_releaseFILE(fp,stdio); void remove(name) @@ -261,11 +267,13 @@ setdef(...) void sync(fp) - FILE * fp + PerlIO * fp PROTOTYPE: $ CODE: - if (fsync(fileno(fp))) { ST(0) = &PL_sv_undef; } - else { clearerr(fp); ST(0) = &PL_sv_yes; } + FILE *stdio = PerlIO_exportFILE(fp,0); + if (fsync(fileno(stdio))) { ST(0) = &PL_sv_undef; } + else { clearerr(stdio); ST(0) = &PL_sv_yes; } + PerlIO_releaseFILE(fp,stdio); char * tmpnam() @@ -283,6 +291,7 @@ vmsopen(spec,...) char *args[8],mode[3] = {'r','\0','\0'}, type = '<'; register int i, myargc; FILE *fp; + PerlIO *pio_fp; STRLEN n_a; if (!spec || !*spec) { @@ -333,8 +342,9 @@ vmsopen(spec,...) fp = fopen(spec,mode,args[0],args[1],args[2],args[3],args[4],args[5],args[6],args[7]); break; } - if (fp != Nullfp) { - SV *fh = newFH(fp,(mode[1] ? '+' : (mode[0] == 'r' ? '<' : (mode[0] == 'a' ? 'a' : '>')))); + if (fp != Null(FILE*)) { + pio_fp = PerlIO_importFILE(fp,0); + SV *fh = newFH(pio_fp,(mode[1] ? '+' : (mode[0] == 'r' ? '<' : (mode[0] == 'a' ? 'a' : '>')))); ST(0) = (fh ? sv_2mortal(fh) : &PL_sv_undef); } else { ST(0) = &PL_sv_undef; } @@ -349,6 +359,7 @@ vmssysopen(spec,mode,perm,...) char *args[8]; int i, myargc, fd; FILE *fp; + PerlIO *pio_fp; SV *fh; STRLEN n_a; if (!spec || !*spec) { @@ -391,18 +402,21 @@ vmssysopen(spec,mode,perm,...) } i = mode & 3; if (fd >= 0 && - ((fp = fdopen(fd, &("r\000w\000r+"[2*i]))) != Nullfp)) { - SV *fh = newFH(fp,"<>++"[i]); + ((fp = fdopen(fd, &("r\000w\000r+"[2*i]))) != Null(FILE*))) { + pio_fp = PerlIO_importFILE(fp,0); + SV *fh = newFH(pio_fp,"<>++"[i]); ST(0) = (fh ? sv_2mortal(fh) : &PL_sv_undef); } else { ST(0) = &PL_sv_undef; } void waitfh(fp) - FILE * fp + PerlIO * fp PROTOTYPE: $ CODE: - ST(0) = fwait(fp) ? &PL_sv_undef : &PL_sv_yes; + FILE *stdio = PerlIO_exportFILE(fp,0); + ST(0) = fwait(stdio) ? &PL_sv_undef : &PL_sv_yes; + PerlIO_releaseFILE(fp,stdio); void writeof(mysv) @@ -413,11 +427,11 @@ writeof(mysv) unsigned long int chan, iosb[2], retsts, retsts2; struct dsc$descriptor devdsc = {0, DSC$K_DTYPE_T, DSC$K_CLASS_S, devnam}; IO *io = sv_2io(mysv); - FILE *fp = io ? IoOFP(io) : NULL; + PerlIO *fp = io ? IoOFP(io) : NULL; if (fp == NULL || strchr(">was+-|",IoTYPE(io)) == Nullch) { set_errno(EBADF); set_vaxc_errno(SS$_IVCHAN); XSRETURN_UNDEF; } - if (fgetname(fp,devnam) == Nullch) { ST(0) = &PL_sv_undef; XSRETURN(1); } + if (PerlIO_getname(fp,devnam) == Nullch) { ST(0) = &PL_sv_undef; XSRETURN(1); } if ((cp = strrchr(devnam,':')) != NULL) *(cp+1) = '\0'; devdsc.dsc$w_length = strlen(devnam); retsts = sys$assign(&devdsc,&chan,0,0); diff --git a/vms/gen_shrfls.pl b/vms/gen_shrfls.pl index 48499d4a49..d393b0f0cc 100644 --- a/vms/gen_shrfls.pl +++ b/vms/gen_shrfls.pl @@ -39,7 +39,7 @@ require 5.000; $debug = $ENV{'GEN_SHRFLS_DEBUG'}; -print "gen_shrfls.pl Rev. 14-Dec-1997\n" if $debug; +print "gen_shrfls.pl Rev. 18-May-2001\n" if $debug; if ($ARGV[0] eq '-f') { open(INP,$ARGV[1]) or die "Can't read input file $ARGV[1]: $!\n"; @@ -69,7 +69,7 @@ if ($docc) { else { die "$0: Can't find perl.h\n"; } $use_threads = $use_mymalloc = $case_about_case = $debugging_enabled = 0; - $hide_mymalloc = $isgcc = 0; + $hide_mymalloc = $isgcc = $use_perlio = 0; # Go see what is enabled in config.sh $config = $dir . "config.sh"; @@ -81,6 +81,7 @@ if ($docc) { $debugging_enabled++ if /usedebugging_perl='Y'/; $hide_mymalloc++ if /embedmymalloc='Y'/; $isgcc++ if /gccversion='[^']/; + $use_perlio++ if /useperlio='define'/; } close CONFIG; @@ -147,6 +148,7 @@ sub scan_func { my($line) = @_; print "\tchecking for global routine\n" if $debug > 1; + $line =~ s/\b(IV|Off_t|Size_t|SSize_t|void)\b//i; if ( $line =~ /(\w+)\s*\(/ ) { print "\troutine name is \\$1\\\n" if $debug > 1; if ($1 eq 'main' || $1 eq 'perl_init_ext') { @@ -164,10 +166,16 @@ if ($use_mymalloc) { $fcns{'Perl_mfree'}++; } +if ($use_perlio) { + $preprocess_list = "${dir}perl.h,${dir}perliol.h"; +} else { + $preprocess_list = "${dir}perl.h"; +} + $used_expectation_enum = $used_opcode_enum = 0; # avoid warnings if ($docc) { - open(CPP,"${cc_cmd}/NoObj/PreProc=Sys\$Output ${dir}perl.h|") - or die "$0: Can't preprocess ${dir}perl.h: $!\n"; + open(CPP,"${cc_cmd}/NoObj/PreProc=Sys\$Output $preprocess_list|") + or die "$0: Can't preprocess $preprocess_list: $!\n"; } else { open(CPP,"$cpp_file") or die "$0: Can't read preprocessed file $cpp_file: $!\n"; @@ -198,6 +206,7 @@ LINE: while (<CPP>) { # Pull name from library module or header filespec $spec =~ /^(\w+)$/ or $spec =~ /(\w+)\.h/i; my $name = lc $1; + $name = 'perlio' if $name eq 'perliol'; $ckfunc = exists $checkh{$name} ? 1 : 0; $scanname = $name if $ckfunc; print "Header file transition: ckfunc = $ckfunc for $name.h\n" if $debug > 1; @@ -49,6 +49,9 @@ # define SS$_NOSUCHOBJECT 2696 #endif +/* We implement I/O here, so we will be mixing PerlIO and stdio calls. */ +#define PERLIO_NOT_STDIO 0 + /* Don't replace system definitions of vfork, getenv, and stat, * code below needs to get to the underlying CRTL routines. */ #define DONT_MASK_RTL_CALLS @@ -2184,8 +2187,8 @@ safe_popen(pTHX_ char *cmd, char *mode) } /* end of safe_popen */ -/*{{{ FILE *my_popen(char *cmd, char *mode)*/ -FILE * +/*{{{ PerlIO *my_popen(char *cmd, char *mode)*/ +PerlIO * Perl_my_popen(pTHX_ char *cmd, char *mode) { TAINT_ENV(); @@ -2196,8 +2199,8 @@ Perl_my_popen(pTHX_ char *cmd, char *mode) /*}}}*/ -/*{{{ I32 my_pclose(FILE *fp)*/ -I32 Perl_my_pclose(pTHX_ FILE *fp) +/*{{{ I32 my_pclose(PerlIO *fp)*/ +I32 Perl_my_pclose(pTHX_ PerlIO *fp) { pInfo info, last = NULL; unsigned long int retsts; @@ -2220,7 +2223,7 @@ I32 Perl_my_pclose(pTHX_ FILE *fp) * the first EOF closing the pipe (and DASSGN'ing the channel)... */ - fsync(fileno(info->fp)); /* first, flush data */ + PerlIO_flush(info->fp); /* first, flush data */ _ckvmssts(sys$setast(0)); info->closing = TRUE; @@ -3620,7 +3623,7 @@ mp_getredirection(pTHX_ int *ac, char ***av) /* Input from a pipe, reopen it in binary mode to disable */ /* carriage control processing. */ - PerlIO_getname(stdin, mbxname); + fgetname(stdin, mbxname); mbxnam.dsc$a_pointer = mbxname; mbxnam.dsc$w_length = strlen(mbxnam.dsc$a_pointer); lib$getdvi(&dvi_item, 0, &mbxnam, &bufsize, 0, 0); @@ -3652,7 +3655,7 @@ mp_getredirection(pTHX_ int *ac, char ***av) if (err != NULL) { if (strcmp(err,"&1") == 0) { - dup2(fileno(stdout), fileno(Perl_debug_log)); + dup2(fileno(stdout), fileno(stderr)); Perl_vmssetuserlnm(aTHX_ "SYS$ERROR","SYS$OUTPUT"); } else { FILE *tmperr; @@ -3662,7 +3665,7 @@ mp_getredirection(pTHX_ int *ac, char ***av) exit(vaxc$errno); } fclose(tmperr); - if (NULL == freopen(err, "a", Perl_debug_log, "mbc=32", "mbf=2")) + if (NULL == freopen(err, "a", stderr, "mbc=32", "mbf=2")) { exit(vaxc$errno); } @@ -4847,9 +4850,9 @@ int my_fclose(FILE *fp) { * data with nulls sprinkled in the middle but also data with no null * byte at the end. */ -/*{{{ int my_fwrite(void *src, size_t itmsz, size_t nitm, FILE *dest)*/ +/*{{{ int my_fwrite(const void *src, size_t itmsz, size_t nitm, FILE *dest)*/ int -my_fwrite(void *src, size_t itmsz, size_t nitm, FILE *dest) +my_fwrite(const void *src, size_t itmsz, size_t nitm, FILE *dest) { register char *cp, *end, *cpd, *data; register unsigned int fd = fileno(dest); @@ -6577,7 +6580,7 @@ candelete_fromperl(pTHX_ CV *cv) mysv = SvROK(ST(0)) ? SvRV(ST(0)) : ST(0); if (SvTYPE(mysv) == SVt_PVGV) { - if (!(io = GvIOp(mysv)) || !fgetname(IoIFP(io),fspec,1)) { + if (!(io = GvIOp(mysv)) || !PerlIO_getname(IoIFP(io),fspec)) { set_errno(EINVAL); set_vaxc_errno(LIB$_INVARG); ST(0) = &PL_sv_no; XSRETURN(1); @@ -6614,7 +6617,7 @@ rmscopy_fromperl(pTHX_ CV *cv) mysv = SvROK(ST(0)) ? SvRV(ST(0)) : ST(0); if (SvTYPE(mysv) == SVt_PVGV) { - if (!(io = GvIOp(mysv)) || !fgetname(IoIFP(io),inspec,1)) { + if (!(io = GvIOp(mysv)) || !PerlIO_getname(IoIFP(io),inspec)) { set_errno(EINVAL); set_vaxc_errno(LIB$_INVARG); ST(0) = &PL_sv_no; XSRETURN(1); @@ -6630,7 +6633,7 @@ rmscopy_fromperl(pTHX_ CV *cv) } mysv = SvROK(ST(1)) ? SvRV(ST(1)) : ST(1); if (SvTYPE(mysv) == SVt_PVGV) { - if (!(io = GvIOp(mysv)) || !fgetname(IoIFP(io),outspec,1)) { + if (!(io = GvIOp(mysv)) || !PerlIO_getname(IoIFP(io),outspec)) { set_errno(EINVAL); set_vaxc_errno(LIB$_INVARG); ST(0) = &PL_sv_no; XSRETURN(1); diff --git a/vms/vmsish.h b/vms/vmsish.h index 2eb8e93c5f..a1f76301a4 100644 --- a/vms/vmsish.h +++ b/vms/vmsish.h @@ -310,7 +310,7 @@ #define _ckvmssts_noperl(call) STMT_START { register unsigned long int __ckvms_sts; \ if (!((__ckvms_sts=(call))&1)) { \ set_errno(EVMSERR); set_vaxc_errno(__ckvms_sts); \ - fprintf(Perl_debug_log,"Fatal VMS error (status=%d) at %s, line %d", \ + fprintf(stderr,"Fatal VMS error (status=%d) at %s, line %d", \ __ckvms_sts,__FILE__,__LINE__); lib$signal(__ckvms_sts); } } STMT_END #ifdef VMS_DO_SOCKETS @@ -411,6 +411,7 @@ #ifndef DONT_MASK_RTL_CALLS +# define fwrite my_fwrite /* for PerlSIO_fwrite */ # define fdopen my_fdopen # define fclose my_fclose #endif @@ -774,7 +775,7 @@ unsigned long int Perl_do_aspawn (pTHX_ void *, void **, void **); unsigned long int Perl_do_spawn (pTHX_ char *); FILE * my_fdopen (int, const char *); int my_fclose (FILE *); -int my_fwrite (void *, size_t, size_t, FILE *); +int my_fwrite (const void *, size_t, size_t, FILE *); int Perl_my_flush (pTHX_ FILE *); struct passwd * Perl_my_getpwnam (pTHX_ char *name); struct passwd * Perl_my_getpwuid (pTHX_ Uid_t uid); |