diff options
51 files changed, 4325 insertions, 845 deletions
@@ -31,6 +31,226 @@ or any other branch. Version v5.7.2 Development release working toward v5.8 -------------- ____________________________________________________________________________ +[ 12566] By: jhi on 2001/10/22 12:22:29 + Log: Subject: [PATCH pod/perlvar.pod] Undeprecating $] + From: Michael G Schwern <schwern@pobox.com> + Date: Mon, 22 Oct 2001 03:48:38 -0400 + Message-ID: <20011022034838.B1676@blackrider> + Branch: perl + ! pod/perlvar.pod +____________________________________________________________________________ +[ 12565] By: jhi on 2001/10/22 12:21:28 + Log: Pod nits, as suggested by Stas Bekman. + Branch: perl + ! ext/threads/shared/shared.pm ext/threads/threads.pm +____________________________________________________________________________ +[ 12564] By: jhi on 2001/10/22 12:17:00 + Log: Subject: [PATCH @ ] Fix for FETCH/NEXTKEY problem in all *DB*_File modules + From: "Paul Marquess" <paul.marquess@openwave.com> + Date: Sun, 21 Oct 2001 21:11:15 +0100 + Message-ID: <AIEAJICLCBDNAAOLLOKLAEOMDCAA.paul.marquess@openwave.com> + Branch: perl + ! ext/DB_File/DB_File.xs ext/DB_File/t/db-btree.t + ! ext/DB_File/t/db-hash.t ext/GDBM_File/GDBM_File.xs + ! ext/GDBM_File/gdbm.t ext/GDBM_File/typemap + ! ext/NDBM_File/NDBM_File.xs ext/NDBM_File/ndbm.t + ! ext/ODBM_File/ODBM_File.xs ext/ODBM_File/odbm.t + ! ext/ODBM_File/typemap ext/SDBM_File/SDBM_File.xs + ! 
ext/SDBM_File/sdbm.t +____________________________________________________________________________ +[ 12563] By: jhi on 2001/10/22 12:15:19 + Log: Integrate change #12559 from maintperl; + various fixes for system() and backticks under windows + Branch: perl + !> win32/win32.c +____________________________________________________________________________ +[ 12562] By: jhi on 2001/10/22 12:05:35 + Log: Integrate change #12560 from maintperl; + make pl2bat use %0 rather than "%0" (the latter fails to work + in a lot of common cases) + Branch: perl + !> win32/bin/pl2bat.pl +____________________________________________________________________________ +[ 12561] By: jhi on 2001/10/22 12:00:23 + Log: Integrate changes #12549 and #12550 from maintperl; + + readline() doesn't work with our variables; it confuses them with + my variables (change#4227 was incomplete) + Branch: perl + ! t/lib/strict/vars + !> t/base/rs.t toke.c +____________________________________________________________________________ +[ 12560] By: gsar on 2001/10/22 09:51:59 + Log: make pl2bat use %0 rather than "%0" (the latter fails to work + in a lot of common cases) + Branch: maint-5.6/perl + ! win32/bin/pl2bat.pl +____________________________________________________________________________ +[ 12559] By: gsar on 2001/10/22 09:49:51 + Log: various fixes for system() and backticks under windows: + + * avoid munging whitespace that is passed within quotes + * work around a cmd.exe misfeature that made multi-arg + system() unreliable when there is more than one quoted + argument + * make multi-arg system() autoquote arguments as needed + before passing them to the shell (this avoids having to + second guess the shell quoting) + * perl's -Dp switch can be used to trace the innards + Branch: maint-5.6/perl + ! 
win32/win32.c +____________________________________________________________________________ +[ 12558] By: nick on 2001/10/22 08:58:42 + Log: Integrate ithreads buffer flush fix from perlio + Branch: perl + !> ext/threads/t/basic.t perl.c +____________________________________________________________________________ +[ 12557] By: nick on 2001/10/22 08:35:09 + Log: All tests pass (legitimately) on ithreads + Branch: perlio + ! ext/threads/t/basic.t perl.c +____________________________________________________________________________ +[ 12556] By: nick on 2001/10/22 06:51:22 + Log: Integrate non-ithreads fix to mainline + Branch: perl + !> perlio.c +____________________________________________________________________________ +[ 12555] By: nick on 2001/10/22 06:48:33 + Log: Restore non-ithreads build. Interestingly binmode test passes + non-ithreads - so crlf layer is not damaged and ithreads fail + is a symptom ... + Branch: perlio + ! perlio.c +____________________________________________________________________________ +[ 12554] By: jhi on 2001/10/21 22:21:55 + Log: Integrate from perlio; restructuring. + Branch: perl + !> embed.h embedvar.h intrpvar.h perl.c perlapi.h perlio.c + !> perlio.h perliol.h pod/perlapi.pod sv.c win32/makefile.mk + !> win32/perlhost.h +____________________________________________________________________________ +[ 12553] By: pudge on 2001/10/21 21:51:34 + Log: Integrate from maintperl + (Changes 12350, 12496, 12548, 12549, 12550) + Branch: maint-5.6/macperl + !> ext/IO/lib/IO/Seekable.pm lib/Carp/Heavy.pm t/base/rs.t + !> t/lib/filefind-taint.t t/pragma/strict-vars toke.c +____________________________________________________________________________ +[ 12552] By: nick on 2001/10/21 19:18:12 + Log: Win32 PerlIO_cleanup special cases should no longer be required. + Branch: perlio + ! 
perl.c +____________________________________________________________________________ +[ 12551] By: nick on 2001/10/21 19:16:56 + Log: Multiple win32io.o is fatal for GCC (and noise with VC++) + Branch: perlio + ! win32/makefile.mk +____________________________________________________________________________ +[ 12550] By: gsar on 2001/10/21 19:05:54 + Log: change#12549 wasn't aware of strictures + Branch: maint-5.6/perl + ! t/pragma/strict-vars toke.c +____________________________________________________________________________ +[ 12549] By: gsar on 2001/10/21 18:03:19 + Log: readline() doesn't work with our variables; it confuses them with + my variables (change#4227 was incomplete) + Branch: maint-5.6/perl + ! t/base/rs.t toke.c +____________________________________________________________________________ +[ 12548] By: gsar on 2001/10/21 17:48:01 + Log: change#12220 appears to have pulled in a mainline change that + isn't applicable to maint-5.6 (some platforms don't taint + cwd) + Branch: maint-5.6/perl + ! t/lib/filefind-taint.t +____________________________________________________________________________ +[ 12547] By: nick on 2001/10/21 17:15:54 + Log: Convert rest of PerlIO's memory tables to per-interp and add clone functions + for them. Call explicit cleanup during destruct process. + - one binmode test is failing + - also ext/threads/t/basic.t fails under make test, and is noisy under + harness. (Threads results are intermingled and don't match order expected.) + Branch: perlio + ! embed.h embedvar.h intrpvar.h perl.c perlapi.h perlio.c + ! perlio.h perliol.h pod/perlapi.pod sv.c +____________________________________________________________________________ +[ 12546] By: jhi on 2001/10/21 16:12:08 + Log: Implement multicharacter case mappings where a single + Unicode character can be mapped into several. + Branch: perl + - lib/unicore/To/SpecLower.pl lib/unicore/To/SpecTitle.pl + - lib/unicore/To/SpecUpper.pl + ! 
MANIFEST embed.h embed.pl global.sym lib/unicore/To/Lower.pl + ! lib/unicore/To/Title.pl lib/unicore/To/Upper.pl + ! lib/unicore/mktables pod/perlfunc.pod pod/perlunicode.pod pp.c + ! proto.h t/op/lc.t utf8.c +____________________________________________________________________________ +[ 12545] By: sky on 2001/10/21 15:25:16 + Log: First support of threads::shared, support shared svs and references. + Branch: perl + + ext/threads/shared/Makefile.PL ext/threads/shared/README + + ext/threads/shared/shared.pm ext/threads/shared/shared.xs + + ext/threads/shared/t/sv_refs.t + + ext/threads/shared/t/sv_simple.t + ! MANIFEST sharedsv.c +____________________________________________________________________________ +[ 12544] By: nick on 2001/10/21 14:52:35 + Log: PerlIO layer table as PL_perlio (per-interpreter) + Branch: perlio + ! embed.h embedvar.h intrpvar.h perlapi.h perlio.c perlio.h + ! pod/perlapi.pod sv.c +____________________________________________________________________________ +[ 12543] By: jhi on 2001/10/21 13:36:40 + Log: Prettyprinting. + Branch: perl + ! pod/perlunicode.pod +____________________________________________________________________________ +[ 12542] By: ams on 2001/10/21 03:50:25 + Log: Subject: perlpodspec and perlpod rewrite, draft 3 "final" + From: "Sean M. Burke" <sburke@cpan.org> + Date: Sat, 20 Oct 2001 17:51:09 -0600 + Message-Id: <3.0.6.32.20011020175109.007cb3d0@mail.spinn.net> + Branch: perl + + pod/perlpodspec.pod + ! MANIFEST pod/buildtoc.PL pod/perl.pod pod/perlpod.pod + ! pod/perltoc.pod +____________________________________________________________________________ +[ 12541] By: nick on 2001/10/20 22:23:53 + Log: Fix typos in new locking on MemShared + Make buffers in PerlIOBuf_* in per-thread heap (they are cloned after all...) + - seems to make segfaults more deterministic - suspect they are + attempt to free() after Interp and Host have gone. + Branch: perlio + ! 
perlio.c win32/perlhost.h +____________________________________________________________________________ +[ 12540] By: nick on 2001/10/20 21:50:46 + Log: Remove the MemShared re-#define (again) + Branch: perlio + ! perlio.c +____________________________________________________________________________ +[ 12539] By: nick on 2001/10/20 21:49:06 + Log: Use locks on MemShared + Branch: perlio + ! win32/perlhost.h +____________________________________________________________________________ +[ 12538] By: jhi on 2001/10/20 17:31:07 + Log: Subject: [REPATCH installhtml] Re: installhtml needs a good beating out + From: "chromatic" <chromatic@rmci.net> + Date: Sat, 20 Oct 2001 11:13:47 -0600 + Message-ID: <20011020172003.60024.qmail@onion.perl.org> + Branch: perl + ! installhtml +____________________________________________________________________________ +[ 12537] By: nick on 2001/10/20 16:44:03 + Log: Integrate mainline + Branch: perlio + !> (integrate 48 files) +____________________________________________________________________________ +[ 12536] By: jhi on 2001/10/20 15:58:00 + Log: Update Changes. + Branch: perl + ! Changes patchlevel.h +____________________________________________________________________________ [ 12535] By: jhi on 2001/10/20 15:18:57 Log: Upgrade to podlators 1.11, from Russ Allbery. Branch: perl @@ -580,6 +580,12 @@ ext/threads/t/stress_string.t Test with multiple threads, string cv argument. 
ext/threads/threads.h ithreads ext/threads/threads.pm ithreads ext/threads/threads.xs ithreads +ext/threads/shared/Makefile.PL thread shared variables +ext/threads/shared/README thread shared variables +ext/threads/shared/shared.pm thread shared variables +ext/threads/shared/shared.xs thread shared variables +ext/threads/shared/t/sv_simple.t thread shared variables +ext/threads/shared/t/sv_refs.t thread shared variables ext/Time/HiRes/Changes Time::HiRes extension ext/Time/HiRes/hints/dynixptx.pl Hint for Time::HiRes for named architecture ext/Time/HiRes/hints/sco.pl Hints for Time::HiRes for named architecture @@ -1598,9 +1604,6 @@ lib/unicore/Scripts.txt Unicode character database lib/unicore/SpecCase.txt Unicode character database lib/unicore/To/Digit.pl Unicode character database lib/unicore/To/Lower.pl Unicode character database -lib/unicore/To/SpecLower.pl Unicode character database -lib/unicore/To/SpecTitle.pl Unicode character database -lib/unicore/To/SpecUpper.pl Unicode character database lib/unicore/To/Title.pl Unicode character database lib/unicore/To/Upper.pl Unicode character database lib/unicore/UCD.html Unicode character database @@ -1854,6 +1857,7 @@ pod/perlop.pod Operator info pod/perlopentut.pod open() tutorial pod/perlothrtut.pod Threads old tutorial pod/perlpod.pod Pod info +pod/perlpodspec.pod Pod specification pod/perlport.pod Portability guide pod/perlre.pod Regular expression info pod/perlref.pod References info @@ -720,6 +720,7 @@ #define swash_fetch Perl_swash_fetch #define taint_env Perl_taint_env #define taint_proper Perl_taint_proper +#define to_utf8_case Perl_to_utf8_case #define to_utf8_lower Perl_to_utf8_lower #define to_utf8_upper Perl_to_utf8_upper #define to_utf8_title Perl_to_utf8_title @@ -2230,6 +2231,7 @@ #define swash_fetch(a,b,c) Perl_swash_fetch(aTHX_ a,b,c) #define taint_env() Perl_taint_env(aTHX) #define taint_proper(a,b) Perl_taint_proper(aTHX_ a,b) +#define to_utf8_case(a,b,c,d,e,f) Perl_to_utf8_case(aTHX_ 
a,b,c,d,e,f) #define to_utf8_lower(a,b,c) Perl_to_utf8_lower(aTHX_ a,b,c) #define to_utf8_upper(a,b,c) Perl_to_utf8_upper(aTHX_ a,b,c) #define to_utf8_title(a,b,c) Perl_to_utf8_title(aTHX_ a,b,c) @@ -1818,6 +1818,8 @@ Ap |SV* |swash_init |char* pkg|char* name|SV* listsv \ Ap |UV |swash_fetch |SV *sv|U8 *ptr|bool do_utf8 Ap |void |taint_env Ap |void |taint_proper |const char* f|const char* s +Ap |UV |to_utf8_case |U8 *p|U8* ustrp|STRLEN *lenp \ + |SV **swash|char *normal|char *special Ap |UV |to_utf8_lower |U8 *p|U8* ustrp|STRLEN *lenp Ap |UV |to_utf8_upper |U8 *p|U8* ustrp|STRLEN *lenp Ap |UV |to_utf8_title |U8 *p|U8* ustrp|STRLEN *lenp diff --git a/ext/DB_File/DB_File.xs b/ext/DB_File/DB_File.xs index db4382be8f..05e5319b92 100644 --- a/ext/DB_File/DB_File.xs +++ b/ext/DB_File/DB_File.xs @@ -1777,13 +1777,14 @@ db_FIRSTKEY(db) void db_NEXTKEY(db, key) DB_File db - DBTKEY key + DBTKEY key = NO_INIT PREINIT: int RETVAL; CODE: { DBT value ; + DBT_clear(key) ; DBT_clear(value) ; CurrentDB = db ; RETVAL = do_SEQ(db, key, value, R_NEXT) ; diff --git a/ext/DB_File/t/db-btree.t b/ext/DB_File/t/db-btree.t index 905cbe1fdf..a380496b53 100755 --- a/ext/DB_File/t/db-btree.t +++ b/ext/DB_File/t/db-btree.t @@ -15,7 +15,7 @@ use strict; use DB_File; use Fcntl; -print "1..157\n"; +print "1..163\n"; sub ok { @@ -1295,4 +1295,46 @@ EOM unlink $Dfile; } +{ + # When iterating over a tied hash using "each", the key passed to FETCH + # will be recycled and passed to NEXTKEY. If a Source Filter modifies the + # key in FETCH via a filter_fetch_key method we need to check that the + # modified key doesn't get passed to NEXTKEY. + # Also Test "keys" & "values" while we are at it. 
+ + use warnings ; + use strict ; + use DB_File ; + + unlink $Dfile; + my $bad_key = 0 ; + my %h = () ; + my $db ; + ok(158, $db = tie(%h, 'DB_File', $Dfile, O_RDWR|O_CREAT, 0640, $DB_BTREE ) ); + $db->filter_fetch_key (sub { $_ =~ s/^Beta_/Alpha_/ if defined $_}) ; + $db->filter_store_key (sub { $bad_key = 1 if /^Beta_/ ; $_ =~ s/^Alpha_/Beta_/}) ; + + $h{'Alpha_ABC'} = 2 ; + $h{'Alpha_DEF'} = 5 ; + + ok(159, $h{'Alpha_ABC'} == 2); + ok(160, $h{'Alpha_DEF'} == 5); + + my ($k, $v) = ("",""); + while (($k, $v) = each %h) {} + ok(161, $bad_key == 0); + + $bad_key = 0 ; + foreach $k (keys %h) {} + ok(162, $bad_key == 0); + + $bad_key = 0 ; + foreach $v (values %h) {} + ok(163, $bad_key == 0); + + undef $db ; + untie %h ; + unlink $Dfile; +} + exit ; diff --git a/ext/DB_File/t/db-hash.t b/ext/DB_File/t/db-hash.t index 12b0848fa2..1d13dc0941 100755 --- a/ext/DB_File/t/db-hash.t +++ b/ext/DB_File/t/db-hash.t @@ -15,7 +15,7 @@ use warnings; use DB_File; use Fcntl; -print "1..111\n"; +print "1..117\n"; sub ok { @@ -742,4 +742,46 @@ EOM unlink $Dfile; } +{ + # When iterating over a tied hash using "each", the key passed to FETCH + # will be recycled and passed to NEXTKEY. If a Source Filter modifies the + # key in FETCH via a filter_fetch_key method we need to check that the + # modified key doesn't get passed to NEXTKEY. + # Also Test "keys" & "values" while we are at it. 
+ + use warnings ; + use strict ; + use DB_File ; + + unlink $Dfile; + my $bad_key = 0 ; + my %h = () ; + my $db ; + ok(112, $db = tie(%h, 'DB_File', $Dfile, O_RDWR|O_CREAT, 0640, $DB_HASH ) ); + $db->filter_fetch_key (sub { $_ =~ s/^Beta_/Alpha_/ if defined $_}) ; + $db->filter_store_key (sub { $bad_key = 1 if /^Beta_/ ; $_ =~ s/^Alpha_/Beta_/}) ; + + $h{'Alpha_ABC'} = 2 ; + $h{'Alpha_DEF'} = 5 ; + + ok(113, $h{'Alpha_ABC'} == 2); + ok(114, $h{'Alpha_DEF'} == 5); + + my ($k, $v) = ("",""); + while (($k, $v) = each %h) {} + ok(115, $bad_key == 0); + + $bad_key = 0 ; + foreach $k (keys %h) {} + ok(116, $bad_key == 0); + + $bad_key = 0 ; + foreach $v (values %h) {} + ok(117, $bad_key == 0); + + undef $db ; + untie %h ; + unlink $Dfile; +} + exit ; diff --git a/ext/GDBM_File/GDBM_File.xs b/ext/GDBM_File/GDBM_File.xs index ffdc41b14c..d58feeccef 100644 --- a/ext/GDBM_File/GDBM_File.xs +++ b/ext/GDBM_File/GDBM_File.xs @@ -17,6 +17,7 @@ typedef struct { typedef GDBM_File_type * GDBM_File ; typedef datum datum_key ; typedef datum datum_value ; +typedef datum datum_key_copy; #define ckFilter(arg,type,name) \ if (db->type) { \ @@ -122,7 +123,7 @@ gdbm_DESTROY(db) datum_value gdbm_FETCH(db, key) GDBM_File db - datum_key key + datum_key_copy key #define gdbm_STORE(db,key,value,flags) gdbm_store(db->dbp,key,value,flags) int @@ -154,7 +155,7 @@ gdbm_FIRSTKEY(db) datum_key gdbm_NEXTKEY(db, key) GDBM_File db - datum_key key + datum_key key #define gdbm_reorganize(db) gdbm_reorganize(db->dbp) int diff --git a/ext/GDBM_File/gdbm.t b/ext/GDBM_File/gdbm.t index 3ba19e8722..7c268936f3 100755 --- a/ext/GDBM_File/gdbm.t +++ b/ext/GDBM_File/gdbm.t @@ -18,7 +18,7 @@ use warnings; use GDBM_File; -print "1..68\n"; +print "1..74\n"; unlink <Op.dbmx*>; @@ -425,3 +425,46 @@ EOM untie %h; unlink <Op.dbmx*>; } + +{ + # When iterating over a tied hash using "each", the key passed to FETCH + # will be recycled and passed to NEXTKEY. 
If a Source Filter modifies the + # key in FETCH via a filter_fetch_key method we need to check that the + # modified key doesn't get passed to NEXTKEY. + # Also Test "keys" & "values" while we are at it. + + use warnings ; + use strict ; + use GDBM_File ; + + unlink <Op.dbmx*>; + my $bad_key = 0 ; + my %h = () ; + ok(69, my $db = tie(%h, 'GDBM_File','Op.dbmx', &GDBM_WRCREAT, 0640)); + $db->filter_fetch_key (sub { $_ =~ s/^Beta_/Alpha_/ if defined $_}) ; + $db->filter_store_key (sub { $bad_key = 1 if /^Beta_/ ; $_ =~ s/^Alpha_/Beta_/}) ; + + $h{'Alpha_ABC'} = 2 ; + $h{'Alpha_DEF'} = 5 ; + + ok(70, $h{'Alpha_ABC'} == 2); + ok(71, $h{'Alpha_DEF'} == 5); + + my ($k, $v) = ("",""); + while (($k, $v) = each %h) {} + ok(72, $bad_key == 0); + + $bad_key = 0 ; + foreach $k (keys %h) {} + ok(73, $bad_key == 0); + + $bad_key = 0 ; + foreach $v (values %h) {} + ok(74, $bad_key == 0); + + undef $db ; + untie %h ; + unlink <Op.dbmx*>; +} + +exit ; diff --git a/ext/GDBM_File/typemap b/ext/GDBM_File/typemap index 1dd063003a..8952938ccd 100644 --- a/ext/GDBM_File/typemap +++ b/ext/GDBM_File/typemap @@ -3,6 +3,7 @@ # datum_key T_DATUM_K +datum_key_copy T_DATUM_K_C datum_value T_DATUM_V NDBM_File T_PTROBJ GDBM_File T_PTROBJ @@ -17,6 +18,18 @@ T_DATUM_K ckFilter($arg, filter_store_key, \"filter_store_key\"); $var.dptr = SvPV($arg, PL_na); $var.dsize = (int)PL_na; +T_DATUM_K_C + { + SV * tmpSV; + if (db->filter_store_key) { + tmpSV = sv_2mortal(newSVsv($arg)); + ckFilter(tmpSV, filter_store_key, \"filter_store_key\"); + } + else + tmpSV = $arg; + $var.dptr = SvPV(tmpSV, PL_na); + $var.dsize = (int)PL_na; + } T_DATUM_V ckFilter($arg, filter_store_value, \"filter_store_value\"); if (SvOK($arg)) { diff --git a/ext/NDBM_File/NDBM_File.xs b/ext/NDBM_File/NDBM_File.xs index 55dd639e95..78a56cb7cc 100644 --- a/ext/NDBM_File/NDBM_File.xs +++ b/ext/NDBM_File/NDBM_File.xs @@ -107,7 +107,7 @@ ndbm_FIRSTKEY(db) datum_key ndbm_NEXTKEY(db, key) NDBM_File db - datum_key key + datum_key key = NO_INIT 
#define ndbm_error(db) dbm_error(db->dbp) int diff --git a/ext/NDBM_File/ndbm.t b/ext/NDBM_File/ndbm.t index f56034387a..a340e33214 100755 --- a/ext/NDBM_File/ndbm.t +++ b/ext/NDBM_File/ndbm.t @@ -28,7 +28,7 @@ require NDBM_File; #If Fcntl is not available, try 0x202 or 0x102 for O_RDWR|O_CREAT use Fcntl; -print "1..65\n"; +print "1..71\n"; unlink <Op.dbmx*>; @@ -418,3 +418,46 @@ EOM ok(65, tie(%h, 'NDBM_File','Op.dbmx', O_RDWR|O_CREAT, 0640)) ; } + +{ + # When iterating over a tied hash using "each", the key passed to FETCH + # will be recycled and passed to NEXTKEY. If a Source Filter modifies the + # key in FETCH via a filter_fetch_key method we need to check that the + # modified key doesn't get passed to NEXTKEY. + # Also Test "keys" & "values" while we are at it. + + use warnings ; + use strict ; + use NDBM_File ; + + unlink <Op.dbmx*>; + my $bad_key = 0 ; + my %h = () ; + ok(66, my $db = tie(%h, 'NDBM_File','Op.dbmx', O_RDWR|O_CREAT, 0640)) ; + $db->filter_fetch_key (sub { $_ =~ s/^Beta_/Alpha_/ if defined $_}) ; + $db->filter_store_key (sub { $bad_key = 1 if /^Beta_/ ; $_ =~ s/^Alpha_/Beta_/}) ; + + $h{'Alpha_ABC'} = 2 ; + $h{'Alpha_DEF'} = 5 ; + + ok(67, $h{'Alpha_ABC'} == 2); + ok(68, $h{'Alpha_DEF'} == 5); + + my ($k, $v) = ("",""); + while (($k, $v) = each %h) {} + ok(69, $bad_key == 0); + + $bad_key = 0 ; + foreach $k (keys %h) {} + ok(70, $bad_key == 0); + + $bad_key = 0 ; + foreach $v (values %h) {} + ok(71, $bad_key == 0); + + undef $db ; + untie %h ; + unlink <Op.dbmx*>; +} + +exit ; diff --git a/ext/ODBM_File/ODBM_File.xs b/ext/ODBM_File/ODBM_File.xs index 3724dae962..5a556bfd2f 100644 --- a/ext/ODBM_File/ODBM_File.xs +++ b/ext/ODBM_File/ODBM_File.xs @@ -53,6 +53,7 @@ typedef struct { typedef ODBM_File_type * ODBM_File ; typedef datum datum_key ; +typedef datum datum_key_copy ; typedef datum datum_value ; #define ckFilter(arg,type,name) \ @@ -133,7 +134,7 @@ DESTROY(db) datum_value odbm_FETCH(db, key) ODBM_File db - datum_key key + datum_key_copy 
key int odbm_STORE(db, key, value, flags = DBM_REPLACE) diff --git a/ext/ODBM_File/odbm.t b/ext/ODBM_File/odbm.t index a43e70bd99..ecffffd81a 100755 --- a/ext/ODBM_File/odbm.t +++ b/ext/ODBM_File/odbm.t @@ -28,7 +28,7 @@ require ODBM_File; #If Fcntl is not available, try 0x202 or 0x102 for O_RDWR|O_CREAT use Fcntl; -print "1..66\n"; +print "1..72\n"; unlink <Op.dbmx*>; @@ -425,6 +425,48 @@ EOM unlink <Op.dbmx*>; } +{ + # When iterating over a tied hash using "each", the key passed to FETCH + # will be recycled and passed to NEXTKEY. If a Source Filter modifies the + # key in FETCH via a filter_fetch_key method we need to check that the + # modified key doesn't get passed to NEXTKEY. + # Also Test "keys" & "values" while we are at it. + + use warnings ; + use strict ; + use ODBM_File ; + + unlink <Op.dbmx*>; + my $bad_key = 0 ; + my %h = () ; + ok(67, my $db = tie(%h, 'ODBM_File','Op.dbmx', O_RDWR|O_CREAT, 0640)) ; + $db->filter_fetch_key (sub { $_ =~ s/^Beta_/Alpha_/ if defined $_}) ; + $db->filter_store_key (sub { $bad_key = 1 if /^Beta_/ ; $_ =~ s/^Alpha_/Beta_/}) ; + + $h{'Alpha_ABC'} = 2 ; + $h{'Alpha_DEF'} = 5 ; + + ok(68, $h{'Alpha_ABC'} == 2); + ok(69, $h{'Alpha_DEF'} == 5); + + my ($k, $v) = ("",""); + while (($k, $v) = each %h) {} + ok(70, $bad_key == 0); + + $bad_key = 0 ; + foreach $k (keys %h) {} + ok(71, $bad_key == 0); + + $bad_key = 0 ; + foreach $v (values %h) {} + ok(72, $bad_key == 0); + + undef $db ; + untie %h ; + unlink <Op.dbmx*>; +} + +exit ; if ($^O eq 'hpux') { print <<EOM; # diff --git a/ext/ODBM_File/typemap b/ext/ODBM_File/typemap index 096427ea7f..62b8622569 100644 --- a/ext/ODBM_File/typemap +++ b/ext/ODBM_File/typemap @@ -3,6 +3,7 @@ # datum_key T_DATUM_K +datum_key_copy T_DATUM_K_C datum_value T_DATUM_V gdatum T_GDATUM NDBM_File T_PTROBJ @@ -18,6 +19,18 @@ T_DATUM_K ckFilter($arg, filter_store_key, \"filter_store_key\"); $var.dptr = SvPV($arg, PL_na); $var.dsize = (int)PL_na; +T_DATUM_K_C + { + SV * tmpSV ; + if 
(db->filter_store_key){ + tmpSV = sv_2mortal(newSVsv($arg)); + ckFilter(tmpSV, filter_store_key, \"filter_store_key\"); + } + else + tmpSV = $arg; + $var.dptr = SvPV(tmpSV, PL_na); + $var.dsize = (int)PL_na; + } T_DATUM_V ckFilter($arg, filter_store_value, \"filter_store_value\"); if (SvOK($arg)) { diff --git a/ext/SDBM_File/SDBM_File.xs b/ext/SDBM_File/SDBM_File.xs index 859730bf3a..94fc305673 100644 --- a/ext/SDBM_File/SDBM_File.xs +++ b/ext/SDBM_File/SDBM_File.xs @@ -119,7 +119,7 @@ sdbm_FIRSTKEY(db) datum_key sdbm_NEXTKEY(db, key) SDBM_File db - datum_key key + datum_key key = NO_INIT int sdbm_error(db) diff --git a/ext/SDBM_File/sdbm.t b/ext/SDBM_File/sdbm.t index 49bc9f131e..e1ed259bfa 100644 --- a/ext/SDBM_File/sdbm.t +++ b/ext/SDBM_File/sdbm.t @@ -28,7 +28,7 @@ require SDBM_File; #If Fcntl is not available, try 0x202 or 0x102 for O_RDWR|O_CREAT use Fcntl; -print "1..68\n"; +print "1..74\n"; unlink <Op_dbmx.*>; @@ -427,3 +427,46 @@ unlink <Op_dbmx*>, $Dfile; untie %h; unlink <Op_dbmx*>; } + +{ + # When iterating over a tied hash using "each", the key passed to FETCH + # will be recycled and passed to NEXTKEY. If a Source Filter modifies the + # key in FETCH via a filter_fetch_key method we need to check that the + # modified key doesn't get passed to NEXTKEY. + # Also Test "keys" & "values" while we are at it. 
+ + use warnings ; + use strict ; + use SDBM_File ; + + unlink <Op.dbmx*>; + my $bad_key = 0 ; + my %h = () ; + ok(69, my $db = tie(%h, 'SDBM_File','Op_dbmx', O_RDWR|O_CREAT, 0640)) ; + $db->filter_fetch_key (sub { $_ =~ s/^Beta_/Alpha_/ if defined $_}) ; + $db->filter_store_key (sub { $bad_key = 1 if /^Beta_/ ; $_ =~ s/^Alpha_/Beta_/}) ; + + $h{'Alpha_ABC'} = 2 ; + $h{'Alpha_DEF'} = 5 ; + + ok(70, $h{'Alpha_ABC'} == 2); + ok(71, $h{'Alpha_DEF'} == 5); + + my ($k, $v) = ("",""); + while (($k, $v) = each %h) {} + ok(72, $bad_key == 0); + + $bad_key = 0 ; + foreach $k (keys %h) {} + ok(73, $bad_key == 0); + + $bad_key = 0 ; + foreach $v (values %h) {} + ok(74, $bad_key == 0); + + undef $db ; + untie %h ; + unlink <Op.dbmx*>; +} + +exit ; diff --git a/ext/threads/shared/Makefile.PL b/ext/threads/shared/Makefile.PL new file mode 100755 index 0000000000..8587906f41 --- /dev/null +++ b/ext/threads/shared/Makefile.PL @@ -0,0 +1,26 @@ +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. + +use Config; + + +unless($Config{'useithreads'} eq 'define') { + die "We need a perl that is built with USEITHREAD!\n"; +} + +WriteMakefile( + 'NAME' => 'threads::shared', + 'VERSION_FROM' => 'shared.pm', # finds $VERSION + 'PREREQ_PM' => {}, # e.g., Module::Name => 1.1 + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'shared.pm', # retrieve abstract from module + AUTHOR => 'Arthur Bergman <arthur@contiller.se>') : ()), + 'LIBS' => [''], # e.g., '-lm' + 'DEFINE' => '', # e.g., '-DHAVE_SOMETHING' + # Insert -I. 
if you add *.h files later: + 'INC' => '', # e.g., '-I/usr/include/other' + # Un-comment this if you add C files to link with later: + # 'OBJECT' => '$(O_FILES)', # link all the C files too + +); diff --git a/ext/threads/shared/README b/ext/threads/shared/README new file mode 100644 index 0000000000..0690835a2c --- /dev/null +++ b/ext/threads/shared/README @@ -0,0 +1,26 @@ +threads/shared version 0.02 +=========================== + +This module needs perl 5.7.2 or later compiled with USEITHREADS, +It lets you share simple data structures between threads. + +INSTALLATION + +To install this module type the following: + + perl Makefile.PL + make + make test + make install + +DEPENDENCIES + +This module requires these other modules and libraries: + +threads 0.03; + +COPYRIGHT AND LICENCE + +Copyright (C) 2001 Arthur Bergman artur at contiller.se +Same licence as perl. + diff --git a/ext/threads/shared/shared.pm b/ext/threads/shared/shared.pm new file mode 100644 index 0000000000..1ad7dfa274 --- /dev/null +++ b/ext/threads/shared/shared.pm @@ -0,0 +1,121 @@ +package threads::shared; + +use strict; +use warnings; +use Config; +use Scalar::Util qw(weaken); +use attributes qw(reftype); + +BEGIN { + if($Config{'useithreads'} && $Config::threads) { + *share = \&share_enabled; + *cond_wait = \&cond_wait_disabled; + *cond_signal = \&cond_signal_disabled; + *cond_broadcast = \&cond_broadcast_disabled; + *unlock = \&unlock_disabled; + *lock = \&lock_disabled; + } else { + *share = \&share_enabled; + } +} + +require Exporter; +require DynaLoader; +our @ISA = qw(Exporter DynaLoader); + +our @EXPORT = qw(share cond_wait cond_broadcast cond_signal unlock lock); +our $VERSION = '0.01'; + +our %shared; + +sub cond_wait_disabled { return @_ }; +sub cond_signal_disabled { return @_}; +sub cond_broadcast_disabled { return @_}; +sub unlock_disabled { 1 }; +sub lock_disabled { 1 } +sub share_disabled { return @_} + +sub share_enabled (\[$@%]) { # \] + my $value = $_[0]; + my $ref = 
reftype($value); + if($ref eq 'SCALAR') { + my $obj = \threads::shared::sv->new($$value); + bless $obj, 'threads::shared::sv'; + $shared{$$obj} = $value; + weaken($shared{$$obj}); + } else { + die "You cannot share ref of type $_[0]\n"; + } +} + +sub CLONE { + return unless($_[0] eq "threads::shared"); + foreach my $ptr (keys %shared) { + if($ptr) { + thrcnt_inc($shared{$ptr}); + } + } +} + +package threads::shared::sv; +use base 'threads::shared'; + +package threads::shared::av; +use base 'threads::shared'; + +package threads::shared::hv; +use base 'threads::shared'; + +bootstrap threads::shared $VERSION; + +__END__ + +=head1 NAME + +threads::shared - Perl extension for sharing data structures between threads + +=head1 SYNOPSIS + + use threads::shared; + + my($foo, @foo, %foo); + share(\$foo); + share(\@foo); + share(\%hash); + my $bar = share([]); + $hash{bar} = share({}); + + lock(\%hash); + unlock(\%hash); + cond_wait($scalar); + cond_broadcast(\@array); + cond_signal($scalar); + +=head1 DESCRIPTION + +This modules allows you to share() variables. These variables will +then be shared across different threads (and pseudoforks on +win32). They are used together with the threads module. + +=head2 EXPORT + +share(), lock(), unlock(), cond_wait, cond_signal, cond_broadcast + +=head1 BUGS + +Not stress tested! +Does not support references +Does not support splice on arrays! +The exported functions need a reference due to unsufficent prototyping! 
+ +=head1 AUTHOR + +Artur Bergman E<lt>artur at contiller.seE<gt> + +threads is released under the same license as Perl + +=head1 SEE ALSO + +L<perl> L<threads> + +=cut diff --git a/ext/threads/shared/shared.xs b/ext/threads/shared/shared.xs new file mode 100644 index 0000000000..90049e2a8e --- /dev/null +++ b/ext/threads/shared/shared.xs @@ -0,0 +1,144 @@ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" + + +void shared_sv_attach_sv (SV* sv, shared_sv* shared) { + HV* shared_hv = get_hv("threads::shared::shared", FALSE); + SV* id = newSViv((IV)shared); + STRLEN length = sv_len(id); + SV* tiedobject; + SV** tiedobject_ = hv_fetch(shared_hv, SvPV(id,length), length, 0); + if(tiedobject_) { + tiedobject = (*tiedobject_); + SvROK_on(sv); + SvRV(sv) = SvRV(tiedobject); + + } else { + croak("die\n"); + } +} + + +int shared_sv_fetch_mg (pTHX_ SV* sv, MAGIC *mg) { + shared_sv* shared = (shared_sv*) SvIV(mg->mg_obj); + SHAREDSvLOCK(shared); + if(SvROK(SHAREDSvGET(shared))) { + shared_sv* target = (shared_sv*) SvIV(SvRV(SHAREDSvGET(shared))); + shared_sv_attach_sv(sv, target); + } else { + sv_setsv(sv, SHAREDSvGET(shared)); + } + SHAREDSvUNLOCK(shared); + + return 0; +} + +int shared_sv_store_mg (pTHX_ SV* sv, MAGIC *mg) { + shared_sv* shared = (shared_sv*) SvIV(mg->mg_obj); + SHAREDSvLOCK(shared); + if(SvROK(SHAREDSvGET(shared))) + Perl_sharedsv_thrcnt_dec(aTHX_ (shared_sv*) SvIV(SvRV(SHAREDSvGET(shared)))); + SHAREDSvEDIT(shared); + if(SvROK(sv)) { + shared_sv* target = Perl_sharedsv_find(aTHX_ SvRV(sv)); + if(!target) { + SHAREDSvRELEASE(shared); + sv_setsv(sv,SHAREDSvGET(shared)); + SHAREDSvUNLOCK(shared); + Perl_croak(aTHX_ "You cannot assign a non shared reference to a shared scalar"); + } + Perl_sv_free(PL_sharedsv_space,SHAREDSvGET(shared)); + SHAREDSvGET(shared) = newRV_noinc(newSViv((IV)target)); + SvROK_off(sv); + } else { + sv_setsv(SHAREDSvGET(shared), sv); + } + SHAREDSvRELEASE(shared); + if(SvROK(SHAREDSvGET(shared))) + 
Perl_sharedsv_thrcnt_inc(aTHX_ (shared_sv*) SvIV(SvRV(SHAREDSvGET(shared)))); + SHAREDSvUNLOCK(shared); + return 0; +} + +int shared_sv_destroy_mg (pTHX_ SV* sv, MAGIC *mg) { + shared_sv* shared = (shared_sv*) SvIV(mg->mg_obj); + if(!shared) + return 0; + Perl_sharedsv_thrcnt_dec(aTHX_ shared); +} + +MGVTBL svtable = {MEMBER_TO_FPTR(shared_sv_fetch_mg), + MEMBER_TO_FPTR(shared_sv_store_mg), + 0, + 0, + MEMBER_TO_FPTR(shared_sv_destroy_mg) +}; + +MODULE = threads::shared PACKAGE = threads::shared + + +PROTOTYPES: DISABLE + + +SV* +ptr(ref) + SV* ref + CODE: + RETVAL = newSViv(SvIV(SvRV(ref))); + OUTPUT: + RETVAL + + +SV* +_thrcnt(ref) + SV* ref + CODE: + shared_sv* shared = Perl_sharedsv_find(aTHX, ref); + if(!shared) + croak("thrcnt can only be used on shared values"); + SHAREDSvLOCK(shared); + RETVAL = newSViv(SvREFCNT(SHAREDSvGET(shared))); + SHAREDSvUNLOCK(shared); + OUTPUT: + RETVAL + + +void +thrcnt_inc(ref) + SV* ref + CODE: + shared_sv* shared; + if(SvROK(ref)) + ref = SvRV(ref); + shared = Perl_sharedsv_find(aTHX, ref); + if(!shared) + croak("thrcnt can only be used on shared values"); + Perl_sharedsv_thrcnt_inc(aTHX_ shared); + + +MODULE = threads::shared PACKAGE = threads::shared::sv + +SV* +new(class, value) + SV* class + SV* value + CODE: + shared_sv* shared = Perl_sharedsv_new(aTHX); + MAGIC* shared_magic; + SV* obj = newSViv((IV)shared); + SHAREDSvEDIT(shared); + SHAREDSvGET(shared) = newSVsv(value); + SHAREDSvRELEASE(shared); + sv_magic(value, 0, PERL_MAGIC_ext, "threads::shared", 16); + shared_magic = mg_find(value, PERL_MAGIC_ext); + shared_magic->mg_virtual = &svtable; + shared_magic->mg_obj = newSViv((IV)shared); + shared_magic->mg_flags |= MGf_REFCOUNTED; + SvMAGICAL_on(value); + RETVAL = obj; + OUTPUT: + RETVAL + + diff --git a/ext/threads/shared/t/sv_refs.t b/ext/threads/shared/t/sv_refs.t new file mode 100644 index 0000000000..36977e7ae7 --- /dev/null +++ b/ext/threads/shared/t/sv_refs.t @@ -0,0 +1,56 @@ +BEGIN { +# chdir 't' if -d 't'; +# 
push @INC ,'../lib'; + require Config; import Config; + unless ($Config{'useithreads'}) { + print "1..0 # Skip: no useithreads\n"; + exit 0; + } +} + + +sub ok { + my ($id, $ok, $name) = @_; + + # You have to do it this way or VMS will get confused. + print $ok ? "ok $id - $name\n" : "not ok $id - $name\n"; + + printf "# Failed test at line %d\n", (caller)[2] unless $ok; + + return $ok; +} + +use Devel::Peek; +use ExtUtils::testlib; +use strict; +BEGIN { print "1..9\n" }; +use threads; +use threads::shared; +ok(1,1,"loaded"); + +my $foo; +my $bar = "foo"; +share($foo); +eval { +$foo = \$bar; +}; +ok(2,my $temp1 = $@ =~/You cannot assign a non shared reference to a shared scalar/, "Check that the warning message is correct"); +share($bar); +$foo = \$bar; +ok(3, $temp1 = $foo =~/SCALAR/, "Check that is a ref"); +ok(4, $$foo eq "foo", "Check that it points to the correct value"); +$bar = "yeah"; +ok(5, $$foo eq "yeah", "Check that assignment works"); +$$foo = "yeah2"; +ok(6, $$foo eq "yeah2", "Check that deref assignment works"); +threads->create(sub {$bar = "yeah3"})->join(); +ok(7, $$foo eq "yeah3", "Check that other thread assignemtn works"); +threads->create(sub {$foo = "artur"})->join(); +ok(8, $foo eq "artur", "Check that uncopupling the ref works"); +my $baz; +share($baz); +$baz = "original"; +$bar = \$baz; +$foo = \$bar; +ok(9,$$$foo eq 'original', "Check reference chain"); + diff --git a/ext/threads/shared/t/sv_simple.t b/ext/threads/shared/t/sv_simple.t new file mode 100644 index 0000000000..2a0d2970de --- /dev/null +++ b/ext/threads/shared/t/sv_simple.t @@ -0,0 +1,59 @@ + + + + +BEGIN { +# chdir 't' if -d 't'; +# push @INC ,'../lib'; + require Config; import Config; + unless ($Config{'useithreads'}) { + print "1..0 # Skip: no useithreads\n"; + exit 0; + } +} + + +sub ok { + my ($id, $ok, $name) = @_; + + # You have to do it this way or VMS will get confused. + print $ok ? 
"ok $id - $name\n" : "not ok $id - $name\n"; + + printf "# Failed test at line %d\n", (caller)[2] unless $ok; + + return $ok; +} + + +use ExtUtils::testlib; +use strict; +BEGIN { print "1..10\n" }; +use threads; +use threads::shared; +ok(1,1,"loaded"); +my $test = "bar"; +share($test); +ok(2,$test eq "bar","Test magic share fetch"); +$test = "foo"; +ok(3,$test eq "foo","Test magic share assign"); +threads->create( + sub { + ok(4, $test eq "foo","Test mage share fetch after thread"); + $test = "baz"; + ok(5,threads::shared::_thrcnt($test) == 2, "Check that threadcount is correct"); + })->join(); +ok(6,$test eq "baz","Test that value has changed in another thread"); +ok(7,threads::shared::_thrcnt($test) == 1,"Check thrcnt is down properly"); +$test = "barbar"; +ok(8, length($test) == 6, "Check length code"); +threads->create(sub { $test = "barbarbar" })->join; +ok(9, length($test) == 9, "Check length code after different thread modified it"); +threads->create(sub { undef($test)})->join(); +ok(10, !defined($test), "Check undef value"); + + + + + + + diff --git a/ext/threads/threads.pm b/ext/threads/threads.pm index 9f9c32356b..ef0f412f65 100755 --- a/ext/threads/threads.pm +++ b/ext/threads/threads.pm @@ -1,5 +1,3 @@ - - package threads; use 5.7.2; @@ -10,7 +8,6 @@ use overload '==' => \&equals, 'fallback' => 1; - #use threads::Shared; require Exporter; @@ -18,7 +15,6 @@ require DynaLoader; use Devel::Peek; - our @ISA = qw(Exporter DynaLoader); our %EXPORT_TAGS = ( all => [qw()]); @@ -46,7 +42,6 @@ $Config::threads = 1; bootstrap threads $VERSION; - # Preloaded methods go here. 
1; @@ -58,14 +53,12 @@ threads - Perl extension allowing use of interpreter based threads from perl =head1 SYNOPSIS - use threads; sub start_thread { print "Thread started\n"; } - my $thread = threads->new("start_thread","argument"); $thread->new(sub { print "I am a thread"},"argument"); @@ -78,18 +71,21 @@ $thread = threads->self(); thread->tid(); - - =head1 DESCRIPTION -Perl 5.6 has something called interpreter threads, interpreter threads are built on MULTIPLICITY and allows for several different perl interpreters to run in different threads. This has been used in win32 perl to fake forks, it has also been available to people embedding perl. +Perl 5.6 has something called interpreter threads, interpreter threads +are built on MULTIPLICITY and allows for several different perl +interpreters to run in different threads. This has been used in win32 +perl to fake forks, it has also been available to people embedding +perl. =over =item new, function, LIST -This will create a new thread with the entry point function and give it LIST as parameters. -It will return the corresponding threads object. +This will create a new thread with the entry point function and give +it LIST as parameters. It will return the corresponding threads +object. =item $threads->join @@ -126,21 +122,21 @@ threads->self->tid() is a quick way to get current thread id =head1 AUTHOR and COPYRIGHT -Artur Bergman <lt>artur at contiller.se<gt> +Artur Bergman E<lt>artur at contiller.seE<gt> threads is released under the same license as Perl Thanks to -Richard Soderberg <lt>rs at crystalflame.net<gt> -Helping me out tons, trying to find reasons for races and other wierd bugs! +Richard Soderberg E<lt>rs at crystalflame.netE<gt> +Helping me out tons, trying to find reasons for races and other weird bugs! 
-Simon Cozens <lt>simon at brecon.co.uk<gt> -Being there to answer zillions of annoying questions +Simon Cozens E<lt>simon at brecon.co.ukE<gt> +Being there to answer zillions of annoying questions -Rocco Caputo <lt>troc at netrus.net<gt> +Rocco Caputo E<lt>troc at netrus.netE<gt> -Vipul Ved Prakash <lt>mail at vipul.net<gt> +Vipul Ved Prakash E<lt>mail at vipul.netE<gt> Helping with debugging. please join perl-ithreads@perl.org for more information @@ -160,7 +156,3 @@ please join perl-ithreads@perl.org for more information L<perl>, L<perlcall>, L<perlembed>, L<perlguts> =cut - - - - diff --git a/global.sym b/global.sym index c5a924697b..ede1f3d6b4 100644 --- a/global.sym +++ b/global.sym @@ -470,6 +470,7 @@ Perl_swash_init Perl_swash_fetch Perl_taint_env Perl_taint_proper +Perl_to_utf8_case Perl_to_utf8_lower Perl_to_utf8_upper Perl_to_utf8_title diff --git a/installhtml b/installhtml index 4bbaba9f0d..fe6628d476 100755 --- a/installhtml +++ b/installhtml @@ -167,12 +167,10 @@ usage("") unless @ARGV; # See vms/descrip_mms.template -> descrip.mms for invokation. if ( $^O eq 'VMS' ) { @ARGV = split(/\s+/,$ARGV[0]); } -use vars qw($opt_htmldir $opt_htmlroot $opt_podroot $opt_splitpod - $opt_verbose $opt_help $opt_podpath $opt_splithead $opt_splititem - $opt_libpods $opt_recurse); +use vars qw( %Options ); # parse the command-line -my $result = GetOptions( qw( +my $result = GetOptions( \%Options, qw( help podpath=s podroot=s @@ -249,7 +247,7 @@ foreach my $dir (@splithead) { $_ =~ s{HREF="#(.*)">}{ my $url = "$file/$1.html" ; $url = Pod::Html::relativize_url( $url, "$file.html" ) - if ( ! defined $opt_htmlroot || $opt_htmlroot eq '' ) ; + if ( ! 
defined $Options{htmlroot} || $Options{htmlroot} eq '' ); "HREF=\"$url\">" ; }eg; push @data, $_; @@ -273,24 +271,24 @@ sub usage { sub parse_command_line { - usage() if defined $opt_help; - $opt_help = ""; # make -w shut up + usage() if defined $Options{help}; + $Options{help} = ""; # make -w shut up # list of directories - @podpath = split(":", $opt_podpath) if defined $opt_podpath; + @podpath = split(":", $Options{podpath}) if defined $Options{podpath}; # lists of files - @splithead = split(",", $opt_splithead) if defined $opt_splithead; - @splititem = split(",", $opt_splititem) if defined $opt_splititem; - @libpods = split(",", $opt_libpods) if defined $opt_libpods; + @splithead = split(",", $Options{splithead}) if defined $Options{splithead}; + @splititem = split(",", $Options{splititem}) if defined $Options{splititem}; + @libpods = split(",", $Options{libpods}) if defined $Options{libpods}; - $htmldir = $opt_htmldir if defined $opt_htmldir; - $htmlroot = $opt_htmlroot if defined $opt_htmlroot; - $podroot = $opt_podroot if defined $opt_podroot; - $splitpod = $opt_splitpod if defined $opt_splitpod; + $htmldir = $Options{htmldir} if defined $Options{htmldir}; + $htmlroot = $Options{htmlroot} if defined $Options{htmlroot}; + $podroot = $Options{podroot} if defined $Options{podroot}; + $splitpod = $Options{splitpod} if defined $Options{splitpod}; - $recurse = $opt_recurse if defined $opt_recurse; - $verbose = $opt_verbose if defined $opt_verbose; + $recurse = $Options{recurse} if defined $Options{recurse}; + $verbose = $Options{verbose} if defined $Options{verbose}; } @@ -337,7 +335,7 @@ sub create_index { ($lcp1,$lcp2) = ($name =~ m,/H1>\s<P>\s(\S+)\s[\s-]*(.*?)\s*$,sm); } my $url= "$dir/$file" ; - if ( ! defined $opt_htmlroot || $opt_htmlroot eq '' ) { + if ( ! 
defined $Options{htmlroot} || $Options{htmlroot} eq '' ) { $url = Pod::Html::relativize_url( "$dir/$file", $html ) ; } diff --git a/lib/unicore/To/Lower.pl b/lib/unicore/To/Lower.pl index 0fd4f8dbb0..ce89c8e2a7 100644 --- a/lib/unicore/To/Lower.pl +++ b/lib/unicore/To/Lower.pl @@ -1,6 +1,112 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! + +%utf8::ToSpecLower = ( +'223' => "\x{00DF}", +'329' => "\x{0149}", +'496' => "\x{01F0}", +'912' => "\x{0390}", +'944' => "\x{03B0}", +'1415' => "\x{0587}", +'7830' => "\x{1E96}", +'7831' => "\x{1E97}", +'7832' => "\x{1E98}", +'7833' => "\x{1E99}", +'7834' => "\x{1E9A}", +'8016' => "\x{1F50}", +'8018' => "\x{1F52}", +'8020' => "\x{1F54}", +'8022' => "\x{1F56}", +'8064' => "\x{1F80}", +'8065' => "\x{1F81}", +'8066' => "\x{1F82}", +'8067' => "\x{1F83}", +'8068' => "\x{1F84}", +'8069' => "\x{1F85}", +'8070' => "\x{1F86}", +'8071' => "\x{1F87}", +'8072' => "\x{1F80}", +'8073' => "\x{1F81}", +'8074' => "\x{1F82}", +'8075' => "\x{1F83}", +'8076' => "\x{1F84}", +'8077' => "\x{1F85}", +'8078' => "\x{1F86}", +'8079' => "\x{1F87}", +'8080' => "\x{1F90}", +'8081' => "\x{1F91}", +'8082' => "\x{1F92}", +'8083' => "\x{1F93}", +'8084' => "\x{1F94}", +'8085' => "\x{1F95}", +'8086' => "\x{1F96}", +'8087' => "\x{1F97}", +'8088' => "\x{1F90}", +'8089' => "\x{1F91}", +'8090' => "\x{1F92}", +'8091' => "\x{1F93}", +'8092' => "\x{1F94}", +'8093' => "\x{1F95}", +'8094' => "\x{1F96}", +'8095' => "\x{1F97}", +'8096' => "\x{1FA0}", +'8097' => "\x{1FA1}", +'8098' => "\x{1FA2}", +'8099' => "\x{1FA3}", +'8100' => "\x{1FA4}", +'8101' => "\x{1FA5}", +'8102' => "\x{1FA6}", +'8103' => "\x{1FA7}", +'8104' => "\x{1FA0}", +'8105' => "\x{1FA1}", +'8106' => "\x{1FA2}", +'8107' => "\x{1FA3}", +'8108' => "\x{1FA4}", +'8109' => "\x{1FA5}", +'8110' => "\x{1FA6}", +'8111' => "\x{1FA7}", +'8114' => "\x{1FB2}", +'8115' => "\x{1FB3}", +'8116' => "\x{1FB4}", +'8118' => "\x{1FB6}", +'8119' 
=> "\x{1FB7}", +'8124' => "\x{1FB3}", +'8130' => "\x{1FC2}", +'8131' => "\x{1FC3}", +'8132' => "\x{1FC4}", +'8134' => "\x{1FC6}", +'8135' => "\x{1FC7}", +'8140' => "\x{1FC3}", +'8146' => "\x{1FD2}", +'8147' => "\x{1FD3}", +'8150' => "\x{1FD6}", +'8151' => "\x{1FD7}", +'8162' => "\x{1FE2}", +'8163' => "\x{1FE3}", +'8164' => "\x{1FE4}", +'8166' => "\x{1FE6}", +'8167' => "\x{1FE7}", +'8178' => "\x{1FF2}", +'8179' => "\x{1FF3}", +'8180' => "\x{1FF4}", +'8182' => "\x{1FF6}", +'8183' => "\x{1FF7}", +'8188' => "\x{1FF3}", +'64256' => "\x{FB00}", +'64257' => "\x{FB01}", +'64258' => "\x{FB02}", +'64259' => "\x{FB03}", +'64260' => "\x{FB04}", +'64261' => "\x{FB05}", +'64262' => "\x{FB06}", +'64275' => "\x{FB13}", +'64276' => "\x{FB14}", +'64277' => "\x{FB15}", +'64278' => "\x{FB16}", +'64279' => "\x{FB17}", +); + return <<'END'; 0041 0061 0042 0062 diff --git a/lib/unicore/To/SpecLower.pl b/lib/unicore/To/SpecLower.pl deleted file mode 100644 index 18c073b98b..0000000000 --- a/lib/unicore/To/SpecLower.pl +++ /dev/null @@ -1,107 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
-return <<'END'; -00DF 00DF -0149 0149 -01F0 01F0 -0390 0390 -03B0 03B0 -0587 0587 -1E96 1E96 -1E97 1E97 -1E98 1E98 -1E99 1E99 -1E9A 1E9A -1F50 1F50 -1F52 1F52 -1F54 1F54 -1F56 1F56 -1F80 1F80 -1F81 1F81 -1F82 1F82 -1F83 1F83 -1F84 1F84 -1F85 1F85 -1F86 1F86 -1F87 1F87 -1F88 1F80 -1F89 1F81 -1F8A 1F82 -1F8B 1F83 -1F8C 1F84 -1F8D 1F85 -1F8E 1F86 -1F8F 1F87 -1F90 1F90 -1F91 1F91 -1F92 1F92 -1F93 1F93 -1F94 1F94 -1F95 1F95 -1F96 1F96 -1F97 1F97 -1F98 1F90 -1F99 1F91 -1F9A 1F92 -1F9B 1F93 -1F9C 1F94 -1F9D 1F95 -1F9E 1F96 -1F9F 1F97 -1FA0 1FA0 -1FA1 1FA1 -1FA2 1FA2 -1FA3 1FA3 -1FA4 1FA4 -1FA5 1FA5 -1FA6 1FA6 -1FA7 1FA7 -1FA8 1FA0 -1FA9 1FA1 -1FAA 1FA2 -1FAB 1FA3 -1FAC 1FA4 -1FAD 1FA5 -1FAE 1FA6 -1FAF 1FA7 -1FB2 1FB2 -1FB3 1FB3 -1FB4 1FB4 -1FB6 1FB6 -1FB7 1FB7 -1FBC 1FB3 -1FC2 1FC2 -1FC3 1FC3 -1FC4 1FC4 -1FC6 1FC6 -1FC7 1FC7 -1FCC 1FC3 -1FD2 1FD2 -1FD3 1FD3 -1FD6 1FD6 -1FD7 1FD7 -1FE2 1FE2 -1FE3 1FE3 -1FE4 1FE4 -1FE6 1FE6 -1FE7 1FE7 -1FF2 1FF2 -1FF3 1FF3 -1FF4 1FF4 -1FF6 1FF6 -1FF7 1FF7 -1FFC 1FF3 -FB00 FB00 -FB01 FB01 -FB02 FB02 -FB03 FB03 -FB04 FB04 -FB05 FB05 -FB06 FB06 -FB13 FB13 -FB14 FB14 -FB15 FB15 -FB16 FB16 -FB17 FB17 -END diff --git a/lib/unicore/To/SpecTitle.pl b/lib/unicore/To/SpecTitle.pl deleted file mode 100644 index c3e1911408..0000000000 --- a/lib/unicore/To/SpecTitle.pl +++ /dev/null @@ -1,106 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
-return <<'END'; -00DF 0053 0073 -0149 02BC 004E -01F0 004A 030C -0390 0399 0308 0301 -03B0 03A5 0308 0301 -0587 0535 0582 -1E96 0048 0331 -1E97 0054 0308 -1E98 0057 030A -1E99 0059 030A -1E9A 0041 02BE -1F50 03A5 0313 -1F52 03A5 0313 0300 -1F54 03A5 0313 0301 -1F56 03A5 0313 0342 -1F80 1F88 -1F81 1F89 -1F82 1F8A -1F83 1F8B -1F84 1F8C -1F85 1F8D -1F86 1F8E -1F87 1F8F -1F88 1F88 -1F89 1F89 -1F8A 1F8A -1F8B 1F8B -1F8C 1F8C -1F8D 1F8D -1F8E 1F8E -1F8F 1F8F -1F90 1F98 -1F91 1F99 -1F92 1F9A -1F93 1F9B -1F94 1F9C -1F95 1F9D -1F96 1F9E -1F97 1F9F -1F98 1F98 -1F99 1F99 -1F9A 1F9A -1F9B 1F9B -1F9C 1F9C -1F9D 1F9D -1F9E 1F9E -1F9F 1F9F -1FA0 1FA8 -1FA1 1FA9 -1FA2 1FAA -1FA3 1FAB -1FA4 1FAC -1FA5 1FAD -1FA6 1FAE -1FA7 1FAF -1FA8 1FA8 -1FA9 1FA9 -1FAA 1FAA -1FAB 1FAB -1FAC 1FAC -1FAD 1FAD -1FAE 1FAE -1FAF 1FAF -1FB2 1FBA 0345 -1FB3 1FBC -1FB4 0386 0345 -1FB6 0391 0342 -1FB7 0391 0342 0345 -1FBC 1FBC -1FC2 1FCA 0345 -1FC3 1FCC -1FC4 0389 0345 -1FC6 0397 0342 -1FC7 0397 0342 0345 -1FCC 1FCC -1FD2 0399 0308 0300 -1FD3 0399 0308 0301 -1FD6 0399 0342 -1FD7 0399 0308 0342 -1FE2 03A5 0308 0300 -1FE3 03A5 0308 0301 -1FE4 03A1 0313 -1FE6 03A5 0342 -1FE7 03A5 0308 0342 -1FF2 1FFA 0345 -1FF3 1FFC -1FF4 038F 0345 -1FF6 03A9 0342 -1FF7 03A9 0342 0345 -1FFC 1FFC -FB00 0046 0066 -FB01 0046 0069 -FB02 0046 006C -FB03 0046 0066 0069 -FB04 0046 0066 006C -FB05 FB06 0053 0074 -FB13 0544 0576 -FB14 0544 0565 -FB15 0544 056B -FB16 054E 0576 -FB17 0544 056D -END diff --git a/lib/unicore/To/SpecUpper.pl b/lib/unicore/To/SpecUpper.pl deleted file mode 100644 index e5af4b1089..0000000000 --- a/lib/unicore/To/SpecUpper.pl +++ /dev/null @@ -1,106 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. -# Any changes made here will be lost! 
-return <<'END'; -00DF 0053 0053 -0149 02BC 004E -01F0 004A 030C -0390 0399 0308 0301 -03B0 03A5 0308 0301 -0587 0535 0552 -1E96 0048 0331 -1E97 0054 0308 -1E98 0057 030A -1E99 0059 030A -1E9A 0041 02BE -1F50 03A5 0313 -1F52 03A5 0313 0300 -1F54 03A5 0313 0301 -1F56 03A5 0313 0342 -1F80 1F08 0399 -1F81 1F09 0399 -1F82 1F0A 0399 -1F83 1F0B 0399 -1F84 1F0C 0399 -1F85 1F0D 0399 -1F86 1F0E 0399 -1F87 1F0F 0399 -1F88 1F08 0399 -1F89 1F09 0399 -1F8A 1F0A 0399 -1F8B 1F0B 0399 -1F8C 1F0C 0399 -1F8D 1F0D 0399 -1F8E 1F0E 0399 -1F8F 1F0F 0399 -1F90 1F28 0399 -1F91 1F29 0399 -1F92 1F2A 0399 -1F93 1F2B 0399 -1F94 1F2C 0399 -1F95 1F2D 0399 -1F96 1F2E 0399 -1F97 1F2F 0399 -1F98 1F28 0399 -1F99 1F29 0399 -1F9A 1F2A 0399 -1F9B 1F2B 0399 -1F9C 1F2C 0399 -1F9D 1F2D 0399 -1F9E 1F2E 0399 -1F9F 1F2F 0399 -1FA0 1F68 0399 -1FA1 1F69 0399 -1FA2 1F6A 0399 -1FA3 1F6B 0399 -1FA4 1F6C 0399 -1FA5 1F6D 0399 -1FA6 1F6E 0399 -1FA7 1F6F 0399 -1FA8 1F68 0399 -1FA9 1F69 0399 -1FAA 1F6A 0399 -1FAB 1F6B 0399 -1FAC 1F6C 0399 -1FAD 1F6D 0399 -1FAE 1F6E 0399 -1FAF 1F6F 0399 -1FB2 1FBA 0399 -1FB3 0391 0399 -1FB4 0386 0399 -1FB6 0391 0342 -1FB7 0391 0342 0399 -1FBC 0391 0399 -1FC2 1FCA 0399 -1FC3 0397 0399 -1FC4 0389 0399 -1FC6 0397 0342 -1FC7 0397 0342 0399 -1FCC 0397 0399 -1FD2 0399 0308 0300 -1FD3 0399 0308 0301 -1FD6 0399 0342 -1FD7 0399 0308 0342 -1FE2 03A5 0308 0300 -1FE3 03A5 0308 0301 -1FE4 03A1 0313 -1FE6 03A5 0342 -1FE7 03A5 0308 0342 -1FF2 1FFA 0399 -1FF3 03A9 0399 -1FF4 038F 0399 -1FF6 03A9 0342 -1FF7 03A9 0342 0399 -1FFC 03A9 0399 -FB00 0046 0046 -FB01 0046 0049 -FB02 0046 004C -FB03 0046 0046 0049 -FB04 0046 0046 004C -FB05 FB06 0053 0054 -FB13 0544 0546 -FB14 0544 0535 -FB15 0544 053B -FB16 054E 0546 -FB17 0544 053D -END diff --git a/lib/unicore/To/Title.pl b/lib/unicore/To/Title.pl index 2fca3533c8..3da9ca96ad 100644 --- a/lib/unicore/To/Title.pl +++ b/lib/unicore/To/Title.pl @@ -1,6 +1,112 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by mktables from e.g. Unicode.txt. 
# Any changes made here will be lost! + +%utf8::ToSpecTitle = ( +'223' => "\x{0053}\x{0073}", +'329' => "\x{02BC}\x{004E}", +'496' => "\x{004A}\x{030C}", +'912' => "\x{0399}\x{0308}\x{0301}", +'944' => "\x{03A5}\x{0308}\x{0301}", +'1415' => "\x{0535}\x{0582}", +'7830' => "\x{0048}\x{0331}", +'7831' => "\x{0054}\x{0308}", +'7832' => "\x{0057}\x{030A}", +'7833' => "\x{0059}\x{030A}", +'7834' => "\x{0041}\x{02BE}", +'8016' => "\x{03A5}\x{0313}", +'8018' => "\x{03A5}\x{0313}\x{0300}", +'8020' => "\x{03A5}\x{0313}\x{0301}", +'8022' => "\x{03A5}\x{0313}\x{0342}", +'8064' => "\x{1F88}", +'8065' => "\x{1F89}", +'8066' => "\x{1F8A}", +'8067' => "\x{1F8B}", +'8068' => "\x{1F8C}", +'8069' => "\x{1F8D}", +'8070' => "\x{1F8E}", +'8071' => "\x{1F8F}", +'8072' => "\x{1F88}", +'8073' => "\x{1F89}", +'8074' => "\x{1F8A}", +'8075' => "\x{1F8B}", +'8076' => "\x{1F8C}", +'8077' => "\x{1F8D}", +'8078' => "\x{1F8E}", +'8079' => "\x{1F8F}", +'8080' => "\x{1F98}", +'8081' => "\x{1F99}", +'8082' => "\x{1F9A}", +'8083' => "\x{1F9B}", +'8084' => "\x{1F9C}", +'8085' => "\x{1F9D}", +'8086' => "\x{1F9E}", +'8087' => "\x{1F9F}", +'8088' => "\x{1F98}", +'8089' => "\x{1F99}", +'8090' => "\x{1F9A}", +'8091' => "\x{1F9B}", +'8092' => "\x{1F9C}", +'8093' => "\x{1F9D}", +'8094' => "\x{1F9E}", +'8095' => "\x{1F9F}", +'8096' => "\x{1FA8}", +'8097' => "\x{1FA9}", +'8098' => "\x{1FAA}", +'8099' => "\x{1FAB}", +'8100' => "\x{1FAC}", +'8101' => "\x{1FAD}", +'8102' => "\x{1FAE}", +'8103' => "\x{1FAF}", +'8104' => "\x{1FA8}", +'8105' => "\x{1FA9}", +'8106' => "\x{1FAA}", +'8107' => "\x{1FAB}", +'8108' => "\x{1FAC}", +'8109' => "\x{1FAD}", +'8110' => "\x{1FAE}", +'8111' => "\x{1FAF}", +'8114' => "\x{1FBA}\x{0345}", +'8115' => "\x{1FBC}", +'8116' => "\x{0386}\x{0345}", +'8118' => "\x{0391}\x{0342}", +'8119' => "\x{0391}\x{0342}\x{0345}", +'8124' => "\x{1FBC}", +'8130' => "\x{1FCA}\x{0345}", +'8131' => "\x{1FCC}", +'8132' => "\x{0389}\x{0345}", +'8134' => "\x{0397}\x{0342}", +'8135' => 
"\x{0397}\x{0342}\x{0345}", +'8140' => "\x{1FCC}", +'8146' => "\x{0399}\x{0308}\x{0300}", +'8147' => "\x{0399}\x{0308}\x{0301}", +'8150' => "\x{0399}\x{0342}", +'8151' => "\x{0399}\x{0308}\x{0342}", +'8162' => "\x{03A5}\x{0308}\x{0300}", +'8163' => "\x{03A5}\x{0308}\x{0301}", +'8164' => "\x{03A1}\x{0313}", +'8166' => "\x{03A5}\x{0342}", +'8167' => "\x{03A5}\x{0308}\x{0342}", +'8178' => "\x{1FFA}\x{0345}", +'8179' => "\x{1FFC}", +'8180' => "\x{038F}\x{0345}", +'8182' => "\x{03A9}\x{0342}", +'8183' => "\x{03A9}\x{0342}\x{0345}", +'8188' => "\x{1FFC}", +'64256' => "\x{0046}\x{0066}", +'64257' => "\x{0046}\x{0069}", +'64258' => "\x{0046}\x{006C}", +'64259' => "\x{0046}\x{0066}\x{0069}", +'64260' => "\x{0046}\x{0066}\x{006C}", +'64261' => "\x{0053}\x{0074}", +'64262' => "\x{0053}\x{0074}", +'64275' => "\x{0544}\x{0576}", +'64276' => "\x{0544}\x{0565}", +'64277' => "\x{0544}\x{056B}", +'64278' => "\x{054E}\x{0576}", +'64279' => "\x{0544}\x{056D}", +); + return <<'END'; 0061 0041 0062 0042 diff --git a/lib/unicore/To/Upper.pl b/lib/unicore/To/Upper.pl index bfdd4eadc5..a9c7a9f734 100644 --- a/lib/unicore/To/Upper.pl +++ b/lib/unicore/To/Upper.pl @@ -1,6 +1,112 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by mktables from e.g. Unicode.txt. # Any changes made here will be lost! 
+ +%utf8::ToSpecUpper = ( +'223' => "\x{0053}\x{0053}", +'329' => "\x{02BC}\x{004E}", +'496' => "\x{004A}\x{030C}", +'912' => "\x{0399}\x{0308}\x{0301}", +'944' => "\x{03A5}\x{0308}\x{0301}", +'1415' => "\x{0535}\x{0552}", +'7830' => "\x{0048}\x{0331}", +'7831' => "\x{0054}\x{0308}", +'7832' => "\x{0057}\x{030A}", +'7833' => "\x{0059}\x{030A}", +'7834' => "\x{0041}\x{02BE}", +'8016' => "\x{03A5}\x{0313}", +'8018' => "\x{03A5}\x{0313}\x{0300}", +'8020' => "\x{03A5}\x{0313}\x{0301}", +'8022' => "\x{03A5}\x{0313}\x{0342}", +'8064' => "\x{1F08}\x{0399}", +'8065' => "\x{1F09}\x{0399}", +'8066' => "\x{1F0A}\x{0399}", +'8067' => "\x{1F0B}\x{0399}", +'8068' => "\x{1F0C}\x{0399}", +'8069' => "\x{1F0D}\x{0399}", +'8070' => "\x{1F0E}\x{0399}", +'8071' => "\x{1F0F}\x{0399}", +'8072' => "\x{1F08}\x{0399}", +'8073' => "\x{1F09}\x{0399}", +'8074' => "\x{1F0A}\x{0399}", +'8075' => "\x{1F0B}\x{0399}", +'8076' => "\x{1F0C}\x{0399}", +'8077' => "\x{1F0D}\x{0399}", +'8078' => "\x{1F0E}\x{0399}", +'8079' => "\x{1F0F}\x{0399}", +'8080' => "\x{1F28}\x{0399}", +'8081' => "\x{1F29}\x{0399}", +'8082' => "\x{1F2A}\x{0399}", +'8083' => "\x{1F2B}\x{0399}", +'8084' => "\x{1F2C}\x{0399}", +'8085' => "\x{1F2D}\x{0399}", +'8086' => "\x{1F2E}\x{0399}", +'8087' => "\x{1F2F}\x{0399}", +'8088' => "\x{1F28}\x{0399}", +'8089' => "\x{1F29}\x{0399}", +'8090' => "\x{1F2A}\x{0399}", +'8091' => "\x{1F2B}\x{0399}", +'8092' => "\x{1F2C}\x{0399}", +'8093' => "\x{1F2D}\x{0399}", +'8094' => "\x{1F2E}\x{0399}", +'8095' => "\x{1F2F}\x{0399}", +'8096' => "\x{1F68}\x{0399}", +'8097' => "\x{1F69}\x{0399}", +'8098' => "\x{1F6A}\x{0399}", +'8099' => "\x{1F6B}\x{0399}", +'8100' => "\x{1F6C}\x{0399}", +'8101' => "\x{1F6D}\x{0399}", +'8102' => "\x{1F6E}\x{0399}", +'8103' => "\x{1F6F}\x{0399}", +'8104' => "\x{1F68}\x{0399}", +'8105' => "\x{1F69}\x{0399}", +'8106' => "\x{1F6A}\x{0399}", +'8107' => "\x{1F6B}\x{0399}", +'8108' => "\x{1F6C}\x{0399}", +'8109' => "\x{1F6D}\x{0399}", +'8110' => "\x{1F6E}\x{0399}", +'8111' => 
"\x{1F6F}\x{0399}", +'8114' => "\x{1FBA}\x{0399}", +'8115' => "\x{0391}\x{0399}", +'8116' => "\x{0386}\x{0399}", +'8118' => "\x{0391}\x{0342}", +'8119' => "\x{0391}\x{0342}\x{0399}", +'8124' => "\x{0391}\x{0399}", +'8130' => "\x{1FCA}\x{0399}", +'8131' => "\x{0397}\x{0399}", +'8132' => "\x{0389}\x{0399}", +'8134' => "\x{0397}\x{0342}", +'8135' => "\x{0397}\x{0342}\x{0399}", +'8140' => "\x{0397}\x{0399}", +'8146' => "\x{0399}\x{0308}\x{0300}", +'8147' => "\x{0399}\x{0308}\x{0301}", +'8150' => "\x{0399}\x{0342}", +'8151' => "\x{0399}\x{0308}\x{0342}", +'8162' => "\x{03A5}\x{0308}\x{0300}", +'8163' => "\x{03A5}\x{0308}\x{0301}", +'8164' => "\x{03A1}\x{0313}", +'8166' => "\x{03A5}\x{0342}", +'8167' => "\x{03A5}\x{0308}\x{0342}", +'8178' => "\x{1FFA}\x{0399}", +'8179' => "\x{03A9}\x{0399}", +'8180' => "\x{038F}\x{0399}", +'8182' => "\x{03A9}\x{0342}", +'8183' => "\x{03A9}\x{0342}\x{0399}", +'8188' => "\x{03A9}\x{0399}", +'64256' => "\x{0046}\x{0046}", +'64257' => "\x{0046}\x{0049}", +'64258' => "\x{0046}\x{004C}", +'64259' => "\x{0046}\x{0046}\x{0049}", +'64260' => "\x{0046}\x{0046}\x{004C}", +'64261' => "\x{0053}\x{0054}", +'64262' => "\x{0053}\x{0054}", +'64275' => "\x{0544}\x{0546}", +'64276' => "\x{0544}\x{0535}", +'64277' => "\x{0544}\x{053B}", +'64278' => "\x{054E}\x{0546}", +'64279' => "\x{0544}\x{053D}", +); + return <<'END'; 0061 0041 0062 0042 diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 3328f69326..7d8912d977 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -745,16 +745,32 @@ if (open(my $SpecCase, "SpecCase.txt")) { } # Now write out the special cases properties in their code point order. -# The To/Spec{Lower,Title,Upper}.pl are unused for now since the swash -# routines do not do returning multiple characters. +# Prepend them to the To/{Upper,Lower,Title}.pl. 
for my $case (qw(Lower Title Upper)) { - my @case; - for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) { - my ($ix, $code, $to) = @$prop; - append(\@case, $code, $to); + my $NormalCase = do "To/$case.pl"; + if (open(my $Case, ">To/$case.pl")) { + header($Case); + print $Case <<EOT; + +%utf8::ToSpec$case = ( +EOT + for my $prop (sort { $a->[0] <=> $b->[0] } @{$Case{$case}}) { + my ($ix, $code, $to) = @$prop; + my $tostr = + join "", map { sprintf "\\x{%s}", $_ } split ' ', $to; + print $Case qq['$ix' => "$tostr",\n]; + } + print $Case <<EOT; +); + +EOT + begin($Case); + print $Case $NormalCase; + end($Case); + } else { + die "$0: To/$case.txt: $!\n"; } - flush(\@case, "To/Spec$case.pl"); } # That's all, folks! diff --git a/patchlevel.h b/patchlevel.h index d62ee798fb..cdce27c291 100644 --- a/patchlevel.h +++ b/patchlevel.h @@ -70,7 +70,7 @@ #if !defined(PERL_PATCHLEVEL_H_IMPLICIT) && !defined(LOCAL_PATCH_COUNT) static char *local_patches[] = { NULL - ,"DEVEL12535" + ,"DEVEL12566" ,NULL }; diff --git a/pod/buildtoc.PL b/pod/buildtoc.PL index 5d789624db..bb6d0d3ad0 100644 --- a/pod/buildtoc.PL +++ b/pod/buildtoc.PL @@ -109,6 +109,7 @@ if (-d "pod") { perldsc perlrequick perlpod + perlpodspec perlstyle perltrap diff --git a/pod/perl.pod b/pod/perl.pod index 9d585b5d61..8495648083 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -63,6 +63,7 @@ For ease of access, the Perl manual has been split up into several sections. perlfunc Perl built-in functions perlopentut Perl open() tutorial perlpod Perl plain old documentation + perlpodspec Perl plain old documentation format specification perlrun Perl execution and options perldiag Perl diagnostic messages perllexwarn Perl warnings and their control diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index 6d94459817..ca48470c3a 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -2330,7 +2330,7 @@ C<redo> work. Returns an lowercased version of EXPR. 
This is the internal function implementing the C<\L> escape in double-quoted strings. Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale> -and L<perlunicode>. +and L<perlunicode> for more details about locale and Unicode support. If EXPR is omitted, uses C<$_>. @@ -2341,7 +2341,8 @@ If EXPR is omitted, uses C<$_>. Returns the value of EXPR with the first character lowercased. This is the internal function implementing the C<\l> escape in double-quoted strings. Respects current LC_CTYPE locale if C<use -locale> in force. See L<perllocale> and L<perlunicode>. +locale> in force. See L<perllocale> and L<perlunicode> for more +details about locale and Unicode support. If EXPR is omitted, uses C<$_>. @@ -5464,8 +5465,9 @@ otherwise. Returns an uppercased version of EXPR. This is the internal function implementing the C<\U> escape in double-quoted strings. Respects current LC_CTYPE locale if C<use locale> in force. See L<perllocale> -and L<perlunicode>. It does not attempt to do titlecase mapping on -initial letters. See C<ucfirst> for that. +and L<perlunicode> for more details about locale and Unicode support. +It does not attempt to do titlecase mapping on initial letters. See +C<ucfirst> for that. If EXPR is omitted, uses C<$_>. @@ -5476,7 +5478,8 @@ If EXPR is omitted, uses C<$_>. Returns the value of EXPR with the first character in uppercase (titlecase in Unicode). This is the internal function implementing the C<\u> escape in double-quoted strings. Respects current LC_CTYPE -locale if C<use locale> in force. See L<perllocale> and L<perlunicode>. +locale if C<use locale> in force. See L<perllocale> and L<perlunicode> +for more details about locale and Unicode support. If EXPR is omitted, uses C<$_>. diff --git a/pod/perlpod.pod b/pod/perlpod.pod index 765266b9ea..91cc81ac84 100644 --- a/pod/perlpod.pod +++ b/pod/perlpod.pod @@ -1,325 +1,685 @@ + +=for comment +This document is in Pod format. 
To read this, use a Pod formatter, +like "perldoc perlpod". + =head1 NAME -perlpod - plain old documentation +perlpod - the Plain Old Documentation format =head1 DESCRIPTION -A pod-to-whatever translator reads a pod file paragraph by paragraph, -and translates it to the appropriate output format. There are -three kinds of paragraphs: -L<verbatim|/"Verbatim Paragraph">, -L<command|/"Command Paragraph">, and -L<ordinary text|/"Ordinary Block of Text">. +Pod is a simple-to-use markup language used for writing documentation +for Perl, Perl programs, and Perl modules. + +Translators are available for converting Pod to various formats +like plain text, HTML, man pages, and more. + +Pod markup consists of three basic kinds of paragraphs: +L<ordinary|/"Ordinary Paragraph">, +L<verbatim|/"Verbatim Paragraph">, and +L<command|/"Command Paragraph">. + + +=head2 Ordinary Paragraph + +Most paragraphs in your documentation will be ordinary blocks +of text, like this one. You can simply type in your text without +any markup whatsoever, and with just a blank line before and +after. When it gets formatted, it will undergo minimal formatting, +like being rewrapped, probably put into a proportionally spaced +font, and maybe even justified. + +You can use formatting codes in ordinary paragraphs, for B<bold>, +I<italic>, C<code-style>, L<hyperlinks|perlfaq>, and more. Such +codes are explained in the "L<Formatting Codes|/"Formatting Codes">" +section, below. + =head2 Verbatim Paragraph -A verbatim paragraph, distinguished by being indented (that is, -it starts with space or tab). It should be reproduced exactly, -with tabs assumed to be on 8-column boundaries. There are no -special formatting escapes, so you can't italicize or anything -like that. A \ means \, and nothing else. +Verbatim paragraphs are usually used for presenting a codeblock or +other text which does not require any special parsing or formatting, +and which shouldn't be wrapped. 
+ +A verbatim paragraph is distinguished by having its first character +be a space or a tab. (And commonly, all its lines begin with spaces +and/or tabs.) It should be reproduced exactly, with tabs assumed to +be on 8-column boundaries. There are no special formatting codes, +so you can't italicize or anything like that. A \ means \, and +nothing else. + =head2 Command Paragraph -All command paragraphs start with "=", followed by an -identifier, followed by arbitrary text that the command can -use however it pleases. Currently recognized commands are +A command paragraph is used for special treatment of whole chunks +of text, usually as headings or parts of lists. + +All command paragraphs (which are typically only one line long) start +with "=", followed by an identifier, followed by arbitrary text that +the command can use however it pleases. Currently recognized commands +are - =head1 heading - =head2 heading - =head3 heading - =head4 heading - =item text - =over N + =head1 Heading Text + =head2 Heading Text + =head3 Heading Text + =head4 Heading Text + =over indentlevel + =item stuff =back =cut =pod - =for X - =begin X - =end X + =begin format + =end format + =for format text... + +To explain them each in detail: + +=over + +=item C<=head1 I<Heading Text>> -=over 4 +=item C<=head2 I<Heading Text>> -=item =pod +=item C<=head3 I<Heading Text>> -=item =cut +=item C<=head4 I<Heading Text>> -The "=pod" directive does nothing beyond telling the compiler to lay -off parsing code through the next "=cut". It's useful for adding -another paragraph to the doc if you're mixing up code and pod a lot. +Head1 through head4 produce headings, head1 being the highest +level. The text in the rest of this paragraph is the content of the +heading. For example: -=item =head1 + =head2 Object Attributes -=item =head2 +The text "Object Attributes" comprises the heading there. (Note that +head3 and head4 are recent additions, not supported in older Pod +translators.) 
The text in these heading commands can use +formatting codes, as seen here: -=item =head3 + =head2 Possible Values for C<$/> -=item =head4 +Such commands are explained in the +"L<Formatting Codes|/"Formatting Codes">" section, below. -Head1, head2, head3 and head4 produce first, second, third and fourth -level headings, with the text in the same paragraph as the "=headn" -directive forming the heading description. +=item C<=over I<indentlevel>> -=item =over +=item C<=item I<stuff...>> -=item =back +=item C<=back> -=item =item +Item, over, and back require a little more explanation: "=over" starts +a region specifically for the generation of a list using "=item" +commands, or for indenting (groups of) normal paragraphs. At the end +of your list, use "=back" to end it. The I<indentlevel> option to +"=over" indicates how far over to indent, generally in ems (where +one em is the width of an "M" in the document's base font) or roughly +comparable units; if there is no I<indentlevel> option, it defaults +to four. (And some formatters may just ignore whatever I<indentlevel> +you provide.) In the I<stuff> in C<=item I<stuff...>>, you may +use formatting codes, as seen here: -Item, over, and back require a little more explanation: "=over" starts a -section specifically for the generation of a list using "=item" commands. At -the end of your list, use "=back" to end it. You will probably want to give -"4" as the number to "=over", as some formatters will use this for indentation. -The unit of indentation is optional. If the unit is not given the natural -indentation of the formatting system applied will be used. 
Note also that -there are some basic rules to using =item: don't use them outside of -an =over/=back block, use at least one inside an =over/=back block, you don't -_have_ to include the =back if the list just runs off the document, and -perhaps most importantly, keep the items consistent: either use "=item *" for -all of them, to produce bullets, or use "=item 1.", "=item 2.", etc., to -produce numbered lists, or use "=item foo", "=item bar", etc., i.e., things -that looks nothing like bullets or numbers. If you start with bullets or -numbers, stick with them, as many formatters use the first "=item" type to -decide how to format the list. + =item Using C<$|> to Control Buffering -=item =for +Such commands are explained in the +"L<Formatting Codes|/"Formatting Codes">" section, below. -=item =begin +Note also that there are some basic rules to using "=over" ... +"=back" regions: -=item =end +=over -For, begin, and end let you include sections that are not interpreted -as pod text, but passed directly to particular formatters. A formatter -that can utilize that format will use the section, otherwise it will be -completely ignored. The directive "=for" specifies that the entire next -paragraph is in the format indicated by the first word after -"=for", like this: +=item * + +Don't use "=item"s outside of an "=over" ... "=back" region. + +=item * - =for html <br> +The first thing after the "=over" command should be an "=item", unless +there aren't going to be any items at all in this "=over" ... "=back" +region. + +=item * + +Don't put "=headI<n>" commands inside an "=over" ... "=back" region. + +=item * + +And perhaps most importantly, keep the items consistent: either use +"=item *" for all of them, to produce bullets; or use "=item 1.", +"=item 2.", etc., to produce numbered lists; or use "=item foo", +"=item bar", etc. -- namely, things that look nothing like bullets or +numbers. 
+ +If you start with bullets or numbers, stick with them, as +formatters use the first "=item" type to decide how to format the +list. + +=back + +=item C<=cut> + +To end a Pod block, use a blank line, +then a line beginning with "=cut", and a blank +line after it. This lets Perl (and the Pod formatter) know that +this is where Perl code is resuming. (The blank line before the "=cut" +is not technically necessary, but many older Pod processors require it.) + +=item C<=pod> + +The "=pod" command by itself doesn't do much of anything, but it +signals to Perl (and Pod formatters) that a Pod block starts here. A +Pod block starts with I<any> command paragraph, so a "=pod" command is +usually used just when you want to start a Pod block with an ordinary +paragraph or a verbatim paragraph. For example: + + =item stuff() + + This function does stuff. + + =cut + + sub stuff { + ... + } + + =pod + + Remember to check its return value, as in: + + stuff() || die "Couldn't do stufF!"; + + =cut + +=item C<=begin I<formatname>> + +=item C<=end I<formatname>> + +=item C<=for I<formatname> I<text...>> + +For, begin, and end will let you have regions of text/code/data that +are not generally interpreted as normal Pod text, but are passed +directly to particular formatters, or are otherwise special. A +formatter that can use that format will use the region, otherwise it +will be completely ignored. + +A command "=begin I<formatname>", some paragraphs, and a +command "=end I<formatname>", mean that the text/data inbetween +is meant for formatters that understand the special format +called I<formatname>. For example, + + =begin html + + <hr> <img src="thang.png"> <p> This is a raw HTML paragraph </p> + + =end html + +The command "=for I<formatname> I<text...>" +specifies that the remainder of just this paragraph (starting +right after I<formatname>) is in that special format. 
+ + =for html <hr> <img src="thang.png"> + <p> This is a raw HTML paragraph </p> + +This means the same thing as the above "=begin html" ... "=end html" +region. -The paired commands "=begin" and "=end" work very similarly to "=for", but -instead of only accepting a single paragraph, all text from "=begin" to a -paragraph with a matching "=end" are treated as a particular format. +That is, with "=for", you can have only one paragraph's worth +of text (i.e., the text in "=for targetname text..."), but with +"=begin targetname" ... "=end targetname", you can have any amount +of stuff inbetween. (Note that there still must be a blank line +after the "=begin" command and a blank line before the "=end" +command.) Here are some examples of how to use these: - =begin html + =begin html + + <br>Figure 1.<br><IMG SRC="figure1.png"><br> + + =end html + + =begin text + + --------------- + | foo | + | bar | + --------------- - <br>Figure 1.<IMG SRC="figure1.png"><br> + ^^^^ Figure 1. ^^^^ - =end html + =end text - =begin text +Some format names that formatters currently are known to accept +include "roff", "man", "latex", "tex", "text", and "html". (Some +formatters will treat some of these as synonyms.) - --------------- - | foo | - | bar | - --------------- +A format name of "comment" is common for just making notes (presumably +to yourself) that won't appear in any formatted version of the Pod +document: - ^^^^ Figure 1. ^^^^ + =for comment + Make sure that all the available options are documented! - =end text +Some I<formatnames> will require a leading colon (as in +C<"=for :formatname">, or +C<"=begin :formatname" ... "=end :formatname">), +to signal that the text is not raw data, but instead I<is> Pod text +(i.e., possibly containing formatting codes) that's just not for +normal formatting (e.g., may not be a normal-use paragraph, but might +be for formatting as a footnote). 
-Some format names that formatters currently are known to accept include -"roff", "man", "latex", "tex", "text", and "html". (Some formatters will -treat some of these as synonyms.) +=back -And don't forget, when using any command, that the command lasts up until -the end of the B<paragraph>, not the line. Hence in the examples below, you -can see the empty lines after each command to end its paragraph. +And don't forget, when using any command, that the command lasts up +until the end of its I<paragraph>, not its line. So in the +examples below, you can see that every command needs the blank +line after it, to end its paragraph. Some examples of lists include: - =over 4 + =over + + =item * + + First item + + =item * + + Second item + + =back + + =over + + =item Foo() + + Description of Foo function + + =item Bar() - =item * + Description of Bar function - First item + =back - =item * - Second item +=head2 Formatting Codes - =back +In ordinary paragraphs and in some command paragraphs, various +formatting codes (a.k.a. "interior sequences") can be used: - =over 4 +=for comment + "interior sequences" is such an opaque term. + Prefer "formatting codes" instead. - =item Foo() +=over - Description of Foo function +=item C<IE<lt>textE<gt>> -- italic text - =item Bar() +Used for emphasis ("C<be IE<lt>careful!E<gt>>") and parameters +("C<redo IE<lt>LABELE<gt>>") + +=item C<BE<lt>textE<gt>> -- bold text + +Used for switches ("C<perl's BE<lt>-nE<gt> switch>"), programs +("C<some systems provide a BE<lt>chfnE<gt> for that>"), +emphasis ("C<be BE<lt>careful!E<gt>>"), and so on +("C<and that feature is known as BE<lt>autovivificationE<gt>>"). + +=item C<CE<lt>codeE<gt>> -- code text + +Renders code in a typewriter font, or gives some other indication that +this represents program text ("C<CE<lt>gmtime($^T)E<gt>>") or some other +form of computerese ("C<CE<lt>drwxr-xr-xE<gt>>"). + +=item C<LE<lt>nameE<gt>> -- a hyperlink + +There are various syntaxes, listed below. 
In the syntaxes given, +C<text>, C<name>, and C<section> cannot contain the characters +'/' and '|'; and any '<' or '>' should be matched. + +=over + +=item * - Description of Bar function +C<LE<lt>nameE<gt>> - =back +Link to a Perl manual page (e.g., C<LE<lt>Net::PingE<gt>>). Note +that C<name> should not contain spaces. This syntax +is also occasionally used for references to UNIX man pages, as in +C<LE<lt>crontab(5)E<gt>>. + +=item * + +C<LE<lt>name/"sec"E<gt>> or C<LE<lt>name/secE<gt>> + +Link to a section in other manual page. E.g., +C<LE<lt>perlsyn/"For Loops"E<gt>> + +=item * + +C<LE<lt>/"sec"E<gt>> or C<LE<lt>/secE<gt>> or C<LE<lt>"sec"E<gt>> + +Link to a section in this manual page. E.g., +C<LE<lt>/"Object Methods"E<gt>> =back -=head2 Ordinary Block of Text - -It will be filled, and maybe even -justified. Certain interior sequences are recognized both -here and in commands: - - I<text> Italicize text, used for emphasis or variables - B<text> Embolden text, used for switches and programs - S<text> Text contains non-breaking spaces - C<code> Render code in a typewriter font, or give some other - indication that this represents program text - L<name> A link (cross reference) to name - L<name> manual page - L<name/ident> item in manual page - L<name/"sec"> section in other manual page - L<"sec"> section in this manual page - (the quotes are optional) - L</"sec"> ditto - same as above but only 'text' is used for output. 
- (Text can not contain the characters '/' and '|', - and should contain matched '<' or '>') - L<text|name> - L<text|name/ident> - L<text|name/"sec"> - L<text|"sec"> - L<text|/"sec"> - - F<file> Used for filenames - X<index> An index entry - Z<> A zero-width character - E<escape> A named character (very similar to HTML escapes) - E<lt> A literal < - E<gt> A literal > - E<sol> A literal / - E<verbar> A literal | - (these are optional except in other interior - sequences and when preceded by a capital letter) - E<n> Character number n (probably in ASCII) - E<html> Some non-numeric HTML entity, such - as E<Agrave> - -Most of the time, you will only need a single set of angle brackets to -delimit the beginning and end of interior sequences. However, sometimes -you will want to put a right angle bracket (or greater-than sign '>') -inside of a sequence. This is particularly common when using a sequence -to provide a different font-type for a snippet of code. As with all -things in Perl, there is more than one way to do it. One way is to -simply escape the closing bracket using an C<E> sequence: +A section is started by the named heading or item. For +example, C<LE<lt>perlvar/$.E<gt>> or C<LE<lt>perlvar/"$."E<gt>> both +link to the section started by "C<=item $.>" in perlvar. And +C<LE<lt>perlsyn/For LoopsE<gt>> or C<LE<lt>perlsyn/"For Loops"E<gt>> +both link to the section started by "C<=head2 For Loops>" +in perlsyn. + +To control what text is used for display, you +use "C<LE<lt>text|...E<gt>>", as in: + +=over + +=item * + +C<LE<lt>text|nameE<gt>> + +Link this text to that manual page. E.g., +C<LE<lt>Perl Error Messages|perldiagE<gt>> + +=item * + +C<LE<lt>text|name/"sec"E<gt>> or C<LE<lt>text|name/secE<gt>> + +Link this text to that section in that manual page. 
E.g., +C<LE<lt>SWITCH statements|perlsyn/"Basic BLOCKs and Switch +Statements"E<gt>> + +=item * + +C<LE<lt>text|/"sec"E<gt>> or C<LE<lt>text|/secE<gt>> +or C<LE<lt>text|"sec"E<gt>> + +Link this text to that section in this manual page. E.g., +C<LE<lt>the various attributes|/"Member Data"E<gt>> + +=back + +Or you can link to a web page: + +=over + +=item * + +C<LE<lt>scheme:...E<gt>> + +Links to an absolute URL. For example, +C<LE<lt>http://www.perl.org/E<gt>>. But note +that there is no corresponding C<LE<lt>text|scheme:...E<gt>> syntax, for +various reasons. + +=back + +=item C<EE<lt>escapeE<gt>> -- a character escape + +Very similar to HTML/XML C<&I<foo>;> "entity references": + +=over + +=item * + +C<EE<lt>ltE<gt>> -- a literal E<lt> (less than) + +=item * + +C<EE<lt>gtE<gt>> -- a literal E<gt> (greater than) + +=item * + +C<EE<lt>verbarE<gt>> -- a literal | (I<ver>tical I<bar>) + +=item * + +C<EE<lt>solE<gt>> -- a literal / (I<sol>idus) + +The above four are optional except in other formatting codes, +notably C<LE<lt>...E<gt>>, and when preceded by a +capital letter. + +=item * + +C<EE<lt>htmlnameE<gt>> + +Some non-numeric HTML entity name, such as C<EE<lt>eacuteE<gt>>, +meaning the same thing as C<&eacute;> in HTML -- i.e., a lowercase +e with an acute (/-shaped) accent. + +=item * + +C<EE<lt>numberE<gt>> + +The ASCII/Latin-1/Unicode character with that number. A +leading "0x" means that I<number> is hex, as in +C<EE<lt>0x201EE<gt>>. A leading "0" means that I<number> is octal, +as in C<EE<lt>075E<gt>>. Otherwise I<number> is interpreted as being +in decimal, as in C<EE<lt>181E<gt>>. + +Note that older Pod formatters might not recognize octal or +hex numeric escapes, and that many formatters cannot reliably +render characters above 255. (Some formatters may even have +to use compromised renderings of Latin-1 characters, like +rendering C<EE<lt>eacuteE<gt>> as just a plain "e".) 
+ +=back + +=item C<FE<lt>filenameE<gt>> -- used for filenames + +Typically displayed in italics. Example: "C<FE<lt>.cshrcE<gt>>" + +=item C<SE<lt>textE<gt>> -- text contains non-breaking spaces + +This means that the words in I<text> should not be broken +across lines. Example: S<C<SE<lt>$x ? $y : $zE<gt>>>. + +=item C<XE<lt>topic nameE<gt>> -- an index entry + +This is ignored by most formatters, but some may use it for building +indexes. It always renders as empty-string. +Example: C<XE<lt>absolutizing relative URLsE<gt>> + +=item C<ZE<lt>E<gt>> -- a null (zero-effect) formatting code + +This is rarely used. It's one way to get around using an +EE<lt>...E<gt> code sometimes. For example, instead of +"C<NEE<lt>ltE<gt>3>" (for "NE<lt>3") you could write +"C<NZE<lt>E<gt>E<lt>3>" (the "ZE<lt>E<gt>" breaks up the "N" and +the "E<lt>" so they can't be considered +the part of a (fictitious) "NE<lt>...E<gt>" code). + +=for comment + This was formerly explained as a "zero-width character". But in + most parser models, it parses to nothing at all, as opposed to parsing + as if it were a E<zwnj> or E<zwj>, which are REAL zero-width characters. + So "width" and "character" are exactly the wrong words. + +=back + +Most of the time, you will need only a single set of angle brackets to +delimit the beginning and end of formatting codes. However, +sometimes you will want to put a real right angle bracket (a +greater-than sign, '>') inside of a formatting code. This is particularly +common when using a formatting code to provide a different font-type for a +snippet of code. As with all things in Perl, there is more than +one way to do it. One way is to simply escape the closing bracket +using an C<E> code: C<$a E<lt>=E<gt> $b> This will produce: "C<$a E<lt>=E<gt> $b>" -A more readable, and perhaps more "plain" way is to use an alternate set of -delimiters that doesn't require a ">" to be escaped. 
As of perl5.5.660, -doubled angle brackets ("<<" and ">>") may be used I<if and only if there -is whitespace immediately following the opening delimiter and immediately -preceding the closing delimiter!> For example, the following will do the -trick: +A more readable, and perhaps more "plain" way is to use an alternate +set of delimiters that doesn't require a single ">" to be escaped. With +the Pod formatters that are standard starting with perl5.5.660, doubled +angle brackets ("<<" and ">>") may be used I<if and only if there is +whitespace right after the opening delimiter and whitespace right +before the closing delimiter!> For example, the following will +do the trick: C<< $a <=> $b >> In fact, you can use as many repeated angle-brackets as you like so long as you have the same number of them in the opening and closing delimiters, and make sure that whitespace immediately follows the last -'<' of the opening delimiter, and immediately precedes the first '>' of -the closing delimiter. So the following will also work: +'<' of the opening delimiter, and immediately precedes the first '>' +of the closing delimiter. (The whitespace is ignored.) So the +following will also work: C<<< $a <=> $b >>> - C<<<< $a <=> $b >>>> + C<<<< $a <=> $b >>>> -This is currently supported by pod2text (Pod::Text), pod2man (Pod::Man), -and any other pod2xxx and Pod::Xxxx translator that uses Pod::Parser -1.093 or later. +And they all mean exactly the same as this: + + C<$a E<lt>=E<gt> $b> + +As a further example, this means that if you wanted to put these bits of +code in C<C> (code) style: + + open(X, ">>thing.dat") || die $! + $foo->bar(); + +you could do it like so: + + C<<< open(X, ">>thing.dat") || die $! 
>>> + C<< $foo->bar(); >> +which is presumably easier to read than the old way: + + C<open(X, "E<gt>E<gt>thing.dat") || die $!> + C<$foo-E<gt>bar(); >> + +This is currently supported by pod2text (Pod::Text), pod2man (Pod::Man), +and any other pod2xxx or Pod::Xxxx translators that use +Pod::Parser 1.093 or later, or Pod::Tree 1.02 or later. =head2 The Intent -That's it. The intent is simplicity, not power. I wanted paragraphs -to look like paragraphs (block format), so that they stand out -visually, and so that I could run them through fmt easily to reformat -them (that's F7 in my version of B<vi>). I wanted the translator (and not -me) to worry about whether " or ' is a left quote or a right quote -within filled text, and I wanted it to leave the quotes alone, dammit, in -verbatim mode, so I could slurp in a working program, shift it over 4 -spaces, and have it print out, er, verbatim. And presumably in a -constant width font. - -In particular, you can leave things like this verbatim in your text: - - Perl - FILEHANDLE - $variable - function() - manpage(3r) - -Doubtless a few other commands or sequences will need to be added along -the way, but I've gotten along surprisingly well with just these. - -Note that I'm not at all claiming this to be sufficient for producing a -book. I'm just trying to make an idiot-proof common source for nroff, -TeX, and other markup languages, as used for online documentation. -Translators exist for B<pod2man> (that's for nroff(1) and troff(1)), -B<pod2text>, B<pod2html>, B<pod2latex>, and B<pod2fm>. +The intent is simplicity of use, not power of expression. Paragraphs +look like paragraphs (block format), so that they stand out +visually, and so that I could run them through C<fmt> easily to reformat +them (that's F7 in my version of B<vi>, or Esc Q in my version of +B<emacs>). 
I wanted the translator to always leave the C<'> and C<`> and +C<"> quotes alone, in verbatim mode, so I could slurp in a +working program, shift it over four spaces, and have it print out, er, +verbatim. And presumably in a monospace font. + +The Pod format is not necessarily sufficient for writing a book. Pod +is just meant to be an idiot-proof common source for nroff, HTML, +TeX, and other markup languages, as used for online +documentation. Translators exist for B<pod2text>, B<pod2html>, +B<pod2man> (that's for nroff(1) and troff(1)), B<pod2latex>, and +B<pod2fm>. Various others are available in CPAN. + =head2 Embedding Pods in Perl Modules -You can embed pod documentation in your Perl scripts. Start your -documentation with a "=head1" command at the beginning, and end it -with a "=cut" command. Perl will ignore the pod text. See any of the -supplied library modules for examples. If you're going to put your -pods at the end of the file, and you're using an __END__ or __DATA__ -cut mark, make sure to put an empty line there before the first pod -directive. +You can embed Pod documentation in your Perl modules and scripts. +Start your documentation with an empty line, a "=head1" command at the +beginning, and end it with a "=cut" command and an empty line. Perl +will ignore the Pod text. See any of the supplied library modules for +examples. If you're going to put your Pod at the end of the file, and +you're using an __END__ or __DATA__ cut mark, make sure to put an +empty line there before the first Pod command. - __END__ + __END__ - =head1 NAME + =head1 NAME - modern - I am a modern module + Time::Local - efficiently compute time from local and GMT time -If you had not had that empty line there, then the translators wouldn't -have seen it. +Without that empty line before the "=head1", many translators wouldn't +have recognized the "=head1" as starting a Pod block. 
-=head2 Common Pod Pitfalls +=head2 Hints for Writing Pod -=over 4 +=over =item * -Pod translators usually will require paragraphs to be separated by -completely empty lines. If you have an apparently empty line with -some spaces on it, this can cause odd formatting. +The B<podchecker> command is provided for checking Pod syntax for errors +and warnings. For example, it checks for completely blank lines in +Pod blocks and for unknown commands and formatting codes. You should +still also pass your document through one or more translators and proofread +the result, or print out the result and proofread that. Some of the +problems found may be bugs in the translators, which you may or may not +wish to work around. =item * -Translators will mostly add wording around a LE<lt>E<gt> link, so that -C<LE<lt>foo(1)E<gt>> becomes "the I<foo>(1) manpage", for example (see -B<pod2man> for details). Thus, you shouldn't write things like C<the -LE<lt>fooE<gt> manpage>, if you want the translated document to read -sensibly. +If you're more familiar with writing in HTML than with writing in Pod, you +can try your hand at writing documentation in simple HTML, and converting +it to Pod with the experimental L<Pod::HTML2Pod|Pod::HTML2Pod> module, +(available in CPAN), and looking at the resulting code. The experimental +L<Pod::PXML|Pod::PXML> module in CPAN might also be useful. + +=item * + +Many older Pod translators require the lines before every Pod +command and after every Pod command (including "=cut"!) to be a blank +line. Having something like this: + + # - - - - - - - - - - - - + =item $firecracker->boom() + + This noisily detonates the firecracker object. + =cut + sub boom { + ... + +...will make such Pod translators completely fail to see the Pod block +at all. + +Instead, have it like this: + + # - - - - - - - - - - - - + + =item $firecracker->boom() + + This noisily detonates the firecracker object. + + =cut + + sub boom { + ... 
+ +=item * + +Some older Pod translators require paragraphs (including command +paragraphs like "=head2 Functions") to be separated by I<completely> +empty lines. If you have an apparently empty line with some spaces +on it, this might not count as a separator for those translators, and +that could cause odd formatting. + +=item * -If you need total control of the text used for a link in the output -use the form LE<lt>show this text|fooE<gt> instead. +Older translators might add wording around an LE<lt>E<gt> link, so that +C<LE<lt>Foo::BarE<gt>> may become "the Foo::Bar manpage", for example. +So you shouldn't write things like C<the LE<lt>fooE<gt> +documentation>, if you want the translated document to read sensibly +-- instead write C<the LE<lt>Foo::Bar|Foo::BarE<gt> documentation> or +C<LE<lt>the Foo::Bar documentation|Foo::BarE<gt>>, to control how the +link comes out. =item * -The B<podchecker> command is provided to check pod syntax -for errors and warnings. For example, it checks for completely -blank lines in pod segments and for unknown escape sequences. -It is still advised to pass it through -one or more translators and proofread the result, or print out the -result and proofread that. Some of the problems found may be bugs in -the translators, which you may or may not wish to work around. +Going past the 70th column in a verbatim block might be ungracefully +wrapped by some formatters. =back =head1 SEE ALSO -L<pod2man>, L<perlsyn/"PODs: Embedded Documentation">, -L<podchecker> +L<perlpodspec>, L<perlsyn/"PODs: Embedded Documentation">, +L<perlnewmod>, L<perldoc>, L<pod2html>, L<pod2man>, L<podchecker>. =head1 AUTHOR -Larry Wall +Larry Wall, Sean M. 
Burke +=cut diff --git a/pod/perlpodspec.pod b/pod/perlpodspec.pod new file mode 100644 index 0000000000..c87e1cbe7a --- /dev/null +++ b/pod/perlpodspec.pod @@ -0,0 +1,1876 @@ + +=head1 NAME + +perlpodspec - Plain Old Documentation: format specification and notes + +=head1 DESCRIPTION + +This document is detailed notes on the Pod markup language. Most +people will only have to read L<perlpod|perlpod> to know how to write +in Pod, but this document may answer some incidental questions to do +with parsing and rendering Pod. + +In this document, "must" / "must not", "should" / +"should not", and "may" have their conventional (cf. RFC 2119) +meanings: "X must do Y" means that if X doesn't do Y, it's against +this specification, and should really be fixed. "X should do Y" +means that it's recommended, but X may fail to do Y, if there's a +good reason. "X may do Y" is merely a note that X can do Y at +will (although it is up to the reader to detect any connotation of +"and I think it would be I<nice> if X did Y" versus "it wouldn't +really I<bother> me if X did Y"). + +Notably, when I say "the parser should do Y", the +parser may fail to do Y, if the calling application explicitly +requests that the parser I<not> do Y. I often phrase this as +"the parser should, by default, do Y." This doesn't I<require> +the parser to provide an option for turning off whatever +feature Y is (like expanding tabs in verbatim paragraphs), although +it implicates that such an option I<may> be provided. + +=head1 Pod Definitions + +Pod is embedded in files, typically Perl source files -- although you +can write a file that's nothing but Pod. + +A B<line> in a file consists of zero or more non-newline characters, +terminated by either a newline or the end of the file. 
+ +A B<newline sequence> is usually a platform-dependent concept, but +Pod parsers should understand it to mean any of CR (ASCII 13), LF +(ASCII 10), or a CRLF (ASCII 13 followed immediately by ASCII 10), in +addition to any other system-specific meaning. The first CR/CRLF/LF +sequence in the file may be used as the basis for identifying the +newline sequence for parsing the rest of the file. + +A B<blank line> is a line consisting entirely of zero or more spaces +(ASCII 32) or tabs (ASCII 9), and terminated by a newline or end-of-file. +A B<non-blank line> is a line containing one or more characters other +than space or tab (and terminated by a newline or end-of-file). + +(I<Note:> Many older Pod parsers did not accept a line consisting of +spaces/tabs and then a newline as a blank line -- the only lines they +considered blank were lines consisting of I<no characters at all>, +terminated by a newline.) + +B<Whitespace> is used in this document as a blanket term for spaces, +tabs, and newline sequences. (By itself, this term usually refers +to literal whitespace. That is, sequences of whitespace characters +in Pod source, as opposed to "EE<lt>32>", which is a formatting +code that I<denotes> a whitespace character.) + +A B<Pod parser> is a module meant for parsing Pod (regardless of +whether this involves calling callbacks or building a parse tree or +directly formatting it). A B<Pod formatter> (or B<Pod translator>) +is a module or program that converts Pod to some other format (HTML, +plaintext, TeX, PostScript, RTF). A B<Pod processor> might be a +formatter or translator, or might be a program that does something +else with the Pod (like wordcounting it, scanning for index points, +etc.). + +Pod content is contained in B<Pod blocks>. A Pod block starts with a +line that matches C<m/\A=[a-zA-Z]/>, and continues up to the next line +that matches C<m/\A=cut/> -- or up to the end of the file, if there is +no C<m/\A=cut/> line. 
+ +=for comment + The current perlsyn says: + [beginquote] + Note that pod translators should look at only paragraphs beginning + with a pod directive (it makes parsing easier), whereas the compiler + actually knows to look for pod escapes even in the middle of a + paragraph. This means that the following secret stuff will be ignored + by both the compiler and the translators. + $a=3; + =secret stuff + warn "Neither POD nor CODE!?" + =cut back + print "got $a\n"; + You probably shouldn't rely upon the warn() being podded out forever. + Not all pod translators are well-behaved in this regard, and perhaps + the compiler will become pickier. + [endquote] + I think that those paragraphs should just be removed; paragraph-based + parsing seems to have been largely abandoned, because of the hassle + with non-empty blank lines messing up what people meant by "paragraph". + Even if the "it makes parsing easier" bit were especially true, + it wouldn't be worth the confusion of having perl and pod2whatever + actually disagree on what can constitute a Pod block. + +Within a Pod block, there are B<Pod paragraphs>. A Pod paragraph +consists of non-blank lines of text, separated by one or more blank +lines. + +For purposes of Pod processing, there are four types of paragraphs in +a Pod block: + +=over + +=item * + +A command paragraph (also called a "directive"). The first line of +this paragraph must match C<m/\A=[a-zA-Z]/>. Command paragraphs are +typically one line, as in: + + =head1 NOTES + + =item * + +But they may span several (non-blank) lines: + + =for comment + Hm, I wonder what it would look like if + you tried to write a BNF for Pod from this. + + =head3 Dr. Strangelove, or: How I Learned to + Stop Worrying and Love the Bomb + +I<Some> command paragraphs allow formatting codes in their content +(i.e., after the part that matches C<m/\A=[a-zA-Z]\S*\s*/>), as in: + + =head1 Did You Remember to C<use strict;>? 
+ +In other words, the Pod processing handler for "head1" will apply the +same processing to "Did You Remember to CE<lt>use strict;>?" that it +would to an ordinary paragraph -- i.e., formatting codes (like +"CE<lt>...>") are parsed and presumably formatted appropriately, and +whitespace in the form of literal spaces and/or tabs is not +significant. + +=item * + +A B<verbatim paragraph>. The first line of this paragraph must be a +literal space or tab, and this paragraph must not be inside a "=begin +I<identifier>", ... "=end I<identifier>" sequence unless +"I<identifier>" begins with a colon (":"). That is, if a paragraph +starts with a literal space or tab, but I<is> inside a +"=begin I<identifier>", ... "=end I<identifier>" region, then it's +a data paragraph, unless "I<identifier>" begins with a colon. + +Whitespace I<is> significant in verbatim paragraphs (although, in +processing, tabs are probably expanded). + +=item * + +An B<ordinary paragraph>. A paragraph is an ordinary paragraph +if its first line matches neither C<m/\A=[a-zA-Z]/> nor +C<m/\A[ \t]/>, I<and> if it's not inside a "=begin I<identifier>", +... "=end I<identifier>" sequence unless "I<identifier>" begins with +a colon (":"). + +=item * + +A B<data paragraph>. This is a paragraph that I<is> inside a "=begin +I<identifier>" ... "=end I<identifier>" sequence where +"I<identifier>" does I<not> begin with a literal colon (":"). In +some sense, a data paragraph is not part of Pod at all (i.e., +effectively it's "out-of-band"), since it's not subject to most kinds +of Pod parsing; but it is specified here, since Pod +parsers need to be able to call an event for it, or store it in some +form in a parse tree, or at least just parse I<around> it. + +=back + +For example: consider the following paragraphs: + + # <- that's the 0th column + + =head1 Foo + + Stuff + + $foo->bar + + =cut + +Here, "=head1 Foo" and "=cut" are command paragraphs because the first +line of each matches C<m/\A=[a-zA-Z]/>. 
"I<[space][space]>$foo->bar" +is a verbatim paragraph, because its first line starts with a literal +whitespace character (and there's no "=begin"..."=end" region around). + +The "=begin I<identifier>" ... "=end I<identifier>" commands stop +paragraphs that they surround from being parsed as ordinary or verbatim +paragraphs, if I<identifier> doesn't begin with a colon. This +is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +=head1 Pod Commands + +This section is intended to supplement and clarify the discussion in +L<perlpod/"Command Paragraph">. These are the currently recognized +Pod commands: + +=over + +=item "=head1", "=head2", "=head3", "=head4" + +This command indicates that the text in the remainder of the paragraph +is a heading. That text may contain formatting codes. Examples: + + =head1 Object Attributes + + =head3 What B<Not> to Do! + +=item "=pod" + +This command indicates that this paragraph begins a Pod block. (If we +are already in the middle of a Pod block, this command has no effect at +all.) If there is any text in this command paragraph after "=pod", +it must be ignored. Examples: + + =pod + + This is a plain Pod paragraph. + + =pod This text is ignored. + +=item "=cut" + +This command indicates that this line is the end of this previously +started Pod block. If there is any text after "=cut" on the line, it must be +ignored. Examples: + + =cut + + =cut The documentation ends here. + + =cut + # This is the first line of program text. + sub foo { # This is the second. + +It is an error to try to I<start> a Pod block with a "=cut" command. In +that case, the Pod processor must halt parsing of the input file, and +must by default emit a warning. + +=item "=over" + +This command indicates that this is the start of a list/indent +region. If there is any text following the "=over", it must consist +of only a nonzero positive numeral. 
The semantics of this numeral is +explained in the L</"About =over...=back Regions"> section, further +below. Formatting codes are not expanded. Examples: + + =over 3 + + =over 3.5 + + =over + +=item "=item" + +This command indicates that an item in a list begins here. Formatting +codes are processed. The semantics of the (optional) text in the +remainder of this paragraph are +explained in the L</"About =over...=back Regions"> section, further +below. Examples: + + =item + + =item * + + =item * + + =item 14 + + =item 3. + + =item C<< $thing->stuff(I<dodad>) >> + + =item For transporting us beyond seas to be tried for pretended + offenses + + =item He is at this time transporting large armies of foreign + mercenaries to complete the works of death, desolation and + tyranny, already begun with circumstances of cruelty and perfidy + scarcely paralleled in the most barbarous ages, and totally + unworthy the head of a civilized nation. + +=item "=back" + +This command indicates that this is the end of the region begun +by the most recent "=over" command. It permits no text after the +"=back" command. + +=item "=begin formatname" + +This marks the following paragraphs (until the matching "=end +formatname") as being for some special kind of processing. Unless +"formatname" begins with a colon, the contained non-command +paragraphs are data paragraphs. But if "formatname" I<does> begin +with a colon, then non-command paragraphs are ordinary paragraphs +or data paragraphs. This is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +It is advised that formatnames match the regexp +C<m/\A:?[-a-zA-Z0-9_]+\z/>. Implementors should anticipate future +expansion in the semantics and syntax of the first parameter +to "=begin"/"=end"/"=for". + +=item "=end formatname" + +This marks the end of the region opened by the matching +"=begin formatname" region. 
If "formatname" is not the formatname +of the most recent open "=begin formatname" region, then this +is an error, and must generate an error message. This +is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +=item "=for formatname text..." + +This is synonymous with: + + =begin formatname + + text... + + =end formatname + +That is, it creates a region consisting of a single paragraph; that +paragraph is to be treated as a normal paragraph if "formatname" +begins with a ":"; if "formatname" I<doesn't> begin with a colon, +then "text..." will constitute a data paragraph. There is no way +to use "=for formatname text..." to express "text..." as a verbatim +paragraph. + +=back + +If a Pod processor sees any command other than the ones listed +above (like "=head", or "=haed1", or "=stuff", or "=cuttlefish", +or "=w123"), that processor must by default treat this as an +error. It must not process the paragraph beginning with that +command, must by default warn of this as an error, and may +abort the parse. A Pod parser may allow a way for particular +applications to add to the above list of known commands, and to +stipulate, for each additional command, whether formatting +codes should be processed. + +Future versions of this specification may add additional +commands. + + + +=head1 Pod Formatting Codes + +(Note that in previous drafts of this document and of perlpod, +formatting codes were referred to as "interior sequences", and +this term may still be found in the documentation for Pod parsers, +and in error messages from Pod processors.) + +There are two syntaxes for formatting codes: + +=over + +=item * + +A formatting code starts with a capital letter (just US-ASCII [A-Z]) +followed by a "<", any number of characters, and ending with the first +matching ">". Examples: + + That's what I<you> think! + + What's C<dump()> for? 
+ + X<C<chmod> and C<unlink()> Under Different Operating Systems> + +=item * + +A formatting code starts with a capital letter (just US-ASCII [A-Z]) +followed by two or more "<"'s, one or more whitespace characters, +any number of characters, one or more whitespace characters, +and ending with the first matching sequence of two or more ">"'s, where +the number of ">"'s equals the number of "<"'s in the opening of this +formatting code. Examples: + + That's what I<< you >> think! + + C<<< open(X, ">>thing.dat") || die $! >>> + + B<< $foo->bar(); >> + +With this syntax, the whitespace character(s) after the "CE<lt><<" +and before the ">>" (or whatever letter) are I<not> renderable -- they +do not signify whitespace, are merely part of the formatting codes +themselves. That is, these are all synonymous: + + C<thing> + C<< thing >> + C<< thing >> + C<<< thing >>> + C<<<< + thing + >>>> + +and so on. + +=back + +In parsing Pod, a notably tricky part is the correct parsing of +(potentially nested!) formatting codes. Implementors should +consult the code in the C<parse_text> routine in Pod::Parser as an +example of a correct implementation. + +=over + +=item C<IE<lt>textE<gt>> -- italic text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<BE<lt>textE<gt>> -- bold text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<CE<lt>codeE<gt>> -- code text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<FE<lt>filenameE<gt>> -- style for filenames + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<XE<lt>topic nameE<gt>> -- an index entry + +See the brief discussion in L<perlpod/"Formatting Codes">. + +This code is unusual in that most formatters completely discard +this code and its content. Other formatters will render it with +invisible codes that can be used in building an index of +the current document. 
+ +=item C<ZE<lt>E<gt>> -- a null (zero-effect) formatting code + +Discussed briefly in L<perlpod/"Formatting Codes">. + +This code is unusual in that it should have no content. That is, +a processor may complain if it sees C<ZE<lt>potatoesE<gt>>. Whether +or not it complains, the I<potatoes> text should be ignored. + +=item C<LE<lt>nameE<gt>> -- a hyperlink + +The complicated syntaxes of this code are discussed at length in +L<perlpod/"Formatting Codes">, and implementation details are +discussed below, in L</"About LE<lt>...E<gt> Codes">. Parsing the +contents of LE<lt>content> is tricky. Notably, the content has to be +checked for whether it looks like a URL, or whether it has to be split +on literal "|" and/or "/" (in the right order!), and so on, +I<before> EE<lt>...> codes are resolved. + +=item C<EE<lt>escapeE<gt>> -- a character escape + +See L<perlpod/"Formatting Codes">, and several points in +L</Notes on Implementing Pod Processors>. + +=item C<SE<lt>textE<gt>> -- text contains non-breaking spaces + +This formatting code is syntactically simple, but semantically +complex. What it means is that each space in the printable +content of this code signifies a nonbreaking space. + +Consider: + + C<$x ? $y : $z> + + S<C<$x ? $y : $z>> + +Both signify the monospace (c[ode] style) text consisting of +"$x", one space, "?", one space, "$y", one space, ":", one space, "$z". The +difference is that in the latter, with the S code, those spaces +are not "normal" spaces, but instead are nonbreaking spaces. + +=back + + +If a Pod processor sees any formatting code other than the ones +listed above (as in "NE<lt>...>", or "QE<lt>...>", etc.), that +processor must by default treat this as an error. +A Pod parser may allow a way for particular +applications to add to the above list of known formatting codes; +a Pod parser might even allow a way to stipulate, for each additional +command, whether it requires some form of special processing, as +LE<lt>...> does. 
+ +Future versions of this specification may add additional +formatting codes. + +Historical note: A few older Pod processors would not see a ">" as +closing a "CE<lt>" code, if the ">" was immediately preceded by +a "-". This was so that this: + + C<$foo->bar> + +would parse as equivalent to this: + + C<$foo-E<lt>bar> + +instead of as equivalent to a "C" formatting code containing +only "$foo-", and then a "bar>" outside the "C" formatting code. This +problem has since been solved by the addition of syntaxes like this: + + C<< $foo->bar >> + +Compliant parsers must not treat "->" as special. + +Formatting codes absolutely cannot span paragraphs. If a code is +opened in one paragraph, and no closing code is found by the end of +that paragraph, the Pod parser must close that formatting code, +and should complain (as in "Unterminated I code in the paragraph +starting at line 123: 'Time objects are not...'"). So these +two paragraphs: + + I<I told you not to do this! + + Don't make me say it again!> + +...must I<not> be parsed as two paragraphs in italics (with the I +code starting in one paragraph and ending in another.) Instead, +the first paragraph should generate a warning, but that aside, the +above code must parse as if it were: + + I<I told you not to do this!> + + Don't make me say it again!E<gt> + +(In SGMLish jargon, all Pod commands are like block-level +elements, whereas all Pod formatting codes are like inline-level +elements.) + + + +=head1 Notes on Implementing Pod Processors + +The following is a long section of miscellaneous requirements +and suggestions to do with Pod processing. + +=over + +=item * + +Pod formatters should tolerate lines in verbatim blocks that are of +any length, even if that means having to break them (possibly several +times, for very long lines) to avoid text running off the side of the +page. Pod formatters may warn of such line-breaking. 
Such warnings +are particularly appropriate for lines that are over 100 characters long, which +are usually not intentional. + +=item * + +Pod parsers must recognize I<all> of the three well-known newline +formats: CR, LF, and CRLF. See L<perlport|perlport>. + +=item * + +Pod parsers should accept input lines that are of any length. + +=item * + +Since Perl recognizes a Unicode Byte Order Mark at the start of files +as signaling that the file is Unicode encoded as in UTF-16 (whether +big-endian or little-endian) or UTF-8, Pod parsers should do the +same. Otherwise, the character encoding should be understood as +being UTF-8 if the first highbit byte sequence in the file seems +valid as a UTF-8 sequence, or otherwise as Latin-1. + +Future versions of this specification may specify +how Pod can accept other encodings. Presumably treatment of other +encodings in Pod parsing would be as in XML parsing: whatever the +encoding declared by a particular Pod file, content is to be +stored in memory as Unicode characters. + +=item * + +The well known Unicode Byte Order Marks are as follows: if the +file begins with the two literal byte values 0xFE 0xFF, this is +the BOM for big-endian UTF-16. If the file begins with the two +literal byte values 0xFF 0xFE, this is the BOM for little-endian +UTF-16. If the file begins with the three literal byte values +0xEF 0xBB 0xBF, this is the BOM for UTF-8. + +=for comment + use bytes; print map sprintf(" 0x%02X", ord $_), split '', "\x{feff}"; + 0xEF 0xBB 0xBF + +=for comment + If toke.c is modified to support UTF32, add mention of those here. + +=item * + +A naive but sufficient heuristic for testing the first highbit +byte-sequence in a BOM-less file (whether in code or in Pod!), to see +whether that sequence is valid as UTF-8 (RFC 2279) is to check whether +the first byte in the sequence is in the range 0xC0 - 0xFD +I<and> whether the next byte is in the range +0x80 - 0xBF. 
If so, the parser may conclude that this file is in +UTF-8, and all highbit sequences in the file should be assumed to +be UTF-8. Otherwise the parser should treat the file as being +in Latin-1. In the unlikely circumstance that the first highbit +sequence in a truly non-UTF-8 file happens to appear to be UTF-8, one +can cater to our heuristic (as well as any more intelligent heuristic) +by prefacing that line with a comment line containing a highbit +sequence that is clearly I<not> valid as UTF-8. A line consisting +of simply "#", an e-acute, and any non-highbit byte, +is sufficient to establish this file's encoding. + +=for comment + If/WHEN some brave soul makes these heuristics into a generic + text-file class (or file discipline?), we can presumably delete + mention of these icky details from this file, and can instead + tell people to just use appropriate class/discipline. + Auto-recognition of newline sequences would be another desirable + feature of such a class/discipline. + HINT HINT HINT. + +=for comment + "The probability that a string of characters + in any other encoding appears as valid UTF-8 is low" - RFC2279 + +=item * + +This document's requirements and suggestions about encodings +do not apply to Pod processors running on non-ASCII platforms, +notably EBCDIC platforms. + +=item * + +Pod processors must treat a "=for [label] [content...]" paragraph as +meaning the same thing as a "=begin [label]" paragraph, content, and +an "=end [label]" paragraph. (The parser may conflate these two +constructs, or may leave them distinct, in the expectation that the +formatter will nevertheless treat them the same.) + +=item * + +When rendering Pod to a format that allows comments (i.e., to nearly +any format other than plaintext), a Pod formatter must insert comment +text identifying its name and version number, and the name and +version numbers of any modules it might be using to process the Pod. 
+Minimal examples: + + %% POD::Pod2PS v3.14159, using POD::Parser v1.92 + + <!-- Pod::HTML v3.14159, using POD::Parser v1.92 --> + + {\doccomm generated by Pod::Tree::RTF 3.14159 using Pod::Tree 1.08} + + .\" Pod::Man version 3.14159, using POD::Parser version 1.92 + +Formatters may also insert additional comments, including: the +release date of the Pod formatter program, the contact address for +the author(s) of the formatter, the current time, the name of input +file, the formatting options in effect, version of Perl used, etc. + +Formatters may also choose to note errors/warnings as comments, +besides or instead of emitting them otherwise (as in messages to +STDERR, or C<die>ing). + +=item * + +Pod parsers I<may> emit warnings or error messages ("Unknown E code +EE<lt>zslig>!") to STDERR (whether through printing to STDERR, or +C<warn>ing/C<carp>ing, or C<die>ing/C<croak>ing), but I<must> allow +suppressing all such STDERR output, and instead allow an option for +reporting errors/warnings +in some other way, whether by triggering a callback, or noting errors +in some attribute of the document object, or some similarly unobtrusive +mechanism -- or even by appending a "Pod Errors" section to the end of +the parsed form of the document. + +=item * + +In cases of exceptionally aberrant documents, Pod parsers may abort the +parse. Even then, using C<die>ing/C<croak>ing is to be avoided; where +possible, the parser library may simply close the input file +and add text like "*** Formatting Aborted ***" to the end of the +(partial) in-memory document. 
+ +=item * + +In paragraphs where formatting codes (like EE<lt>...>, BE<lt>...>) +are understood (i.e., I<not> verbatim paragraphs, but I<including> +ordinary paragraphs, and command paragraphs that produce renderable +text, like "=head1"), literal whitespace should generally be considered +"insignificant", in that one literal space has the same meaning as any +(nonzero) number of literal spaces, literal newlines, and literal tabs +(as long as this produces no blank lines, since those would terminate +the paragraph). Pod parsers should compact literal whitespace in each +processed paragraph, but may provide an option for overriding this +(since some processing tasks do not require it), or may follow +additional special rules (for example, specially treating +period-space-space or period-newline sequences). + +=item * + +Pod parsers should not, by default, try to coerce apostrophe (') and +quote (") into smart quotes (little 9's, 66's, 99's, etc), nor try to +turn backtick (`) into anything else but a single backtick character +(distinct from an openquote character!), nor "--" into anything but +two minus signs. They I<must never> do any of those things to text +in CE<lt>...> formatting codes, and never I<ever> to text in verbatim +paragraphs. + +=item * + +When rendering Pod to a format that has two kinds of hyphens (-), one +that's a nonbreaking hyphen, and another that's a breakable hyphen +(as in "object-oriented", which can be split across lines as +"object-", newline, "oriented"), formatters are encouraged to +generally translate "-" to nonbreaking hyphen, but may apply +heuristics to convert some of these to breaking hyphens. + +=item * + +Pod formatters should make reasonable efforts to keep words of Perl +code from being broken across lines. For example, "Foo::Bar" in some +formatting systems is seen as eligible for being broken across lines +as "Foo::" newline "Bar" or even "Foo::-" newline "Bar". 
This should +be avoided where possible, either by disabling all line-breaking in +mid-word, or by wrapping particular words with internal punctuation +in "don't break this across lines" codes (which in some formats may +not be a single code, but might be a matter of inserting non-breaking +zero-width spaces between every pair of characters in a word.) + +=item * + +Pod parsers should, by default, expand tabs in verbatim paragraphs as +they are processed, before passing them to the formatter or other +processor. Parsers may also allow an option for overriding this. + +=item * + +Pod parsers should, by default, remove newlines from the end of +ordinary and verbatim paragraphs before passing them to the +formatter. For example, while the paragraph you're reading now +could be considered, in Pod source, to end with (and contain) +the newline(s) that end it, it should be processed as ending with +(and containing) the period character that ends this sentence. + +=item * + +Pod parsers, when reporting errors, should make some effort to report +an approximate line number ("Nested EE<lt>>'s in Paragraph #52, near +line 633 of Thing/Foo.pm!"), instead of merely noting the paragraph +number ("Nested EE<lt>>'s in Paragraph #52 of Thing/Foo.pm!"). Where +this is problematic, the paragraph number should at least be +accompanied by an excerpt from the paragraph ("Nested EE<lt>>'s in +Paragraph #52 of Thing/Foo.pm, which begins 'Read/write accessor for +the CE<lt>interest rate> attribute...'"). + +=item * + +Pod parsers, when processing a series of verbatim paragraphs one +after another, should consider them to be one large verbatim +paragraph that happens to contain blank lines. I.e., these two +lines, which have a blank line between them: + + use Foo; + + print Foo->VERSION + +should be unified into one paragraph ("\tuse Foo;\n\n\tprint +Foo->VERSION") before being passed to the formatter or other +processor. Parsers may also allow an option for overriding this. 
+ +While this might be too cumbersome to implement in event-based Pod +parsers, it is straightforward for parsers that return parse trees. + +=item * + +Pod formatters, where feasible, are advised to avoid splitting short +verbatim paragraphs (under twelve lines, say) across pages. + +=item * + +Pod parsers must treat a line with only spaces and/or tabs on it as a +"blank line" such as separates paragraphs. (Some older parsers +recognized only two adjacent newlines as a "blank line" but would not +recognize a newline, a space, and a newline, as a blank line. This +is noncompliant behavior.) + +=item * + +Authors of Pod formatters/processors should make every effort to +avoid writing their own Pod parser. There are already several in +CPAN, with a wide range of interface styles -- and one of them, +Pod::Parser, comes with modern versions of Perl. + +=item * + +Characters in Pod documents may be conveyed either as literals, or by +number in EE<lt>n> codes, or by an equivalent mnemonic, as in +EE<lt>eacute> which is exactly equivalent to EE<lt>233>. + +Characters in the range 32-126 refer to those well known US-ASCII +characters (also defined there by Unicode, with the same meaning), +which all Pod formatters must render faithfully. Characters +in the ranges 0-31 and 127-159 should not be used (neither as +literals, nor as EE<lt>number> codes), except for the +literal byte-sequences for newline (13, 13 10, or 10), and tab (9). + +Characters in the range 160-255 refer to Latin-1 characters (also +defined there by Unicode, with the same meaning). Characters above +255 should be understood to refer to Unicode characters. + +=item * + +Be warned +that some formatters cannot reliably render characters outside 32-126; +and many are able to handle 32-126 and 160-255, but nothing above +255. 
+ +=item * + +Besides the well-known "EE<lt>lt>" and "EE<lt>gt>" codes for +less-than and greater-than, Pod parsers must understand "EE<lt>sol>" +for "/" (solidus, slash), and "EE<lt>verbar>" for "|" (vertical bar, +pipe). Pod parsers should also understand "EE<lt>lchevron>" and +"EE<lt>rchevron>" as legacy codes for characters 171 and 187, i.e., +"left-pointing double angle quotation mark" = "left pointing +guillemet" and "right-pointing double angle quotation mark" = "right +pointing guillemet". (These look like little "<<" and ">>", and they +are now preferably expressed with the HTML/XHTML codes "EE<lt>laquo>" +and "EE<lt>raquo>".) + +=item * + +Pod parsers should understand all "EE<lt>html>" codes as defined +in the entity declarations in the most recent XHTML specification at +C<www.W3.org>. Pod parsers must understand at least the entities +that define characters in the range 160-255 (Latin-1). Pod parsers, +when faced with some unknown "EE<lt>I<identifier>>" code, +shouldn't simply replace it with nullstring (by default, at least), +but may pass it through as a string consisting of the literal characters +E, less-than, I<identifier>, greater-than. Or Pod parsers may offer the +alternative option of processing such unknown +"EE<lt>I<identifier>>" codes by firing an event especially +for such codes, or by adding a special node-type to the in-memory +document tree. Such "EE<lt>I<identifier>>" may have special meaning +to some processors, or some processors may choose to add them to +a special error report. + +=item * + +Pod parsers must also support the XHTML codes "EE<lt>quot>" for +character 34 (doublequote, "), "EE<lt>amp>" for character 38 +(ampersand, &), and "EE<lt>apos>" for character 39 (apostrophe, '). + +=item * + +Note that in all cases of "EE<lt>whatever>", I<whatever> (whether +an htmlname, or a number in any base) must consist only of +alphanumeric characters -- that is, I<whatever> must match +C<m/\A\w+\z/>. 
So "EE<lt> 0 1 2 3 >" is invalid, because +it contains spaces, which aren't alphanumeric characters. This +presumably does not I<need> special treatment by a Pod processor; +" 0 1 2 3 " doesn't look like a number in any base, so it would +presumably be looked up in the table of HTML-like names. Since +there isn't (and cannot be) an HTML-like entity called " 0 1 2 3 ", +this will be treated as an error. However, Pod processors may +treat "EE<lt> 0 1 2 3 >" or "EE<lt>e-acute>" as I<syntactically> +invalid, potentially earning a different error message than the +error message (or warning, or event) generated by a merely unknown +(but theoretically valid) htmlname, as in "EE<lt>qacute>" +[sic]. However, Pod parsers are not required to make this +distinction. + +=item * + +Note that EE<lt>number> I<must not> be interpreted as simply +"codepoint I<number> in the current/native character set". It always +means only "the character represented by codepoint I<number> in +Unicode." (This is identical to the semantics of &#I<number>; in XML.) + +This will likely require many formatters to have tables mapping from +treatable Unicode codepoints (such as the "\xE9" for the e-acute +character) to the escape sequences or codes necessary for conveying +such sequences in the target output format. A converter to *roff +would, for example know that "\xE9" (whether conveyed literally, or via +a EE<lt>...> sequence) is to be conveyed as "e\\*'". +Similarly, a program rendering Pod in a MacOS application window, would +presumably need to know that "\xE9" maps to codepoint 142 in MacRoman +encoding that (at time of writing) is native for MacOS. Such +Unicode2whatever mappings are presumably already widely available for +common output formats. (Such mappings may be incomplete! Implementers +are not expected to bend over backwards in an attempt to render +Cherokee syllabics, Etruscan runes, Byzantine musical symbols, or any +of the other weird things that Unicode can encode.) 
And +if a Pod document uses a character not found in such a mapping, the +formatter should consider it an unrenderable character. + +=item * + +If, surprisingly, the implementor of a Pod formatter can't find a +satisfactory pre-existing table mapping from Unicode characters to +escapes in the target format (e.g., a decent table of Unicode +characters to *roff escapes), it will be necessary to build such a +table. If you are in this circumstance, you should begin with the +characters in the range 0x00A0 - 0x00FF, which is mostly the heavily +used accented characters. Then proceed (as patience permits and +fastidiousness compels) through the characters that the (X)HTML +standards groups judged important enough to merit mnemonics +for. These are declared in the (X)HTML specifications at the +www.W3.org site. At time of writing (September 2001), the most recent +entity declaration files are: + + http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent + http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent + http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent + +Then you can progress through any remaining notable Unicode characters +in the range 0x2000-0x204D (consult the character tables at +www.unicode.org), and whatever else strikes your fancy. For example, +in F<xhtml-symbol.ent>, there is the entry: + + <!ENTITY infin "&#8734;"> <!-- infinity, U+221E ISOtech --> + +While the mapping "infin" to the character "\x{221E}" will (hopefully) +have been already handled by the Pod parser, the presence of the +character in this file means that it's reasonably important enough to +include in a formatter's table that maps from notable Unicode characters +to the codes necessary for rendering them. 
So for a Unicode-to-*roff +mapping, for example, this would merit the entry: + + "\x{221E}" => '\(in', + +It is eagerly hoped that in the future, increasing numbers of formats +(and formatters) will support Unicode characters directly (as (X)HTML +does with C<&infin;>, C<&#8734;>, or C<&#x221E;>), reducing the need +for idiosyncratic mappings of Unicode-to-I<my_escapes>. + +=item * + +It is up to individual Pod formatters to display good judgment when +confronted with an unrenderable character (which is distinct from an +unknown EE<lt>thing> sequence that the parser couldn't resolve to +anything, renderable or not). It is good practice to map Latin letters +with diacritics (like "EE<lt>eacute>"/"EE<lt>233>") to the corresponding +unaccented US-ASCII letters (like a simple character 101, "e"), but +clearly this is often not feasible, and an unrenderable character may +be represented as "?", or the like. In attempting a sane fallback +(as from EE<lt>233> to "e"), Pod formatters may use the +%Latin1Code_to_fallback table in L<Pod::Escapes|Pod::Escapes>, or +L<Text::Unidecode|Text::Unidecode>, if available. + +For example, this Pod text: + + magic is enabled if you set C<$Currency> to 'E<euro>'. + +may be rendered as: +"magic is enabled if you set C<$Currency> to 'I<?>'" or as +"magic is enabled if you set C<$Currency> to 'B<[euro]>'", or as +"magic is enabled if you set C<$Currency> to '[x20AC]'", etc. + +A Pod formatter may also note, in a comment or warning, a list of what +unrenderable characters were encountered. + +=item * + +EE<lt>...> may freely appear in any formatting code (other than +in another EE<lt>...> or in an ZE<lt>>). That is, "XE<lt>The +EE<lt>euro>1,000,000 Solution>" is valid, as is "LE<lt>The +EE<lt>euro>1,000,000 Solution|Million::Euros>". 
+ +=item * + +Some Pod formatters output to formats that implement nonbreaking +spaces as an individual character (which I'll call "NBSP"), and +others output to formats that implement nonbreaking spaces just as +spaces wrapped in a "don't break this across lines" code. Note that +at the level of Pod, both sorts of codes can occur: Pod can contain a +NBSP character (whether as a literal, or as a "EE<lt>160>" or +"EE<lt>nbsp>" code); and Pod can contain "SE<lt>foo +IE<lt>barE<gt> baz>" codes, where "mere spaces" (character 32) in +such codes are taken to represent nonbreaking spaces. Pod +parsers should consider supporting the optional parsing of "SE<lt>foo +IE<lt>barE<gt> baz>" as if it were +"fooI<NBSP>IE<lt>barE<gt>I<NBSP>baz", and, going the other way, the +optional parsing of groups of words joined by NBSP's as if each group +were in a SE<lt>...> code, so that formatters may use the +representation that maps best to what the output format demands. + +=item * + +Some processors may find that the C<SE<lt>...E<gt>> code is easiest to +implement by replacing each space in the parse tree under the content +of the S, with an NBSP. But note: the replacement should apply I<not> to +spaces in I<all> text, but I<only> to spaces in I<printable> text. (This +distinction may or may not be evident in the particular tree/event +model implemented by the Pod parser.) For example, consider this +unusual case: + + S<L</Autoloaded Functions>> + +This means that the space in the middle of the visible link text must +not be broken across lines. In other words, it's the same as this: + + L<"AutoloadedE<160>Functions"/Autoloaded Functions> + +However, a misapplied space-to-NBSP replacement could (wrongly) +produce something equivalent to this: + + L<"AutoloadedE<160>Functions"/AutoloadedE<160>Functions> + +...which is almost definitely not going to work as a hyperlink (assuming +this formatter outputs a format supporting hypertext). 
+ +Formatters may choose to just not support the S format code, +especially in cases where the output format simply has no NBSP +character/code and no code for "don't break this stuff across lines". + +=item * + +Besides the NBSP character discussed above, implementors are reminded +of the existence of the other "special" character in Latin-1, the +"soft hyphen" character (also known as "discretionary hyphen", +i.e. C<EE<lt>173E<gt>> = C<EE<lt>0xADE<gt>> = +C<EE<lt>shyE<gt>>). This character expresses an optional hyphenation +point. That is, it normally renders as nothing, but may render as a +"-" if a formatter breaks the word at that point. Pod formatters +should, as appropriate, do one of the following: 1) render this with +a code with the same meaning (e.g., "\-" in RTF), 2) pass it through +in the expectation that the formatter understands this character as +such, or 3) delete it. + +For example: + + sigE<shy>action + manuE<shy>script + JarkE<shy>ko HieE<shy>taE<shy>nieE<shy>mi + +These signal to a formatter that if it is to hyphenate "sigaction" +or "manuscript", then it should be done as +"sig-I<[linebreak]>action" or "manu-I<[linebreak]>script" +(and if it doesn't hyphenate it, then the C<EE<lt>shyE<gt>> doesn't +show up at all). And if it is +to hyphenate "Jarkko" and/or "Hietaniemi", it can do +so only at the points where there is a C<EE<lt>shyE<gt>> code. + +In practice, it is anticipated that this character will not be used +often, but formatters should either support it, or delete it. + +=item * + +If you think that you want to add a new command to Pod (like, say, a +"=biblio" command), consider whether you could get the same +effect with a for or begin/end sequence: "=for biblio ..." or "=begin +biblio" ... "=end biblio". Pod processors that don't understand +"=for biblio", etc, will simply ignore it, whereas they may complain +loudly if they see "=biblio". 
+ +=item * + +Throughout this document, "Pod" has been the preferred spelling for +the name of the documentation format. One may also use "POD" or +"pod". For the documentation that is (typically) in the Pod +format, you may use "pod", or "Pod", or "POD". Understanding these +distinctions is useful; but obsessing over how to spell them, usually +is not. + +=back + + + + + +=head1 About LE<lt>...E<gt> Codes + +As you can tell from a glance at L<perlpod|perlpod>, the LE<lt>...> +code is the most complex of the Pod formatting codes. The points below +will hopefully clarify what it means and how processors should deal +with it. + +=over + +=item * + +In parsing an LE<lt>...> code, Pod parsers must distinguish at least +four attributes: + +=over + +=item First: + +The link-text. If there is none, this must be undef. (E.g., in +"LE<lt>Perl Functions|perlfunc>", the link-text is "Perl Functions". +In "LE<lt>Time::HiRes>" and even "LE<lt>|Time::HiRes>", there is no +link text. Note that link text may contain formatting.) + +=item Second: + +The possibly inferred link-text -- i.e., if there was no real link +text, then this is the text that we'll infer in its place. (E.g., for +"LE<lt>Getopt::Std>", the inferred link text is "Getopt::Std".) + +=item Third: + +The name or URL, or undef if none. (E.g., in "LE<lt>Perl +Functions|perlfunc>", the name -- also sometimes called the page -- +is "perlfunc". In "LE<lt>/CAVEATS>", the name is undef.) + +=item Fourth: + +The section (AKA "item" in older perlpods), or undef if none. E.g., +in "LE<lt>Getopt::Std/DESCRIPTION>", "DESCRIPTION" is the section. (Note +that this is not the same as a manpage section like the "5" in "man 5 +crontab". "Section Foo" in the Pod sense means the part of the text +that's introduced by the heading or item whose text is "Foo".) 
+ +=back + +Pod parsers may also note additional attributes including: + +=over + +=item Fifth: + +A flag for whether item 3 (if present) is a URL (like +"http://lists.perl.org" is), in which case there should be no section +attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or +possibly a man page name (like "crontab(5)" is). + +=item Sixth: + +The raw original LE<lt>...> content, before text is split on +"|", "/", etc, and before EE<lt>...> codes are expanded. + +=back + +(The above were numbered only for concise reference below. It is not +a requirement that these be passed as an actual list or array.) + +For example: + + L<Foo::Bar> + => undef, # link text + "Foo::Bar", # possibly inferred link text + "Foo::Bar", # name + undef, # section + 'pod', # what sort of link + "Foo::Bar" # original content + + L<Perlport's section on NL's|perlport/Newlines> + => "Perlport's section on NL's", # link text + "Perlport's section on NL's", # possibly inferred link text + "perlport", # name + "Newlines", # section + 'pod', # what sort of link + "Perlport's section on NL's|perlport/Newlines" # orig. 
content + + L<perlport/Newlines> + => undef, # link text + '"Newlines" in perlport', # possibly inferred link text + "perlport", # name + "Newlines", # section + 'pod', # what sort of link + "perlport/Newlines" # original content + + L<crontab(5)/"DESCRIPTION"> + => undef, # link text + '"DESCRIPTION" in crontab(5)', # possibly inferred link text + "crontab(5)", # name + "DESCRIPTION", # section + 'man', # what sort of link + 'crontab(5)/"DESCRIPTION"' # original content + + L</Object Attributes> + => undef, # link text + '"Object Attributes"', # possibly inferred link text + undef, # name + "Object Attributes", # section + 'pod', # what sort of link + "/Object Attributes" # original content + + L<http://www.perl.org/> + => undef, # link text + "http://www.perl.org/", # possibly inferred link text + "http://www.perl.org/", # name + undef, # section + 'url', # what sort of link + "http://www.perl.org/" # original content + +Note that you can distinguish URL-links from anything else by the +fact that they match C<m/\A\w+:[^:\s]\S*\z/>. So +C<LE<lt>http://www.perl.comE<gt>> is a URL, but +C<LE<lt>HTTP::ResponseE<gt>> isn't. + +=item * + +In case of LE<lt>...> codes with no "text|" part in them, +older formatters have exhibited great variation in actually displaying +the link or cross reference. For example, LE<lt>crontab(5)> would render +as "the C<crontab(5)> manpage", or "in the C<crontab(5)> manpage" +or just "C<crontab(5)>". + +Pod processors must now treat "text|"-less links as follows: + + L<name> => L<name|name> + L</section> => L<"section"|/section> + L<name/section> => L<"section" in name|name/section> + +=item * + +Note that section names might contain markup. 
I.e., if a section +starts with: + + =head2 About the C<-M> Operator + +or with: + + =item About the C<-M> Operator + +then a link to it would look like this: + + L<somedoc/About the C<-M> Operator> + +Formatters may choose to ignore the markup for purposes of resolving +the link and use only the renderable characters in the section name, +as in: + + <h1><a name="About_the_-M_Operator">About the <code>-M</code> + Operator</h1> + + ... + + <a href="somedoc#About_the_-M_Operator">About the <code>-M</code> + Operator" in somedoc</a> + +=item * + +Previous versions of perlpod distinguished C<LE<lt>name/"section"E<gt>> +links from C<LE<lt>name/itemE<gt>> links (and their targets). These +have been merged syntactically and semantically in the current +specification, and I<section> can refer either to a "=headI<n> Heading +Content" command or to a "=item Item Content" command. This +specification does not specify what behavior should be in the case +of a given document having several things all seeming to produce the +same I<section> identifier (e.g., in HTML, several things all producing +the same I<anchorname> in <a name="I<anchorname>">...</a> +elements). Where Pod processors can control this behavior, they should +use the first such anchor. That is, C<LE<lt>Foo/BarE<gt>> refers to the +I<first> "Bar" section in Foo. + +But for some processors/formats this cannot be easily controlled; as +with the HTML example, the behavior of multiple ambiguous +<a name="I<anchorname>">...</a> is most easily just left up to +browsers to decide. + +=item * + +Authors wanting to link to a particular (absolute) URL, must do so +only with "LE<lt>scheme:...>" codes (like +LE<lt>http://www.perl.org>), and must not attempt "LE<lt>Some Site +Name|scheme:...>" codes. This restriction avoids many problems +in parsing and rendering LE<lt>...> codes. 
+ +=item * + +In a C<LE<lt>text|...E<gt>> code, text may contain formatting codes +for formatting or for EE<lt>...> escapes, as in: + + L<B<ummE<234>stuff>|...> + +For C<LE<lt>...E<gt>> codes without a "name|" part, only +C<EE<lt>...E<gt>> and C<ZE<lt>E<gt>> codes may occur -- no +other formatting codes. That is, authors should not use +"C<LE<lt>BE<lt>Foo::BarE<gt>E<gt>>". + +Note, however, that formatting codes and ZE<lt>>'s can occur in any +and all parts of an LE<lt>...> (i.e., in I<name>, I<section>, I<text>, +and I<url>). + +Authors must not nest LE<lt>...> codes. For example, "LE<lt>The +LE<lt>Foo::Bar> man page>" should be treated as an error. + +=item * + +Note that Pod authors may use formatting codes inside the "text" +part of "LE<lt>text|name>" (and so on for LE<lt>text|/"sec">). + +In other words, this is valid: + + Go read L<the docs on C<$.>|perlvar/"$."> + +Some output formats that do allow rendering "LE<lt>...>" codes as +hypertext, might not allow the link-text to be formatted; in +that case, formatters will have to just ignore that formatting. + +=item * + +At time of writing, C<LE<lt>nameE<gt>> values are of two types: +either the name of a Pod page like C<LE<lt>Foo::BarE<gt>> (which +might be a real Perl module or program in an @INC / PATH +directory, or a .pod file in those places); or the name of a UNIX +man page, like C<LE<lt>crontab(5)E<gt>>. In theory, C<LE<lt>chmodE<gt>> +is ambiguous between a Pod page called "chmod", or the Unix man page +"chmod" (in whatever man-section). However, the presence of a string +in parens, as in "crontab(5)", is sufficient to signal that what +is being discussed is not a Pod page, and so is presumably a +UNIX man page. The distinction is of no importance to many +Pod processors, but some processors that render to hypertext formats +may need to distinguish them in order to know how to render a +given C<LE<lt>fooE<gt>> code. 
+ +=item * + +Previous versions of perlpod allowed for a C<LE<lt>sectionE<gt>> syntax +(as in "C<LE<lt>Object AttributesE<gt>>"), which was not easily distinguishable +from C<LE<lt>nameE<gt>> syntax. This syntax is no longer in the +specification, and has been replaced by the C<LE<lt>"section"E<gt>> syntax +(where the quotes were formerly optional). Pod parsers should tolerate +the C<LE<lt>sectionE<gt>> syntax, for a while at least. The suggested +heuristic for distinguishing C<LE<lt>sectionE<gt>> from C<LE<lt>nameE<gt>> +is that if it contains any whitespace, it's a I<section>. Pod processors +may warn about this being deprecated syntax. + +=back + +=head1 About =over...=back Regions + +"=over"..."=back" regions are used for various kinds of list-like +structures. (I use the term "region" here simply as a collective +term for everything from the "=over" to the matching "=back".) + +=over + +=item * + +The non-zero numeric I<indentlevel> in "=over I<indentlevel>" ... +"=back" is used for giving the formatter a clue as to how many +"spaces" (ems, or roughly equivalent units) it should tab over, +although many formatters will have to convert this to an absolute +measurement that may not exactly match with the size of spaces (or M's) +in the document's base font. Other formatters may have to completely +ignore the number. The lack of any explicit I<indentlevel> parameter is +equivalent to an I<indentlevel> value of 4. Pod processors may +complain if I<indentlevel> is present but is not a positive number +matching C<m/\A(\d*\.)?\d+\z/>. + +=item * + +Authors of Pod formatters are reminded that "=over" ... "=back" may +map to several different constructs in your output format. For +example, in converting Pod to (X)HTML, it can map to any of +<ul>...</ul>, <ol>...</ol>, <dl>...</dl>, or +<blockquote>...</blockquote>. Similarly, "=item" can map to <li> or +<dt>. + +=item * + +Each "=over" ... 
"=back" region should be one of the following: + +=over + +=item * + +An "=over" ... "=back" region containing only "=item *" commands, +each followed by some number of ordinary/verbatim paragraphs, other +nested "=over" ... "=back" regions, "=for..." paragraphs, and +"=begin"..."=end" regions. + +(Pod processors must tolerate a bare "=item" as if it were "=item +*".) Whether "*" is rendered as a literal asterisk, an "o", or as +some kind of real bullet character, is left up to the Pod formatter, +and may depend on the level of nesting. + +=item * + +An "=over" ... "=back" region containing only +C<m/\A=item\s+\d+\.?\s*\z/> paragraphs, each one (or each group of them) +followed by some number of ordinary/verbatim paragraphs, other nested +"=over" ... "=back" regions, "=for..." paragraphs, and/or +"=begin"..."=end" codes. Note that the numbers must start at 1 +in each section, and must proceed in order and without skipping +numbers. + +(Pod processors must tolerate lines like "=item 1" as if they were +"=item 1.", with the period.) + +=item * + +An "=over" ... "=back" region containing only "=item [text]" +commands, each one (or each group of them) followed by some number of +ordinary/verbatim paragraphs, other nested "=over" ... "=back" +regions, or "=for..." paragraphs, and "=begin"..."=end" regions. + +The "=item [text]" paragraph should not match +C<m/\A=item\s+\d+\.?\s*\z/> or C<m/\A=item\s+\*\s*\z/>, nor should it +match just C<m/\A=item\s*\z/>. + +=item * + +An "=over" ... "=back" region containing no "=item" paragraphs at +all, and containing only some number of +ordinary/verbatim paragraphs, and possibly also some nested "=over" +... "=back" regions, "=for..." paragraphs, and "=begin"..."=end" +regions. Such an itemless "=over" ... "=back" region in Pod is +equivalent in meaning to a "<blockquote>...</blockquote>" element in +HTML. + +=back + +Note that with all the above cases, you can determine which type of +"=over" ... 
"=back" you have, by examining the first (non-"=cut", +non-"=pod") Pod paragraph after the "=over" command. + +=item * + +Pod formatters I<must> tolerate arbitrarily large amounts of text +in the "=item I<text...>" paragraph. In practice, most such +paragraphs are short, as in: + + =item For cutting off our trade with all parts of the world + +But they may be arbitrarily long: + + =item For transporting us beyond seas to be tried for pretended + offenses + + =item He is at this time transporting large armies of foreign + mercenaries to complete the works of death, desolation and + tyranny, already begun with circumstances of cruelty and perfidy + scarcely paralleled in the most barbarous ages, and totally + unworthy the head of a civilized nation. + +=item * + +Pod processors should tolerate "=item *" / "=item I<number>" commands +with no accompanying paragraph. The middle item is an example: + + =over + + =item 1 + + Pick up dry cleaning. + + =item 2 + + =item 3 + + Stop by the store. Get Abba Zabas, Stoli, and cheap lawn chairs. + + =back + +=item * + +No "=over" ... "=back" region can contain headings. Processors may +treat such a heading as an error. + +=item * + +Note that an "=over" ... "=back" region should have some +content. That is, authors should not have an empty region like this: + + =over + + =back + +Pod processors seeing such a contentless "=over" ... "=back" region, +may ignore it, or may report it as an error. + +=item * + +Processors must tolerate an "=over" list that goes off the end of the +document (i.e., which has no matching "=back"), but they may warn +about such a list. + +=item * + +Authors of Pod formatters should note that this construct: + + =item Neque + + =item Porro + + =item Quisquam Est + + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. 
+ + =item Ut Enim + +is semantically ambiguous, in a way that makes formatting decisions +a bit difficult. On the one hand, it could be mention of an item +"Neque", mention of another item "Porro", and mention of another +item "Quisquam Est", with just the last one requiring the explanatory +paragraph "Qui dolorem ipsum quia dolor..."; and then an item +"Ut Enim". In that case, you'd want to format it like so: + + Neque + + Porro + + Quisquam Est + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +But it could equally well be a discussion of three (related or equivalent) +items, "Neque", "Porro", and "Quisquam Est", followed by a paragraph +explaining them all, and then a new item "Ut Enim". In that case, you'd +probably want to format it like so: + + Neque + Porro + Quisquam Est + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +But (for the foreseeable future), Pod does not provide any way for Pod +authors to distinguish which grouping is meant by the above +"=item"-cluster structure. So formatters should format it like so: + + Neque + + Porro + + Quisquam Est + + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +That is, there should be (at least roughly) equal spacing between +items as between paragraphs (although that spacing may well be less +than the full height of a line of text). This leaves it to the reader +to use (con)textual cues to figure out whether the "Qui dolorem +ipsum..." paragraph applies to the "Quisquam Est" item or to all three +items "Neque", "Porro", and "Quisquam Est". 
While not an ideal +situation, this is preferable to providing formatting cues that may +be actually contrary to the author's intent. + +=back + + + +=head1 About Data Paragraphs and "=begin/=end" Regions + +Data paragraphs are typically used for inlining non-Pod data that is +to be used (typically passed through) when rendering the document to +a specific format: + + =begin rtf + + \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par} + + =end rtf + +The exact same effect could, incidentally, be achieved with a single +"=for" paragraph: + + =for rtf \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par} + +(Although that is not formally a data paragraph, it has the same +meaning as one, and Pod parsers may parse it as one.) + +Another example of a data paragraph: + + =begin html + + I like <em>PIE</em>! + + <hr>Especially pecan pie! + + =end html + +If these were ordinary paragraphs, the Pod parser would try to +expand the "EE<lt>/em>" (in the first paragraph) as a formatting +code, just like "EE<lt>lt>" or "EE<lt>eacute>". But since this +is in a "=begin I<identifier>"..."=end I<identifier>" region I<and> +the identifier "html" doesn't begin with a ":" prefix, the contents +of this region are stored as data paragraphs, instead of being +processed as ordinary paragraphs (or if they began with spaces +and/or tabs, as verbatim paragraphs). + +As a further example: At time of writing, no "biblio" identifier is +supported, but suppose some processor were written to recognize it as +a way of (say) denoting a bibliographic reference (necessarily +containing formatting codes in ordinary paragraphs). The fact that +"biblio" paragraphs were meant for ordinary processing would be +indicated by prefacing each "biblio" identifier with a colon: + + =begin :biblio + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. 
+ + =end :biblio + +This would signal to the parser that paragraphs in this begin...end +region are subject to normal handling as ordinary/verbatim paragraphs +(while still tagged as meant only for processors that understand the +"biblio" identifier). The same effect could be had with: + + =for :biblio + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + +The ":" on these identifiers means simply "process this stuff +normally, even though the result will be for some special target". +I suggest that parser APIs report "biblio" as the target identifier, +but also report that it had a ":" prefix. (And similarly, with the +above "html", report "html" as the target identifier, and note the +I<lack> of a ":" prefix.) + +Note that a "=begin I<identifier>"..."=end I<identifier>" region where +I<identifier> begins with a colon, I<can> contain commands. For example: + + =begin :biblio + + Wirth's classic is available in several editions, including: + + =for comment + hm, check abebooks.com for how much used copies cost. + + =over + + =item + + Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> + Teubner, Stuttgart. [Yes, it's in German.] + + =item + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + + =back + + =end :biblio + +Note, however, a "=begin I<identifier>"..."=end I<identifier>" +region where I<identifier> does I<not> begin with a colon, should not +directly contain "=head1" ... "=head4" commands, nor "=over", nor "=back", +nor "=item". For example, this may be considered invalid: + + =begin somedata + + This is a data paragraph. + + =head1 Don't do this! + + This is a data paragraph too. + + =end somedata + +A Pod processor may signal that the above (specifically the "=head1" +paragraph) is an error. Note, however, that the following should +I<not> be treated as an error: + + =begin somedata + + This is a data paragraph. 
+ + =cut + + # Yup, this isn't Pod anymore. + sub excl { (rand() > .5) ? "hoo!" : "hah!" } + + =pod + + This is a data paragraph too. + + =end somedata + +And this too is valid: + + =begin someformat + + This is a data paragraph. + + And this is a data paragraph. + + =begin someotherformat + + This is a data paragraph too. + + And this is a data paragraph too. + + =begin :yetanotherformat + + =head2 This is a command paragraph! + + This is an ordinary paragraph! + + And this is a verbatim paragraph! + + =end :yetanotherformat + + =end someotherformat + + Another data paragraph! + + =end someformat + +The contents of the above "=begin :yetanotherformat" ... +"=end :yetanotherformat" region I<aren't> data paragraphs, because +the immediately containing region's identifier (":yetanotherformat") +begins with a colon. In practice, most regions that contain +data paragraphs will contain I<only> data paragraphs; however, +the above nesting is syntactically valid as Pod, even if it is +rare. However, the handlers for some formats, like "html", +will accept only data paragraphs, not nested regions; and they may +complain if they see (targeted for them) nested regions, or commands, +other than "=end", "=pod", and "=cut". + +Also consider this valid structure: + + =begin :biblio + + Wirth's classic is available in several editions, including: + + =over + + =item + + Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> + Teubner, Stuttgart. [Yes, it's in German.] + + =item + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + + =back + + Buy buy buy! + + =begin html + + <img src='wirth_spokesmodeling_book.png'> + + <hr> + + =end html + + Now now now! + + =end :biblio + +There, the "=begin html"..."=end html" region is nested inside +the larger "=begin :biblio"..."=end :biblio" region. 
Note that the +content of the "=begin html"..."=end html" region is data +paragraph(s), because the immediately containing region's identifier +("html") I<doesn't> begin with a colon. + +Pod parsers, when processing a series of data paragraphs one +after another (within a single region), should consider them to +be one large data paragraph that happens to contain blank lines. So +the content of the above "=begin html"..."=end html" I<may> be stored +as two data paragraphs (one consisting of +"<img src='wirth_spokesmodeling_book.png'>\n" +and another consisting of "<hr>\n"), but I<should> be stored as +a single data paragraph (consisting of +"<img src='wirth_spokesmodeling_book.png'>\n\n<hr>\n"). + +Pod processors should tolerate empty +"=begin I<something>"..."=end I<something>" regions, +empty "=begin :I<something>"..."=end :I<something>" regions, and +contentless "=for I<something>" and "=for :I<something>" +paragraphs. I.e., these should be tolerated: + + =for html + + =begin html + + =end html + + =begin :biblio + + =end :biblio + +Incidentally, note that there's no easy way to express a data +paragraph starting with something that looks like a command. Consider: + + =begin stuff + + =shazbot + + =end stuff + +There, "=shazbot" will be parsed as a Pod command "shazbot", not as a data +paragraph "=shazbot\n". However, you can express a data paragraph consisting +of "=shazbot\n" using this code: + + =for stuff =shazbot + +The situation where this is necessary, is presumably quite rare. + +Note that =end commands must match the currently open =begin command. That +is, they must properly nest. For example, this is valid: + + =begin outer + + X + + =begin inner + + Y + + =end inner + + Z + + =end outer + +while this is invalid: + + =begin outer + + X + + =begin inner + + Y + + =end outer + + Z + + =end inner + +This latter is improper because when the "=end outer" command is seen, the +currently open region has the formatname "inner", not "outer". 
(It just +happens that "outer" is the format name of a higher-up region.) This is +an error. Processors must by default report this as an error, and may halt +processing the document containing that error. A corollary of this is that +regions cannot "overlap" -- i.e., the latter block above does not represent +a region called "outer" which contains X and Y, overlapping a region called +"inner" which contains Y and Z. But because it is invalid (as all +apparently overlapping regions would be), it doesn't represent that, or +anything at all. + +Similarly, this is invalid: + + =begin thing + + =end hting + +This is an error because the region is opened by "thing", and the "=end" +tries to close "hting" [sic]. + +This is also invalid: + + =begin thing + + =end + +This is invalid because every "=end" command must have a formatname +parameter. + +=head1 SEE ALSO + +L<perlpod>, L<perlsyn/"PODs: Embedded Documentation">, +L<podchecker> + +=head1 AUTHOR + +Sean M. Burke + +=cut + + diff --git a/pod/perltoc.pod b/pod/perltoc.pod index 3ad81d4f12..859c2a42ff 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -642,7 +642,7 @@ more elaborate constructs =back -=head2 perlpod - plain old documentation +=head2 perlpod - the Plain Old Documentation format =over 4 @@ -650,20 +650,73 @@ more elaborate constructs =over 4 +=item Ordinary Paragraph + =item Verbatim Paragraph =item Command Paragraph -=item Ordinary Block of Text +C<=head1 I<Heading Text>>, C<=head2 I<Heading Text>>, C<=head3 I<Heading +Text>>, C<=head4 I<Heading Text>>, C<=over I<indentlevel>>, C<=item +I<stuff...>>, C<=back>, C<=cut>, C<=pod>, C<=begin I<formatname>>, C<=end +I<formatname>>, C<=for I<formatname> I<text...>> + +=item Formatting Codes + +C<IE<lt>textE<gt>> -- italic text, C<BE<lt>textE<gt>> -- bold text, +C<CE<lt>codeE<gt>> -- code text, C<LE<lt>nameE<gt>> -- a hyperlink, +C<EE<lt>escapeE<gt>> -- a character escape, C<FE<lt>filenameE<gt>> -- used +for filenames, C<SE<lt>textE<gt>> -- text contains 
non-breaking spaces, +C<XE<lt>topic nameE<gt>> -- an index entry, C<ZE<lt>E<gt>> -- a null +(zero-effect) formatting code =item The Intent =item Embedding Pods in Perl Modules -=item Common Pod Pitfalls +=item Hints for Writing Pod + +=back + +=item SEE ALSO + +=item AUTHOR =back +=head2 perlpodspec - Plain Old Documentation: format specification and +notes + +=over 4 + +=item DESCRIPTION + +=item Pod Definitions + +=item Pod Commands + +"=head1", "=head2", "=head3", "=head4", "=pod", "=cut", "=over", "=item", +"=back", "=begin formatname", "=end formatname", "=for formatname text..." + +=item Pod Formatting Codes + +C<IE<lt>textE<gt>> -- italic text, C<BE<lt>textE<gt>> -- bold text, +C<CE<lt>codeE<gt>> -- code text, C<FE<lt>filenameE<gt>> -- style for +filenames, C<XE<lt>topic nameE<gt>> -- an index entry, C<ZE<lt>E<gt>> -- a +null (zero-effect) formatting code, C<LE<lt>nameE<gt>> -- a hyperlink, +C<EE<lt>escapeE<gt>> -- a character escape, C<SE<lt>textE<gt>> -- text +contains non-breaking spaces + +=item Notes on Implementing Pod Processors + +=item About LE<lt>...E<gt> Codes + +First:, Second:, Third:, Fourth:, Fifth:, Sixth: + +=item About =over...=back Regions + +=item About Data Paragraphs and "=begin/=end" Regions + =item SEE ALSO =item AUTHOR diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index 6bd0423c68..9b4d2e3eec 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -254,7 +254,8 @@ The following reserved ranges have C<In> tests: Plane 16 Private Use For example C<"\x{AC00}" =~ \p{HangulSyllable}> will test true. -(Handling of surrogates is not implemented yet.) +(Handling of surrogates is not implemented yet, because Perl +uses UTF-8 and not UTF-16 internally to represent Unicode.) 
Additionally, because scripts differ in their directionality (for example Hebrew is written right to left), all characters @@ -285,66 +286,66 @@ have their directionality defined: The scripts available for C<\p{In...}> and C<\P{In...}>, for example \p{InCyrillic>, are as follows, for example C<\p{InLatin}> or C<\P{InHan}>: - Latin - Greek - Cyrillic - Armenian - Hebrew Arabic - Syriac - Thaana - Devanagari + Armenian Bengali - Gurmukhi + Bopomofo + Canadian-Aboriginal + Cherokee + Cyrillic + Deseret + Devanagari + Ethiopic + Georgian + Gothic + Greek Gujarati - Oriya - Tamil - Telugu + Gurmukhi + Han + Hangul + Hebrew + Hiragana + Inherited Kannada - Malayalam - Sinhala - Thai + Katakana + Khmer Lao - Tibetan + Latin + Malayalam + Mongolian Myanmar - Georgian - Hangul - Ethiopic - Cherokee - Canadian Aboriginal Ogham + Old-Italic + Oriya Runic - Khmer - Mongolian - Hiragana - Katakana - Bopomofo - Han + Sinhala + Syriac + Tamil + Telugu + Thaana + Thai + Tibetan Yi - Old Italic - Gothic - Deseret - Inherited There are also extended property classes that supplement the basic properties, defined by the F<PropList> Unicode database: - White_space + ASCII_Hex_Digit Bidi_Control - Join_Control Dash - Hyphen - Quotation_Mark - Other_Math - Hex_Digit - ASCII_Hex_Digit - Other_Alphabetic - Ideographic Diacritic Extender + Hex_Digit + Hyphen + Ideographic + Join_Control + Noncharacter_Code_Point + Other_Alphabetic Other_Lowercase + Other_Math Other_Uppercase - Noncharacter_Code_Point + Quotation_Mark + White_space and further derived properties: @@ -365,11 +366,14 @@ and further derived properties: In addition to B<scripts>, Unicode also defines B<blocks> of characters. The difference between scripts and blocks is that the -former concept is closer to natural languages, while the latter +scripts concept is closer to natural languages, while the blocks concept is more an artificial grouping based on groups of 256 Unicode characters. 
For example, the C<Latin> script contains letters from -many blocks, but it does not contain all the characters from those -blocks, it does not for example contain digits. +many blocks. On the other hand, the C<Latin> script does not contain +all the characters from those blocks, it does not for example contain +digits because digits are shared across many scripts. Digits and +other similar groups, like punctuation, are in a category called +C<Common>. For more about scripts see the UTR #24: http://www.unicode.org/unicode/reports/tr24/ @@ -386,102 +390,102 @@ preferential Unicode character class definition; this meant that the definitions of some character classes changed (the ones in the below list that have the C<Block> appended). + Alphabetic Presentation Forms + Arabic Block + Arabic Presentation Forms-A + Arabic Presentation Forms-B + Armenian Block + Arrows Basic Latin - Latin 1 Supplement - Latin Extended-A - Latin Extended-B - IPA Extensions - Spacing Modifier Letters + Bengali Block + Block Elements + Bopomofo Block + Bopomofo Extended + Box Drawing + Braille Patterns + Byzantine Musical Symbols + CJK Compatibility + CJK Compatibility Forms + CJK Compatibility Ideographs + CJK Compatibility Ideographs Supplement + CJK Radicals Supplement + CJK Symbols and Punctuation + CJK Unified Ideographs + CJK Unified Ideographs Extension A + CJK Unified Ideographs Extension B + Cherokee Block Combining Diacritical Marks - Greek Block + Combining Half Marks + Combining Marks for Symbols + Control Pictures + Currency Symbols Cyrillic Block - Armenian Block - Hebrew Block - Arabic Block - Syriac Block - Thaana Block + Deseret Block Devanagari Block - Bengali Block - Gurmukhi Block - Gujarati Block - Oriya Block - Tamil Block - Telugu Block - Kannada Block - Malayalam Block - Sinhala Block - Thai Block - Lao Block - Tibetan Block - Myanmar Block + Dingbats + Enclosed Alphanumerics + Enclosed CJK Letters and Months + Ethiopic Block + General Punctuation + Geometric Shapes 
Georgian Block + Gothic Block + Greek Block + Greek Extended + Gujarati Block + Gurmukhi Block + Halfwidth and Fullwidth Forms + Hangul Compatibility Jamo Hangul Jamo - Ethiopic Block - Cherokee Block - Unified Canadian Aboriginal Syllabics - Ogham Block - Runic Block + Hangul Syllables + Hebrew Block + High Private Use Surrogates + High Surrogates + Hiragana Block + IPA Extensions + Ideographic Description Characters + Kanbun + Kangxi Radicals + Kannada Block + Katakana Block Khmer Block - Mongolian Block + Lao Block + Latin 1 Supplement Latin Extended Additional - Greek Extended - General Punctuation - Superscripts and Subscripts - Currency Symbols - Combining Marks for Symbols + Latin Extended-A + Latin Extended-B Letterlike Symbols - Number Forms - Arrows + Low Surrogates + Malayalam Block + Mathematical Alphanumeric Symbols Mathematical Operators + Miscellaneous Symbols Miscellaneous Technical - Control Pictures + Mongolian Block + Musical Symbols + Myanmar Block + Number Forms + Ogham Block + Old Italic Block Optical Character Recognition - Enclosed Alphanumerics - Box Drawing - Block Elements - Geometric Shapes - Miscellaneous Symbols - Dingbats - Braille Patterns - CJK Radicals Supplement - Kangxi Radicals - Ideographic Description Characters - CJK Symbols and Punctuation - Hiragana Block - Katakana Block - Bopomofo Block - Hangul Compatibility Jamo - Kanbun - Bopomofo Extended - Enclosed CJK Letters and Months - CJK Compatibility - CJK Unified Ideographs Extension A - CJK Unified Ideographs - Yi Syllables - Yi Radicals - Hangul Syllables - High Surrogates - High Private Use Surrogates - Low Surrogates + Oriya Block Private Use - CJK Compatibility Ideographs - Alphabetic Presentation Forms - Arabic Presentation Forms-A - Combining Half Marks - CJK Compatibility Forms + Runic Block + Sinhala Block Small Form Variants - Arabic Presentation Forms-B + Spacing Modifier Letters Specials - Halfwidth and Fullwidth Forms - Old Italic Block - Gothic Block - Deseret 
Block - Byzantine Musical Symbols - Musical Symbols - Mathematical Alphanumeric Symbols - CJK Unified Ideographs Extension B - CJK Compatibility Ideographs Supplement + Superscripts and Subscripts + Syriac Block Tags + Tamil Block + Telugu Block + Thaana Block + Thai Block + Tibetan Block + Unified Canadian Aboriginal Syllabics + Yi Radicals + Yi Syllables =item * @@ -548,15 +552,37 @@ wide bit complement. =item * -lc(), uc(), lcfirst(), and ucfirst() work only for some of the -simplest cases, where the mapping goes from a single Unicode character -to another single Unicode character, and where the mapping does not -depend on surrounding characters, or on locales. More complex cases, -where for example one character maps into several, are not yet -implemented. See the Unicode Technical Report #21, Case Mappings, -for more details. The Unicode::UCD module (part of Perl since 5.8.0) -casespec() and casefold() interfaces supply information about the more -complex cases. +lc(), uc(), lcfirst(), and ucfirst() work for the following cases: + +=over 8 + +=item * + +the case mapping is from a single Unicode character to another +single Unicode character + +=item * + +the case mapping is from a single Unicode character to more +than one Unicode character + +=back + +What doesn't yet work are the following cases: + +=over 8 + +=item * + +the "final sigma" (Greek) + +=item * + +anything to do with locales (Lithuanian, Turkish, Azeri) + +=back + +See the Unicode Technical Report #21, Case Mappings, for more details. =item * diff --git a/pod/perlvar.pod b/pod/perlvar.pod index bcd09fdadb..64fc7fd654 100644 --- a/pod/perlvar.pod +++ b/pod/perlvar.pod @@ -844,10 +844,9 @@ of perl in the right bracket?) Example: See also the documentation of C<use VERSION> and C<require VERSION> for a convenient way to fail if the running Perl interpreter is too old. -The use of this variable is deprecated. The floating point representation -can sometimes lead to inaccurate numeric comparisons. 
See C<$^V> for a -more modern representation of the Perl version that allows accurate string -comparisons. +The floating point representation can sometimes lead to inaccurate +numeric comparisons. See C<$^V> for a more modern representation of +the Perl version that allows accurate string comparisons. =item $COMPILING @@ -797,6 +797,7 @@ PERL_CALLCONV SV* Perl_swash_init(pTHX_ char* pkg, char* name, SV* listsv, I32 m PERL_CALLCONV UV Perl_swash_fetch(pTHX_ SV *sv, U8 *ptr, bool do_utf8); PERL_CALLCONV void Perl_taint_env(pTHX); PERL_CALLCONV void Perl_taint_proper(pTHX_ const char* f, const char* s); +PERL_CALLCONV UV Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swash, char *normal, char *special); PERL_CALLCONV UV Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp); PERL_CALLCONV UV Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp); PERL_CALLCONV UV Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp); diff --git a/sharedsv.c b/sharedsv.c index 2d347b8204..0deabb23ab 100644 --- a/sharedsv.c +++ b/sharedsv.c @@ -84,8 +84,17 @@ looking at magic, or by checking if it is tied again threads::shared. shared_sv * Perl_sharedsv_find(pTHX_ SV* sv) { - /* does all it can to find a shared_sv struct, returns NULL otherwise */ - shared_sv* ssv = NULL; + /* does all it can to find a shared_sv struct, returns NULL otherwise */ + shared_sv* ssv = NULL; + switch (SvTYPE(sv)) { + case SVt_PVMG: + {MAGIC* mg = mg_find(sv, PERL_MAGIC_ext); + + if(strcmp(mg->mg_ptr,"threads::shared")) + break; + ssv = (shared_sv*) SvIV(mg->mg_obj); + } + } return ssv; } @@ -164,9 +173,9 @@ Increments the threadcount of a sharedsv. 
void Perl_sharedsv_thrcnt_inc(pTHX_ shared_sv* ssv) { - SHAREDSvEDIT(ssv); + SHAREDSvLOCK(ssv); SvREFCNT_inc(ssv->sv); - SHAREDSvRELEASE(ssv); + SHAREDSvUNLOCK(ssv); } /* @@ -182,7 +191,7 @@ void Perl_sharedsv_thrcnt_dec(pTHX_ shared_sv* ssv) { SV* sv; - SHAREDSvEDIT(ssv); + SHAREDSvLOCK(ssv); sv = SHAREDSvGET(ssv); if (SvREFCNT(sv) == 1) { switch (SvTYPE(sv)) { @@ -211,8 +220,8 @@ Perl_sharedsv_thrcnt_dec(pTHX_ shared_sv* ssv) } } } - SvREFCNT_dec(sv); - SHAREDSvRELEASE(ssv); + Perl_sv_free(PL_sharedsv_space,SHAREDSvGET(ssv)); + SHAREDSvUNLOCK(ssv); } #endif /* USE_ITHREADS */ diff --git a/t/base/rs.t b/t/base/rs.t index e470f3a30c..306d646e6d 100755 --- a/t/base/rs.t +++ b/t/base/rs.t @@ -1,7 +1,7 @@ #!./perl # Test $! -print "1..14\n"; +print "1..16\n"; $teststring = "1\n12\n123\n1234\n1234\n12345\n\n123456\n1234567\n"; @@ -86,9 +86,7 @@ $/ = \$foo; $bar = <TESTFILE>; if ($bar eq "78") {print "ok 10\n";} else {print "not ok 10\n";} -# Get rid of the temp file close TESTFILE; -unlink "./foo"; # Now for the tricky bit--full record reading if ($^O eq 'VMS') { @@ -130,3 +128,35 @@ if ($^O eq 'VMS') { # put their own tests in) so we just punt foreach $test (11..14) {print "ok $test # skipped on non-VMS system\n"}; } + +$/ = "\n"; + +# see if open/readline/close work on our and my variables +{ + if (open our $T, "./foo") { + my $line = <$T>; + print "# $line\n"; + length($line) == 40 or print "not "; + close $T or print "not "; + } + else { + print "not "; + } + print "ok 15\n"; +} + +{ + if (open my $T, "./foo") { + my $line = <$T>; + print "# $line\n"; + length($line) == 40 or print "not "; + close $T or print "not "; + } + else { + print "not "; + } + print "ok 16\n"; +} + +# Get rid of the temp file +END { unlink "./foo"; } diff --git a/t/lib/strict/vars b/t/lib/strict/vars index 40b55572b8..f7f8a1cf0d 100644 --- a/t/lib/strict/vars +++ b/t/lib/strict/vars @@ -399,6 +399,20 @@ EXPECT Name "Foo::foo" used only once: possible typo at - line 11. 
######## +--FILE-- abc +ok +--FILE-- +# check if our variables are introduced correctly in readline() +package Foo; +use strict 'vars'; +our $FH; +open $FH, "abc" or die "Can't open 'abc': $!"; +print <$FH>; +close $FH; +EXPECT +ok +######## + # Make sure the strict vars failure still occurs # now that the `@i should be written as \@i' failure does not occur # 20000522 mjd@plover.com (MJD) @@ -1,59 +1,93 @@ #!./perl -print "1..40\n"; +print "1..42\n"; + +my $test = 1; + +sub ok { + if ($_[0]) { + if ($_[1]) { + print "ok $test - $_[1]\n"; + } else { + print "ok $test\n"; + } + } else { + if ($_[1]) { + print "not ok $test - $_[1]\n"; + } else { + print "not ok $test\n"; + } + } + $test++; +} $a = "HELLO.* world"; $b = "hello.* WORLD"; -print "ok 1\n" if "\Q$a\E." eq "HELLO\\.\\*\\ world."; -print "ok 2\n" if "\u$a" eq "HELLO\.\* world"; -print "ok 3\n" if "\l$a" eq "hELLO\.\* world"; -print "ok 4\n" if "\U$a" eq "HELLO\.\* WORLD"; -print "ok 5\n" if "\L$a" eq "hello\.\* world"; - -print "ok 6\n" if quotemeta($a) eq "HELLO\\.\\*\\ world"; -print "ok 7\n" if ucfirst($a) eq "HELLO\.\* world"; -print "ok 8\n" if lcfirst($a) eq "hELLO\.\* world"; -print "ok 9\n" if uc($a) eq "HELLO\.\* WORLD"; -print "ok 10\n" if lc($a) eq "hello\.\* world"; - -print "ok 11\n" if "\Q$b\E." eq "hello\\.\\*\\ WORLD."; -print "ok 12\n" if "\u$b" eq "Hello\.\* WORLD"; -print "ok 13\n" if "\l$b" eq "hello\.\* WORLD"; -print "ok 14\n" if "\U$b" eq "HELLO\.\* WORLD"; -print "ok 15\n" if "\L$b" eq "hello\.\* world"; - -print "ok 16\n" if quotemeta($b) eq "hello\\.\\*\\ WORLD"; -print "ok 17\n" if ucfirst($b) eq "Hello\.\* WORLD"; -print "ok 18\n" if lcfirst($b) eq "hello\.\* WORLD"; -print "ok 19\n" if uc($b) eq "HELLO\.\* WORLD"; -print "ok 20\n" if lc($b) eq "hello\.\* world"; +ok("\Q$a\E." 
eq "HELLO\\.\\*\\ world.", '\Q\E HELLO.* world'); +ok("\u$a" eq "HELLO\.\* world", '\u'); +ok("\l$a" eq "hELLO\.\* world", '\l'); +ok("\U$a" eq "HELLO\.\* WORLD", '\U'); +ok("\L$a" eq "hello\.\* world", '\L'); + +ok(quotemeta($a) eq "HELLO\\.\\*\\ world", 'quotemeta'); +ok(ucfirst($a) eq "HELLO\.\* world", 'ucfirst'); +ok(lcfirst($a) eq "hELLO\.\* world", 'lcfirst'); +ok(uc($a) eq "HELLO\.\* WORLD", 'uc'); +ok(lc($a) eq "hello\.\* world", 'lc'); + +ok("\Q$b\E." eq "hello\\.\\*\\ WORLD.", '\Q\E hello.* WORLD'); +ok("\u$b" eq "Hello\.\* WORLD", '\u'); +ok("\l$b" eq "hello\.\* WORLD", '\l'); +ok("\U$b" eq "HELLO\.\* WORLD", '\U'); +ok("\L$b" eq "hello\.\* world", '\L'); + +ok(quotemeta($b) eq "hello\\.\\*\\ WORLD", 'quotemeta'); +ok(ucfirst($b) eq "Hello\.\* WORLD", 'ucfirst'); +ok(lcfirst($b) eq "hello\.\* WORLD", 'lcfirst'); +ok(uc($b) eq "HELLO\.\* WORLD", 'uc'); +ok(lc($b) eq "hello\.\* world", 'lc'); + +# \x{100} is LATIN CAPITAL LETTER A WITH MACRON; its bijective lowercase is +# \x{101}, LATIN SMALL LETTER A WITH MACRON. $a = "\x{100}\x{101}\x{41}\x{61}"; $b = "\x{101}\x{100}\x{61}\x{41}"; -print "ok 21\n" if "\Q$a\E." eq "\x{100}\x{101}\x{41}\x{61}."; -print "ok 22\n" if "\u$a" eq "\x{100}\x{101}\x{41}\x{61}"; -print "ok 23\n" if "\l$a" eq "\x{101}\x{101}\x{41}\x{61}"; -print "ok 24\n" if "\U$a" eq "\x{100}\x{100}\x{41}\x{41}"; -print "ok 25\n" if "\L$a" eq "\x{101}\x{101}\x{61}\x{61}"; - -print "ok 26\n" if quotemeta($a) eq "\x{100}\x{101}\x{41}\x{61}"; -print "ok 27\n" if ucfirst($a) eq "\x{100}\x{101}\x{41}\x{61}"; -print "ok 28\n" if lcfirst($a) eq "\x{101}\x{101}\x{41}\x{61}"; -print "ok 29\n" if uc($a) eq "\x{100}\x{100}\x{41}\x{41}"; -print "ok 30\n" if lc($a) eq "\x{101}\x{101}\x{61}\x{61}"; - -print "ok 31\n" if "\Q$b\E." 
eq "\x{101}\x{100}\x{61}\x{41}."; -print "ok 32\n" if "\u$b" eq "\x{100}\x{100}\x{61}\x{41}"; -print "ok 33\n" if "\l$b" eq "\x{101}\x{100}\x{61}\x{41}"; -print "ok 34\n" if "\U$b" eq "\x{100}\x{100}\x{41}\x{41}"; -print "ok 35\n" if "\L$b" eq "\x{101}\x{101}\x{61}\x{61}"; - -print "ok 36\n" if quotemeta($b) eq "\x{101}\x{100}\x{61}\x{41}"; -print "ok 37\n" if ucfirst($b) eq "\x{100}\x{100}\x{61}\x{41}"; -print "ok 38\n" if lcfirst($b) eq "\x{101}\x{100}\x{61}\x{41}"; -print "ok 39\n" if uc($b) eq "\x{100}\x{100}\x{41}\x{41}"; -print "ok 40\n" if lc($b) eq "\x{101}\x{101}\x{61}\x{61}"; +ok("\Q$a\E." eq "\x{100}\x{101}\x{41}\x{61}.", '\Q\E \x{100}\x{101}\x{41}\x{61}'); +ok("\u$a" eq "\x{100}\x{101}\x{41}\x{61}", '\u'); +ok("\l$a" eq "\x{101}\x{101}\x{41}\x{61}", '\l'); +ok("\U$a" eq "\x{100}\x{100}\x{41}\x{41}", '\U'); +ok("\L$a" eq "\x{101}\x{101}\x{61}\x{61}", '\L'); + +ok(quotemeta($a) eq "\x{100}\x{101}\x{41}\x{61}", 'quotemeta'); +ok(ucfirst($a) eq "\x{100}\x{101}\x{41}\x{61}", 'ucfirst'); +ok(lcfirst($a) eq "\x{101}\x{101}\x{41}\x{61}", 'lcfirst'); +ok(uc($a) eq "\x{100}\x{100}\x{41}\x{41}", 'uc'); +ok(lc($a) eq "\x{101}\x{101}\x{61}\x{61}", 'lc'); + +ok("\Q$b\E." eq "\x{101}\x{100}\x{61}\x{41}.", '\Q\E \x{101}\x{100}\x{61}\x{41}'); +ok("\u$b" eq "\x{100}\x{100}\x{61}\x{41}", '\u'); +ok("\l$b" eq "\x{101}\x{100}\x{61}\x{41}", '\l'); +ok("\U$b" eq "\x{100}\x{100}\x{41}\x{41}", '\U'); +ok("\L$b" eq "\x{101}\x{101}\x{61}\x{61}", '\L'); + +ok(quotemeta($b) eq "\x{101}\x{100}\x{61}\x{41}", 'quotemeta'); +ok(ucfirst($b) eq "\x{100}\x{100}\x{61}\x{41}", 'ucfirst'); +ok(lcfirst($b) eq "\x{101}\x{100}\x{61}\x{41}", 'lcfirst'); +ok(uc($b) eq "\x{100}\x{100}\x{41}\x{41}", 'uc'); +ok(lc($b) eq "\x{101}\x{101}\x{61}\x{61}", 'lc'); + +# \x{DF} is LATIN SMALL LETTER SHARP S, its uppercase is SS or \x{53}\x{53}; +# \x{149} is LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, its uppercase is +# \x{2BC}\x{4E} or MODIFIER LETTER APOSTROPHE and N. 
+ +ok("\U\x{DF}ab\x{149}cd" eq "\x{53}\x{53}AB\x{2BC}\x{4E}CD", + "multicharacter uppercase"); + +# The \x{DF} is its own lowercase, ditto for \x{149}. +# There are no single character -> multiple characters lowercase mappings. +ok("\L\x{DF}AB\x{149}CD" eq "\x{DF}ab\x{149}cd", + "multicharacter lowercase"); @@ -6646,12 +6646,29 @@ S_scan_inputsymbol(pTHX_ char *start) add symbol table ops */ if ((tmp = pad_findmy(d)) != NOT_IN_PAD) { - OP *o = newOP(OP_PADSV, 0); - o->op_targ = tmp; - PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, o); + SV *namesv = AvARRAY(PL_comppad_name)[tmp]; + if (SvFLAGS(namesv) & SVpad_OUR) { + SV *sym = sv_2mortal(newSVpv(HvNAME(GvSTASH(namesv)),0)); + sv_catpvn(sym, "::", 2); + sv_catpv(sym, d+1); + d = SvPVX(sym); + goto intro_sym; + } + else { + OP *o = newOP(OP_PADSV, 0); + o->op_targ = tmp; + PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, o); + } } else { - GV *gv = gv_fetchpv(d+1,TRUE, SVt_PV); + GV *gv; + ++d; +intro_sym: + gv = gv_fetchpv(d, + (PL_in_eval + ? (GV_ADDMULTI | GV_ADDINEVAL) + : TRUE), + SVt_PV); PL_lex_op = (OP*)newUNOP(OP_READLINE, 0, newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv))); @@ -1181,45 +1181,63 @@ Perl_is_utf8_mark(pTHX_ U8 *p) } UV -Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) +Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal, char *special) { UV uv; - if (!PL_utf8_toupper) - PL_utf8_toupper = swash_init("utf8", "ToUpper", &PL_sv_undef, 4, 0); - uv = swash_fetch(PL_utf8_toupper, p, TRUE); - uv = uv ? 
UNI_TO_NATIVE(uv) : utf8_to_uvchr(p, 0); + if (!*swashp) + *swashp = swash_init("utf8", normal, &PL_sv_undef, 4, 0); + uv = swash_fetch(*swashp, p, TRUE); + if (uv) + uv = UNI_TO_NATIVE(uv); + else { + HV *hv; + SV *keysv; + HE *he; + + uv = utf8_to_uvchr(p, 0); + + if ((hv = get_hv(special, FALSE)) && + (keysv = sv_2mortal(Perl_newSVpvf(aTHX_ "%"UVuf, uv))) && + (he = hv_fetch_ent(hv, keysv, FALSE, 0))) { + SV *val = HeVAL(he); + char *s = SvPV(val, *lenp); + U8 c = *(U8*)s; + if (*lenp > 1 || UNI_IS_INVARIANT(c)) + Copy(s, ustrp, *lenp, U8); + else { + /* something in the 0x80..0xFF range */ + ustrp[0] = UTF8_EIGHT_BIT_HI(c); + ustrp[1] = UTF8_EIGHT_BIT_LO(c); + *lenp = 2; + } + return 0; + } + } *lenp = UNISKIP(uv); uvuni_to_utf8(ustrp, uv); return uv; } UV -Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) +Perl_to_utf8_upper(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { - UV uv; + return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, + &PL_utf8_toupper, "ToUpper", "utf8::ToSpecUpper"); +} - if (!PL_utf8_totitle) - PL_utf8_totitle = swash_init("utf8", "ToTitle", &PL_sv_undef, 4, 0); - uv = swash_fetch(PL_utf8_totitle, p, TRUE); - uv = uv ? UNI_TO_NATIVE(uv) : utf8_to_uvchr(p, 0); - *lenp = UNISKIP(uv); - uvuni_to_utf8(ustrp, uv); - return uv; +UV +Perl_to_utf8_title(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) +{ + return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, + &PL_utf8_totitle, "ToTitle", "utf8::ToSpecTitle"); } UV Perl_to_utf8_lower(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp) { - UV uv; - - if (!PL_utf8_tolower) - PL_utf8_tolower = swash_init("utf8", "ToLower", &PL_sv_undef, 4, 0); - uv = swash_fetch(PL_utf8_tolower, p, TRUE); - uv = uv ? 
UNI_TO_NATIVE(uv) : utf8_to_uvchr(p, 0); - *lenp = UNISKIP(uv); - uvuni_to_utf8(ustrp, uv); - return uv; + return Perl_to_utf8_case(aTHX_ p, ustrp, lenp, + &PL_utf8_tolower, "ToLower", "utf8::ToSpecLower"); } /* a "swash" is a swatch hash */ diff --git a/win32/bin/pl2bat.pl b/win32/bin/pl2bat.pl index cdbac6f273..6e0f166a1d 100644 --- a/win32/bin/pl2bat.pl +++ b/win32/bin/pl2bat.pl @@ -13,7 +13,7 @@ Usage: $0 [-h] or: $0 [-w] [-u] [-n ntargs] [-o otherargs] [-s stripsuffix] [files] -n ntargs arguments to invoke perl with in generated file when run from Windows NT. Defaults to - '-x -S "%0" %*'. + '-x -S %0 %*'. -o otherargs arguments to invoke perl with in generated file other than when run from Windows NT. Defaults to '-x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9'. @@ -33,7 +33,8 @@ EOT my %OPT = (); warn($usage), exit(0) if !getopts('whun:o:a:s:',\%OPT) or $OPT{'h'}; -$OPT{'n'} = '-x -S "%0" %*' unless exists $OPT{'n'}; +# NOTE: %0 is already enclosed in doublequotes by cmd.exe, as appropriate +$OPT{'n'} = '-x -S %0 %*' unless exists $OPT{'n'}; $OPT{'o'} = '-x -S "%0" %1 %2 %3 %4 %5 %6 %7 %8 %9' unless exists $OPT{'o'}; $OPT{'s'} = '/\\.plx?/' unless exists $OPT{'s'}; $OPT{'s'} = ($OPT{'s'} =~ m#^/([^/]*[^/\$]|)\$?/?$# ? $1 : "\Q$OPT{'s'}\E"); @@ -316,7 +317,7 @@ deprecated C<-a> option. =item B<-n> I<ntargs> Arguments to invoke perl with in generated batch file when run from -Windows NT (or Windows 98, probably). Defaults to S<'-x -S "%0" %*'>. +Windows NT (or Windows 98, probably). Defaults to S<'-x -S %0 %*'>. 
=item B<-o> I<otherargs> diff --git a/win32/win32.c b/win32/win32.c index 19bb1dc2e5..3ebf182b91 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -605,12 +605,27 @@ do_spawn2(char *cmd, int exectype) strcpy(cmd2, cmd); a = argv; for (s = cmd2; *s;) { + bool in_quotes = FALSE; while (*s && isSPACE(*s)) s++; if (*s) *(a++) = s; - while (*s && !isSPACE(*s)) - s++; + while (*s) { + /* ignore doubled backslashes, or backslash+quote */ + if (*s == '\\' && (s[1] == '\\' || s[1] == '"')) { + s += 2; + } + /* keep track of when we're within quotes */ + else if (*s == '"') { + s++; + in_quotes = !in_quotes; + } + /* break it up only at spaces that aren't in quotes */ + else if (!in_quotes && isSPACE(*s)) + break; + else + s++; + } if (*s) *s++ = '\0'; } @@ -3047,26 +3062,94 @@ win32_chmod(const char *path, int mode) static char * -create_command_line(const char* command, const char * const *args) +create_command_line(const char * const *args) { dTHX; - int index; - char *cmd, *ptr, *arg; - STRLEN len = strlen(command) + 1; + int index, argc; + char *cmd, *ptr; + const char *arg; + STRLEN len = 0; + bool cmd_shell = FALSE; + bool extra_quotes = FALSE; + + /* The NT cmd.exe shell has the following peculiarity that needs to be + * worked around. It strips a leading and trailing dquote when any + * of the following is true: + * 1. the /S switch was used + * 2. there are more than two dquotes + * 3. there is a special character from this set: &<>()@^| + * 4. no whitespace characters within the two dquotes + * 5. string between two dquotes isn't an executable file + * To work around this, we always add a leading and trailing dquote + * to the string, if the first argument is either "cmd.exe" or "cmd", + * and there were at least two or more arguments passed to cmd.exe + * (not including switches). 
+ */ + if (args[0] + && (stricmp(args[0], "cmd.exe") == 0 + || stricmp(args[0], "cmd") == 0)) + { + cmd_shell = TRUE; + len += 3; + } - for (index = 0; (ptr = (char*)args[index]) != NULL; ++index) - len += strlen(ptr) + 1; + DEBUG_p(PerlIO_printf(Perl_debug_log, "Args ")); + for (index = 0; (arg = (char*)args[index]) != NULL; ++index) { + STRLEN curlen = strlen(arg); + if (!(arg[0] == '"' && arg[curlen-1] == '"')) + len += 2; /* assume quoting needed (worst case) */ + len += curlen + 1; + DEBUG_p(PerlIO_printf(Perl_debug_log, "[%s]",arg)); + } + DEBUG_p(PerlIO_printf(Perl_debug_log, "\n")); + argc = index; New(1310, cmd, len, char); ptr = cmd; - strcpy(ptr, command); for (index = 0; (arg = (char*)args[index]) != NULL; ++index) { - ptr += strlen(ptr); - *ptr++ = ' '; + bool do_quote = 0; + STRLEN curlen = strlen(arg); + + /* we want to protect arguments with spaces with dquotes, + * but only if they aren't already there */ + if (!(arg[0] == '"' && arg[curlen-1] == '"')) { + STRLEN i = 0; + while (i < curlen) { + if (isSPACE(arg[i])) { + do_quote = 1; + break; + } + i++; + } + } + + if (do_quote) + *ptr++ = '"'; + strcpy(ptr, arg); + ptr += curlen; + + if (do_quote) + *ptr++ = '"'; + + if (args[index+1]) + *ptr++ = ' '; + + if (cmd_shell && !extra_quotes + && (stricmp(arg, "/x/c") == 0 || stricmp(arg, "/c") == 0) + && (argc-1 > index+1)) /* two or more arguments to cmd.exe? */ + { + *ptr++ = '"'; + extra_quotes = TRUE; + } } + if (extra_quotes) + *ptr++ = '"'; + + *ptr = '\0'; + return cmd; } @@ -3229,8 +3312,7 @@ win32_spawnvp(int mode, const char *cmdname, const char *const *argv) PROCESS_INFORMATION ProcessInformation; DWORD create = 0; - char *cmd = create_command_line(cmdname, strcmp(cmdname, argv[0]) == 0 - ? 
&argv[1] : argv); + char *cmd = create_command_line(argv); char *fullcmd = Nullch; env = PerlEnv_get_childenv(); @@ -3277,6 +3359,8 @@ win32_spawnvp(int mode, const char *cmdname, const char *const *argv) create |= CREATE_NEW_CONSOLE; } + DEBUG_p(PerlIO_printf(Perl_debug_log, "Spawning [%s] with [%s]\n", + cmdname,cmd)); RETRY: if (!CreateProcess(cmdname, /* search PATH to find executable */ cmd, /* executable, and its arguments */ @@ -3299,6 +3383,9 @@ RETRY: fullcmd = qualified_path(cmdname); if (fullcmd) { cmdname = fullcmd; + DEBUG_p(PerlIO_printf(Perl_debug_log, + "Retrying [%s] with same args\n", + cmdname)); goto RETRY; } } |