summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorCharles Bailey <bailey@newman.upenn.edu>2000-01-20 00:25:30 +0000
committerbailey <bailey@newman.upenn.edu>2000-01-20 00:25:30 +0000
commit146174a91a192983720a158796dc066226ad0e55 (patch)
tree6e7f2035aae6d864035ea40ad9821bab4d0bad93 /lib
parentc529f79d594c53d3968d464c57ac24a21137dd09 (diff)
downloadperl-146174a91a192983720a158796dc066226ad0e55.tar.gz
Quick integration of mainline changes to date
p4raw-id: //depot/vmsperl@4821 p4raw-branched: from //depot/perl@4249 'branch in' eg/cgi/dna_small_gif.uu eg/cgi/wilogo_gif.uu epoc/config.sh epoc/epocish.c epoc/link.pl ext/DB_File/hints/sco.pl ext/DynaLoader/XSLoader_pm.PL ext/DynaLoader/hints/aix.pl ext/DynaLoader/hints/openbsd.pl ext/File/Glob/Makefile.PL ext/File/Glob/TODO ext/IPC/SysV/hints/cygwin.pl ext/NDBM_File/hints/cygwin.pl ext/NDBM_File/hints/sco.pl ext/ODBM_File/hints/cygwin.pl lib/byte.pm lib/byte_heavy.pl lib/unicode/Jamo.txt lib/unicode/NamesList.html lib/unicode/UCD300.html lib/unicode/Unicode.300 lib/unicode/Unicode3.html os2/OS2/REXX/DLL/Changes os2/OS2/REXX/DLL/DLL.pm os2/OS2/REXX/DLL/DLL.xs os2/OS2/REXX/DLL/MANIFEST os2/OS2/REXX/DLL/Makefile.PL os2/OS2/REXX/t/rx_emxrv.t t/lib/glob-case.t t/lib/glob-taint.t t/pod/multiline_items.xr t/pod/pod2usage.xr t/pod/podselect.xr win32/vmem.h t/pod/multiline_items.t t/pod/pod2usage.t t/pod/podselect.t (@4280..) pod/perlhack.pod (@4340..) ext/File/Glob/Changes ext/File/Glob/Glob.xs t/lib/glob-global.t (@4356..) t/lib/glob-basic.t (@4393..) lib/Pod/Man.pm (@4404..) pod/perlfilter.pod (@4406..) t/io/nargv.t (@4503..) ext/File/Glob/bsd_glob.c ext/File/Glob/bsd_glob.h (@4514..) epoc/createpkg.pl epoc/epoc_stubs.c (@4556..) lib/unicode/Eq/Latin1.pl lib/unicode/Eq/Unicode.pl lib/unicode/In/BopomofoExtended.pl lib/unicode/In/BraillePatterns.pl lib/unicode/In/CJKRadicalsSupplement.pl lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl lib/unicode/In/Cherokee.pl lib/unicode/In/IdeographicDescriptionCharacters.pl lib/unicode/In/KangxiRadicals.pl lib/unicode/In/Khmer.pl lib/unicode/In/Mongolian.pl lib/unicode/In/Myanmar.pl lib/unicode/In/Ogham.pl lib/unicode/In/Runic.pl lib/unicode/In/Sinhala.pl lib/unicode/In/Syriac.pl lib/unicode/In/Thaana.pl lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl lib/unicode/In/YiRadicals.pl lib/unicode/In/YiSyllables.pl (@4573..) pod/perlfork.pod (@4602..) ext/File/Glob/Glob.pm (@4615..) win32/genmk95.pl (@4653..) win32/vdir.h (@4702..) win32/perlhost.h (@4789..) p4raw-deleted: from //depot/perl@4249 'delete in' eg/cgi/dna.small.gif.uu eg/cgi/wilogo.gif.uu (@2830..) os2/POSIX.mkfifo (@3518..) lib/warning.pm warning.h (@4008..) lib/unicode/Jamo-2.txt lib/unicode/UnicodeData-Latest.txt (@4184..) lib/unicode/Unicode.html (@4209..) lib/unicode/Eq/Latin1 lib/unicode/Eq/Unicode (@4228..) lib/Pod/PlainText.pm (@4280..) ext/DynaLoader/dl_cygwin.xs (@4302..) epoc/config.h (@4475..) epoc/perl.mmp epoc/perl.pkg (@4556..) p4raw-integrated: from //depot/perl@4249 'copy in' ext/B/NOTES ext/B/ramblings/runtime.porting (@562..) hints/amigaos.sh (@575..) lib/Net/Ping.pm (@854..) lib/strict.pm (@988..) ext/Thread/Thread/Queue.pm (@1085..) ext/Thread/Thread/Semaphore.pm (@1086..) lib/ExtUtils/Installed.pm (@1315..) plan9/plan9ish.h (@1451..) mpeix/mpeixish.h (@1478..) Porting/p4d2p (@1485..) ext/ODBM_File/hints/sco.pl ext/Thread/sync.t ext/Thread/sync2.t hints/lynxos.sh lib/Text/Tabs.pm os2/OS2/REXX/Changes os2/OS2/REXX/t/rx_dllld.t os2/OS2/REXX/t/rx_objcall.t os2/OS2/REXX/t/rx_tievar.t os2/OS2/REXX/t/rx_tieydb.t os2/OS2/REXX/t/rx_vrexx.t os2/dl_os2.c t/comp/term.t t/io/print.t t/op/glob.t util.h win32/bin/perlglob.pl (@1575..) ext/B/O.pm (@1617..) ext/Thread/typemap lib/File/DosGlob.pm (@1760..) t/op/substr.t (@1780..) vos/vosish.h (@1838..) lib/ExtUtils/Mkbootstrap.pm (@1932..) Porting/genlog (@1978..) lib/constant.pm (@2029..) t/op/array.t (@2210..) lib/Math/Complex.pm (@2219..) hints/dynixptx.sh (@2318..) ext/IO/IO.pm (@2354..) hints/mint.sh lib/Tie/Array.pm lib/Tie/Hash.pm (@2620..) os2/Changes (@2695..) globvar.sym (@2746..) t/comp/bproto.t (@2817..) lib/DB.pm (@2820..) hints/mpeix.sh lib/FindBin.pm (@2830..) ext/IO/lib/IO/Select.pm (@2882..) t/lib/english.t (@2891..) t/op/subst.t (@2892..) t/op/range.t (@2923..) pod/perl5005delta.pod (@2929..) hints/next_3.sh hints/next_3_0.sh (@3023..) lib/Getopt/Std.pm (@3034..) lib/File/Spec.pm (@3042..) t/pragma/warn/1global (@3096..) t/pod/emptycmd.t t/pod/for.t t/pod/for.xr t/pod/headings.t t/pod/headings.xr t/pod/include.t t/pod/include.xr t/pod/included.t t/pod/included.xr t/pod/lref.t t/pod/lref.xr t/pod/nested_items.t t/pod/nested_items.xr t/pod/nested_seqs.t t/pod/nested_seqs.xr t/pod/oneline_cmds.t t/pod/oneline_cmds.xr t/pod/testcmp.pl (@3129..) README.hurd (@3148..) ext/re/re.pm (@3152..) lib/ExtUtils/Liblist.pm t/comp/require.t (@3153..) Porting/p4desc (@3183..) Porting/pumpkin.pod hints/linux.sh myconfig.SH (@3267..) t/op/readdir.t (@3299..) t/pod/special_seqs.t t/pod/special_seqs.xr (@3304..) t/lib/fields.t (@3335..) t/op/taint.t (@3357..) lib/File/Copy.pm (@3362..) installhtml (@3371..) ext/Socket/Socket.pm (@3391..) t/lib/ipc_sysv.t t/op/nothread.t (@3399..) lib/CPAN/FirstTime.pm (@3458..) pod/perlfaq8.pod (@3459..) pod/perlcall.pod pod/perlipc.pod pod/perltie.pod pod/perlxs.pod (@3460..) t/pragma/strict-subs (@3514..) ext/ByteLoader/ByteLoader.pm lib/Math/BigFloat.pm (@3516..) x2p/walk.c (@3518..) win32/win32thread.c win32/win32thread.h (@3525..) os2/OS2/REXX/REXX.xs (@3531..) utf8.h (@3537..) lib/ExtUtils/Embed.pm (@3553..) ext/Thread/Thread/Specific.pm (@3564..) ext/POSIX/Makefile.PL lib/Cwd.pm (@3582..) hv.h (@3602..) ext/NDBM_File/NDBM_File.pm ext/ODBM_File/ODBM_File.pm ext/SDBM_File/SDBM_File.pm (@3603..) lib/Sys/Hostname.pm (@3631..) os2/os2.c (@3640..) emacs/ptags miniperlmain.c (@3660..) ext/IO/IO.xs win32/config_h.PL win32/dl_win32.xs win32/runperl.c win32/win32sck.c (@3667..) pod/Win32.pod pod/perlfaq4.pod pod/perltodo.pod (@3676..) lib/vars.pm (@3686..) lib/ExtUtils/Manifest.pm (@3693..) hints/README.hints hints/epix.sh hints/esix4.sh hints/next_4.sh (@3753..) ext/GDBM_File/GDBM_File.pm lib/CPAN.pm pod/perllocale.pod (@3754..) lib/bigfloat.pl (@3759..) lib/Pod/Text/Color.pm lib/Pod/Text/Termcap.pm pod/pod2text.PL (@3788..) ext/POSIX/POSIX.pm lib/AutoLoader.pm (@3794..) Porting/makerel (@3797..) t/lib/io_unix.t (@3825..) EXTERN.h Porting/patchls ext/SDBM_File/sdbm/pair.c makedepend.SH (@3852..) lib/File/Spec/Unix.pm unixish.h (@3855..) lib/ExtUtils/Mksymlists.pm (@3856..) t/pragma/utf8.t (@3892..) README (@3901..) t/op/eval.t (@3988..) MAINTAIN ext/B/B/Stash.pm ext/ByteLoader/ByteLoader.xs ext/Fcntl/Fcntl.xs ext/SDBM_File/Makefile.PL lib/Math/Trig.pm os2/OS2/REXX/Makefile.PL perlsdio.h regnodes.h utils/perlbug.PL (@4008..) epoc/epoc.c pod/perltoc.pod pod/perlvar.pod regexp.h t/lib/attrs.t t/op/time.t t/pragma/warn/2use t/pragma/warn/3both t/pragma/warn/7fatal universal.c warnings.h warnings.pl (@4076..) ext/Opcode/Opcode.pm ext/attrs/attrs.xs t/pragma/warn/pp_ctl (@4081..) t/pragma/warn/pp_sys (@4088..) t/pragma/sub_lval.t (@4090..) t/TEST (@4092..) xsutils.c (@4101..) pod/buildtoc (@4120..) djgpp/config.over djgpp/djgppsed.sh pod/pod2usage.PL pod/podselect.PL (@4121..) lib/Pod/Html.pm (@4122..) av.h (@4123..) t/pragma/locale.t (@4130..) pod/perldata.pod (@4131..) pod/perllexwarn.pod (@4132..) ext/B/typemap ext/DB_File/DB_File.pm lib/ExtUtils/typemap (@4142..) ext/B/Makefile.PL t/lib/bigfltpm.t (@4149..) lib/ExtUtils/MM_VMS.pm vms/descrip_mms.template (@4182..) ext/DynaLoader/dl_vmesa.xs ext/DynaLoader/dl_vms.xs lib/unicode/ReadMe.txt pod/perlsyn.pod t/op/groups.t (@4184..) t/pragma/warn/op (@4189..) thrdvar.h (@4197..) ext/B/B/Terse.pm (@4199..) t/lib/posix.t (@4223..) keywords.h keywords.pl pod/perlfaq3.pod pod/perlsub.pod t/pragma/strict-vars (@4227..) pod/perlfaq9.pod (@4228..) djgpp/configure.bat lib/Exporter/Heavy.pm (@4242..) Porting/findvars lib/ExtUtils/xsubpp pod/perlguts.pod t/lib/filecopy.t (@4271..) ext/attrs/attrs.pm (@4278..) t/op/avhv.t (@4279..) lib/Pod/Checker.pm lib/Pod/InputObjects.pm t/pod/testp2pt.pl (@4280..) lib/Pod/Usage.pm pod/podchecker.PL t/pod/poderrs.t t/pod/poderrs.xr t/pod/testpchk.pl (@4281..) lib/Pod/Text.pm pod/pod2man.PL (@4282..) ext/Devel/Peek/Peek.xs ext/DynaLoader/dl_beos.xs ext/DynaLoader/dl_dld.xs ext/DynaLoader/dl_mpeix.xs ext/DynaLoader/dlutils.c perlio.c (@4302..) ext/B/defsubs_h.PL t/pragma/constant.t (@4303..) ext/Thread/Thread.xs (@4316..) ext/Thread/Thread.pm (@4328..) lib/Exporter.pm (@4331..) ext/DynaLoader/dl_aix.xs (@4336..) pod/Makefile pod/roffitall (@4340..) lib/lib.pm (@4343..) pod/perlref.pod (@4345..) perly.y perly_c.diff (@4350..) t/lib/safe2.t (@4353..) hints/svr5.sh (@4377..) pod/perlfaq2.pod (@4383..) lib/Benchmark.pm (@4384..) win32/include/dirent.h (@4385..) pod/perlopentut.pod (@4390..) hints/os2.sh os2/Makefile.SHs (@4393..) lib/Pod/Parser.pm lib/Pod/Select.pm (@4400..) malloc.c (@4402..) pod/perlmodlib.pod (@4404..) perlvars.h (@4409..) t/op/sort.t (@4418..) t/op/int.t (@4430..) os2/OS2/REXX/REXX.pm t/io/fs.t t/op/magic.t (@4432..) lib/File/Path.pm (@4433..) t/op/lex_assign.t (@4436..) lib/attributes.pm (@4437..) pod/perlop.pod (@4438..) ext/POSIX/POSIX.xs (@4448..) Policy_sh.SH ext/Data/Dumper/Dumper.xs hints/dec_osf.sh t/lib/charnames.t (@4475..) lib/Time/Local.pm (@4481..) cv.h ext/B/B/Xref.pm (@4485..) doop.c handy.h hints/irix_6.sh pp.h taint.c (@4496..) deb.c (@4505..) dosish.h os2/os2ish.h perly.c vms/perly_c.vms (@4511..) ext/B/B/Lint.pm pod/perlmod.pod pod/perlrun.pod (@4515..) bytecode.pl ext/B/B.pm ext/B/B/Asmdata.pm ext/B/B/CC.pm ext/B/B/Debug.pm ext/B/B/Deparse.pm ext/ByteLoader/bytecode.h ext/ByteLoader/byterun.c ext/ByteLoader/byterun.h ext/Devel/Peek/Peek.pm gv.h (@4545..) README.epoc epoc/epocish.h ext/B/B.xs ext/Fcntl/Fcntl.pm hints/hpux.sh t/lib/syslfs.t t/op/lfs.t t/op/pat.t (@4556..) ext/DynaLoader/DynaLoader_pm.PL hints/solaris_2.sh lib/unicode/Is/SylA.pl lib/unicode/Is/SylC.pl lib/unicode/Is/SylE.pl lib/unicode/Is/SylI.pl lib/unicode/Is/SylO.pl lib/unicode/Is/SylU.pl lib/unicode/Is/SylV.pl lib/unicode/Is/SylWA.pl lib/unicode/Is/SylWC.pl lib/unicode/Is/SylWE.pl lib/unicode/Is/SylWI.pl lib/unicode/Is/SylWV.pl lib/unicode/mktables.PL t/op/pack.t t/op/regexp.t utils/h2xs.PL utils/perldoc.PL vms/vms.c vms/vmsish.h win32/win32iop.h (@4573..) t/lib/dumper.t t/pragma/overload.t (@4574..) ext/Errno/Errno_pm.PL ext/IO/lib/IO/Socket.pm (@4575..) t/op/misc.t (@4578..) ext/Opcode/Opcode.xs (@4579..) cop.h (@4588..) lib/perl5db.pl (@4601..) XSUB.h globals.c pod/perl.pod run.c scope.c (@4602..) op.h win32/perllib.c (@4603..) AUTHORS pod/perlport.pod t/op/runlevel.t (@4604..) scope.h (@4605..) README.vms hints/aix.sh vms/subconfigure.com (@4606..) pod/perlxstut.pod (@4620..) regcomp.h (@4622..) ext/Devel/DProf/DProf.pm ext/DynaLoader/Makefile.PL (@4623..) pod/perltrap.pod (@4630..) ext/B/B/Bytecode.pm (@4631..) opcode.h opcode.pl t/pragma/warn/4lint t/pragma/warn/doio t/pragma/warn/pp_hot (@4641..) mg.c (@4658..) iperlsys.h (@4660..) ext/B/B/C.pm (@4662..) pod/perlre.pod (@4666..) embedvar.h (@4668..) t/lib/filefind.t (@4671..) intrpvar.h (@4672..) ext/DynaLoader/dl_hpux.xs ext/DynaLoader/dl_next.xs ext/DynaLoader/dl_rhapsody.xs (@4686..) lib/File/Find.pm (@4687..) cygwin/Makefile.SHs (@4688..) t/op/re_tests (@4693..) hv.c (@4694..) t/op/delete.t (@4695..) utf8.c (@4698..) thread.h (@4704..) pod/perldiag.pod pp_sys.c (@4709..) Makefile.SH (@4712..) hints/cygwin.sh t/op/stat.t (@4717..) README.os2 lib/ExtUtils/Install.pm (@4720..) t/pragma/warn/doop t/pragma/warn/pp t/pragma/warn/regcomp t/pragma/warn/sv t/pragma/warn/toke t/pragma/warn/utf8 (@4721..) lib/diagnostics.pm (@4722..) regcomp.c (@4724..) configpm pp_ctl.c sv.h (@4726..) global.sym (@4727..) INTERN.h README.win32 lib/ExtUtils/MM_Win32.pm makedef.pl (@4729..) t/io/argv.t (@4732..) doio.c pp_hot.c (@4736..) toke.c (@4740..) gv.c (@4742..) win32/win32.h (@4743..) ext/Devel/DProf/DProf.xs objXSUB.h (@4744..) ext/Data/Dumper/Dumper.pm (@4745..) embed.h embed.pl ext/DynaLoader/dl_dlopen.xs proto.h (@4746..) pp.c (@4747..) sv.c (@4749..) lib/ExtUtils/MM_Unix.pm lib/ExtUtils/MakeMaker.pm (@4754..) util.c utils/perlcc.PL (@4755..) t/io/open.t (@4757..) regexec.c (@4759..) MANIFEST installman (@4769..) Porting/Glossary (@4771..) t/lib/thread.t (@4772..) Changes INSTALL Porting/config.sh Porting/config_H config_h.SH installperl patchlevel.h win32/config.bc win32/config.gc win32/config.vc win32/config_sh.PL win32/win32.c (@4773..) win32/Makefile win32/config_H.bc win32/config_H.gc win32/config_H.vc win32/makefile.mk (@4774..) perl.c (@4779..) t/op/fork.t (@4791..) av.c pod/perldelta.pod (@4796..) pod/perlfunc.pod (@4799..) dump.c (@4800..) op.c (@4801..) perl.h (@4805..) Configure (@4814..) 'merge in' configure.com (@4767..)
Diffstat (limited to 'lib')
-rw-r--r--lib/AutoLoader.pm2
-rw-r--r--lib/Benchmark.pm404
-rw-r--r--lib/CPAN.pm3
-rw-r--r--lib/CPAN/FirstTime.pm3
-rw-r--r--lib/Cwd.pm2
-rw-r--r--lib/DB.pm2
-rw-r--r--lib/Exporter.pm18
-rw-r--r--lib/Exporter/Heavy.pm3
-rw-r--r--lib/ExtUtils/Embed.pm6
-rw-r--r--lib/ExtUtils/Install.pm9
-rw-r--r--lib/ExtUtils/Installed.pm2
-rw-r--r--lib/ExtUtils/Liblist.pm6
-rw-r--r--lib/ExtUtils/MM_Unix.pm69
-rw-r--r--lib/ExtUtils/MM_VMS.pm7
-rw-r--r--lib/ExtUtils/MM_Win32.pm61
-rw-r--r--lib/ExtUtils/MakeMaker.pm96
-rw-r--r--lib/ExtUtils/Manifest.pm1
-rw-r--r--lib/ExtUtils/Mkbootstrap.pm4
-rw-r--r--lib/ExtUtils/Mksymlists.pm2
-rw-r--r--lib/ExtUtils/typemap2
-rwxr-xr-xlib/ExtUtils/xsubpp134
-rw-r--r--lib/File/Copy.pm23
-rw-r--r--lib/File/DosGlob.pm2
-rw-r--r--lib/File/Find.pm726
-rw-r--r--lib/File/Path.pm19
-rw-r--r--lib/File/Spec.pm5
-rw-r--r--lib/File/Spec/Unix.pm2
-rw-r--r--lib/FindBin.pm2
-rw-r--r--lib/Getopt/Std.pm2
-rw-r--r--lib/Math/BigFloat.pm11
-rw-r--r--lib/Math/Complex.pm2
-rw-r--r--lib/Math/Trig.pm2
-rw-r--r--lib/Net/Ping.pm2
-rw-r--r--lib/Pod/Checker.pm795
-rw-r--r--lib/Pod/Html.pm2
-rw-r--r--lib/Pod/InputObjects.pm46
-rw-r--r--lib/Pod/Man.pm1194
-rw-r--r--lib/Pod/Parser.pm392
-rw-r--r--lib/Pod/PlainText.pm650
-rw-r--r--lib/Pod/Select.pm9
-rw-r--r--lib/Pod/Text.pm214
-rw-r--r--lib/Pod/Text/Color.pm21
-rw-r--r--lib/Pod/Text/Termcap.pm19
-rw-r--r--lib/Pod/Usage.pm20
-rw-r--r--lib/Sys/Hostname.pm4
-rw-r--r--lib/Text/Tabs.pm6
-rw-r--r--lib/Tie/Array.pm37
-rw-r--r--lib/Tie/Hash.pm2
-rw-r--r--lib/Time/Local.pm53
-rw-r--r--lib/attributes.pm47
-rw-r--r--lib/bigfloat.pl7
-rw-r--r--lib/byte.pm33
-rw-r--r--lib/byte_heavy.pl8
-rw-r--r--lib/constant.pm201
-rwxr-xr-xlib/diagnostics.pm21
-rw-r--r--lib/lib.pm49
-rw-r--r--lib/perl5db.pl70
-rw-r--r--lib/strict.pm3
-rw-r--r--lib/unicode/Eq/Latin1.pl (renamed from lib/unicode/Eq/Latin1)5
-rw-r--r--lib/unicode/Eq/Unicode.pl (renamed from lib/unicode/Eq/Unicode)5
-rw-r--r--lib/unicode/In/BopomofoExtended.pl6
-rw-r--r--lib/unicode/In/BraillePatterns.pl6
-rw-r--r--lib/unicode/In/CJKRadicalsSupplement.pl6
-rw-r--r--lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl6
-rw-r--r--lib/unicode/In/Cherokee.pl6
-rw-r--r--lib/unicode/In/IdeographicDescriptionCharacters.pl6
-rw-r--r--lib/unicode/In/KangxiRadicals.pl6
-rw-r--r--lib/unicode/In/Khmer.pl6
-rw-r--r--lib/unicode/In/Mongolian.pl6
-rw-r--r--lib/unicode/In/Myanmar.pl6
-rw-r--r--lib/unicode/In/Ogham.pl6
-rw-r--r--lib/unicode/In/Runic.pl6
-rw-r--r--lib/unicode/In/Sinhala.pl6
-rw-r--r--lib/unicode/In/Syriac.pl6
-rw-r--r--lib/unicode/In/Thaana.pl6
-rw-r--r--lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl6
-rw-r--r--lib/unicode/In/YiRadicals.pl6
-rw-r--r--lib/unicode/In/YiSyllables.pl6
-rw-r--r--lib/unicode/Is/SylA.pl3
-rw-r--r--lib/unicode/Is/SylC.pl3
-rw-r--r--lib/unicode/Is/SylE.pl3
-rw-r--r--lib/unicode/Is/SylI.pl3
-rw-r--r--lib/unicode/Is/SylO.pl3
-rw-r--r--lib/unicode/Is/SylU.pl3
-rw-r--r--lib/unicode/Is/SylV.pl3
-rw-r--r--lib/unicode/Is/SylWA.pl3
-rw-r--r--lib/unicode/Is/SylWC.pl3
-rw-r--r--lib/unicode/Is/SylWE.pl3
-rw-r--r--lib/unicode/Is/SylWI.pl3
-rw-r--r--lib/unicode/Is/SylWV.pl3
-rw-r--r--lib/unicode/Jamo.txt (renamed from lib/unicode/Jamo-2.txt)0
-rw-r--r--lib/unicode/NamesList.html226
-rw-r--r--lib/unicode/ReadMe.txt33
-rw-r--r--lib/unicode/UCD300.html (renamed from lib/unicode/Unicode.html)0
-rw-r--r--lib/unicode/Unicode.300 (renamed from lib/unicode/UnicodeData-Latest.txt)0
-rw-r--r--lib/unicode/Unicode3.html1988
-rwxr-xr-xlib/unicode/mktables.PL49
-rw-r--r--lib/vars.pm6
-rw-r--r--lib/warning.pm163
99 files changed, 6609 insertions, 1547 deletions
diff --git a/lib/AutoLoader.pm b/lib/AutoLoader.pm
index 8e15c1f60c..4bbcb33e10 100644
--- a/lib/AutoLoader.pm
+++ b/lib/AutoLoader.pm
@@ -11,7 +11,7 @@ BEGIN {
@EXPORT_OK = @EXPORT_OK = qw(AUTOLOAD);
$is_dosish = $^O eq 'dos' || $^O eq 'os2' || $^O eq 'MSWin32';
$is_vms = $^O eq 'VMS';
- $VERSION = $VERSION = '5.57';
+ $VERSION = '5.57';
}
AUTOLOAD {
diff --git a/lib/Benchmark.pm b/lib/Benchmark.pm
index 767cb67d13..487ddd5717 100644
--- a/lib/Benchmark.pm
+++ b/lib/Benchmark.pm
@@ -2,13 +2,7 @@ package Benchmark;
=head1 NAME
-Benchmark - benchmark running times of code
-
-timethis - run a chunk of code several times
-
-timethese - run several chunks of code several times
-
-timeit - run a chunk of code and see how long it goes
+Benchmark - benchmark running times of Perl code
=head1 SYNOPSIS
@@ -26,14 +20,50 @@ timeit - run a chunk of code and see how long it goes
'Name2' => sub { ...code2... },
});
+ # cmpthese can be used both ways as well
+ cmpthese($count, {
+ 'Name1' => '...code1...',
+ 'Name2' => '...code2...',
+ });
+
+ cmpthese($count, {
+ 'Name1' => sub { ...code1... },
+ 'Name2' => sub { ...code2... },
+ });
+
+ # ...or in two stages
+ $results = timethese($count,
+ {
+ 'Name1' => sub { ...code1... },
+ 'Name2' => sub { ...code2... },
+ },
+ 'none'
+ );
+ cmpthese( $results ) ;
+
$t = timeit($count, '...other code...')
print "$count loops of other code took:",timestr($t),"\n";
+ $t = countit($time, '...other code...')
+ $count = $t->iters ;
+ print "$count loops of other code took:",timestr($t),"\n";
+
=head1 DESCRIPTION
The Benchmark module encapsulates a number of routines to help you
figure out how long it takes to execute some code.
+timethis - run a chunk of code several times
+
+timethese - run several chunks of code several times
+
+cmpthese - print results of timethese as a comparison chart
+
+timeit - run a chunk of code and see how long it goes
+
+countit - see how many times a chunk of code runs in a given time
+
+
=head2 Methods
=over 10
@@ -57,6 +87,10 @@ Enables or disable debugging by setting the C<$Benchmark::Debug> flag:
$t = timeit(10, ' 5 ** $Global ');
debug Benchmark 0;
+=item iters
+
+Returns the number of iterations.
+
=back
=head2 Standard Exports
@@ -119,28 +153,26 @@ The routines are called in string comparison order of KEY.
The COUNT can be zero or negative, see timethis().
+Returns a hash of Benchmark objects, keyed by name.
+
=item timediff ( T1, T2 )
Returns the difference between two Benchmark times as a Benchmark
object suitable for passing to timestr().
-=item timesum ( T1, T2 )
-
-Returns the sum of two Benchmark times as a Benchmark object suitable
-for passing to timestr().
-
=item timestr ( TIMEDIFF, [ STYLE, [ FORMAT ] ] )
Returns a string that formats the times in the TIMEDIFF object in
the requested STYLE. TIMEDIFF is expected to be a Benchmark object
similar to that returned by timediff().
-STYLE can be any of 'all', 'noc', 'nop' or 'auto'. 'all' shows each
-of the 5 times available ('wallclock' time, user time, system time,
+STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' shows
+each of the 5 times available ('wallclock' time, user time, system time,
user time of children, and system time of children). 'noc' shows all
except the two children times. 'nop' shows only wallclock and the
two children times. 'auto' (the default) will act as 'all' unless
the children times are both zero, in which case it acts as 'noc'.
+'none' prevents output.
FORMAT is the L<printf(3)>-style format specifier (without the
leading '%') to use to print the times. It defaults to '5.2f'.
@@ -162,6 +194,34 @@ Clear the cached time for COUNT rounds of the null loop.
Clear all cached times.
+=item cmpthese ( COUT, CODEHASHREF, [ STYLE ] )
+
+=item cmpthese ( RESULTSHASHREF )
+
+Optionally calls timethese(), then outputs comparison chart. This
+chart is sorted from slowest to fastest, and shows the percent
+speed difference between each pair of tests. Can also be passed
+the data structure that timethese() returns:
+
+ $results = timethese( .... );
+ cmpthese( $results );
+
+Returns the data structure returned by timethese() (or passed in).
+
+=item countit(TIME, CODE)
+
+Arguments: TIME is the minimum length of time to run CODE for, and CODE is
+the code to run. CODE may be either a code reference or a string to
+be eval'd; either way it will be run in the caller's package.
+
+TIME is I<not> negative. countit() will run the loop many times to
+calculate the speed of CODE before running it for TIME. The actual
+time run for will usually be greater than TIME due to system clock
+resolution, so it's best to look at the number of iterations divided
+by the times that you are concerned with, not just the iterations.
+
+Returns: a Benchmark object.
+
=item disablecache ( )
Disable caching of timings for the null loop. This will force Benchmark
@@ -173,6 +233,11 @@ Enable caching of timings for the null loop. The time taken for COUNT
rounds of the null loop will be calculated only once for each
different COUNT used.
+=item timesum ( T1, T2 )
+
+Returns the sum of two Benchmark times as a Benchmark object suitable
+for passing to timestr().
+
=back
=head1 NOTES
@@ -180,7 +245,7 @@ different COUNT used.
The data is stored as a list of values from the time and times
functions:
- ($real, $user, $system, $children_user, $children_system)
+ ($real, $user, $system, $children_user, $children_system, $iters)
in seconds for the whole loop (not divided by the number of rounds).
@@ -192,7 +257,7 @@ The time of the null loop (a loop with the same
number of rounds but empty loop body) is subtracted
from the time of the real loop.
-The null loop times are cached, the key being the
+The null loop times can be cached, the key being the
number of rounds. The caching can be controlled using
calls like these:
@@ -202,6 +267,38 @@ calls like these:
disablecache();
enablecache();
+Caching is off by default, as it can (usually slightly) decrease
+accuracy and does not usually noticably affect runtimes.
+
+=head1 EXAMPLES
+
+For example,
+
+ use Benchmark;$x=3;cmpthese(-5,{a=>sub{$x*$x},b=>sub{$x**2}})
+
+outputs something like this:
+
+ Benchmark: running a, b, each for at least 5 CPU seconds...
+ a: 10 wallclock secs ( 5.14 usr + 0.13 sys = 5.27 CPU) @ 3835055.60/s (n=20210743)
+ b: 5 wallclock secs ( 5.41 usr + 0.00 sys = 5.41 CPU) @ 1574944.92/s (n=8520452)
+ Rate b a
+ b 1574945/s -- -59%
+ a 3835056/s 144% --
+
+while
+
+ use Benchmark;
+ $x=3;
+ $r=timethese(-5,{a=>sub{$x*$x},b=>sub{$x**2}},'none');
+ cmpthese($r);
+
+outputs something like this:
+
+ Rate b a
+ b 1559428/s -- -62%
+ a 4152037/s 166% --
+
+
=head1 INHERITANCE
Benchmark inherits from no other class, except of course
@@ -210,7 +307,7 @@ for Exporter.
=head1 CAVEATS
Comparing eval'd strings with code references will give you
-inaccurate results: a code reference will show a slower
+inaccurate results: a code reference will show a slightly slower
execution time than the equivalent eval'd string.
The real time timing is done using time(2) and
@@ -226,6 +323,10 @@ The system time of the null loop might be slightly
more than the system time of the loop with the actual
code and therefore the difference might end up being E<lt> 0.
+=head1 SEE ALSO
+
+L<Devel::DProf> - a Perl code profiler
+
=head1 AUTHORS
Jarkko Hietaniemi <F<jhi@iki.fi>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>
@@ -241,6 +342,10 @@ documentation.
April 04-07th, 1997: by Jarkko Hietaniemi, added the run-for-some-time
functionality.
+September, 1999; by Barrie Slaymaker: math fixes and accuracy and
+efficiency tweaks. Added cmpthese(). A result is now returned from
+timethese(). Exposed countit() (was runfor()).
+
=cut
# evaluate something in a clean lexical environment
@@ -253,8 +358,11 @@ sub _doeval { eval shift }
use Carp;
use Exporter;
@ISA=(Exporter);
-@EXPORT=qw(timeit timethis timethese timediff timesum timestr);
-@EXPORT_OK=qw(clearcache clearallcache disablecache enablecache);
+@EXPORT=qw(timeit timethis timethese timediff timestr);
+@EXPORT_OK=qw(timesum cmpthese countit
+ clearcache clearallcache disablecache enablecache);
+
+$VERSION = 1.00;
&init;
@@ -290,6 +398,7 @@ sub cpu_p { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps ; }
sub cpu_c { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $cu+$cs ; }
sub cpu_a { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $pu+$ps+$cu+$cs ; }
sub real { my($r,$pu,$ps,$cu,$cs) = @{$_[0]}; $r ; }
+sub iters { $_[0]->[5] ; }
sub timediff {
my($a, $b) = @_;
@@ -364,15 +473,14 @@ sub runloop {
croak "runloop unable to compile '$c': $@\ncode: $subcode\n" if $@;
print STDERR "runloop $n '$subcode'\n" if $debug;
- # Wait for the user timer to tick. This makes the error range more like -0.01, +0. If
- # we don't wait, then it's more like -0.01, +0.01. This may not seem important, but it
- # significantly reduces the chances of getting too low initial $n in the initial, 'find
- # the minimum' loop in &runfor. This, in turn, can reduce the number of calls to
+ # Wait for the user timer to tick. This makes the error range more like
+ # -0.01, +0. If we don't wait, then it's more like -0.01, +0.01. This
+ # may not seem important, but it significantly reduces the chances of
+ # getting a too low initial $n in the initial, 'find the minimum' loop
+ # in &countit. This, in turn, can reduce the number of calls to
# &runloop a lot, and thus reduce additive errors.
my $tbase = Benchmark->new(0)->[1];
- do {
- $t0 = Benchmark->new(0);
- } while ( $t0->[1] == $tbase ) ;
+ while ( ( $t0 = Benchmark->new(0) )->[1] == $tbase ) {} ;
&$subref;
$t1 = Benchmark->new($n);
$td = &timediff($t1, $t0);
@@ -386,18 +494,20 @@ sub timeit {
my($wn, $wc, $wd);
printf STDERR "timeit $n $code\n" if $debug;
- my $cache_key = $n . ( ref( $code ) ? 'c' : 's' ) ;
+ my $cache_key = $n . ( ref( $code ) ? 'c' : 's' );
if ($cache && exists $cache{$cache_key} ) {
$wn = $cache{$cache_key};
} else {
$wn = &runloop($n, ref( $code ) ? sub { undef } : '' );
+ # Can't let our baseline have any iterations, or they get subtracted
+ # out of the result.
+ $wn->[5] = 0;
$cache{$cache_key} = $wn;
}
$wc = &runloop($n, $code);
$wd = timediff($wc, $wn);
-
timedebug("timeit: ",$wc);
timedebug(" - ",$wn);
timedebug(" = ",$wd);
@@ -409,8 +519,9 @@ sub timeit {
my $default_for = 3;
my $min_for = 0.1;
-sub runfor {
- my ($code, $tmax) = @_;
+
+sub countit {
+ my ( $tmax, $code ) = @_;
if ( not defined $tmax or $tmax == 0 ) {
$tmax = $default_for;
@@ -418,52 +529,61 @@ sub runfor {
$tmax = -$tmax;
}
- die "runfor(..., $tmax): timelimit cannot be less than $min_for.\n"
+ die "countit($tmax, ...): timelimit cannot be less than $min_for.\n"
if $tmax < $min_for;
- my ($n, $td, $tc, $ntot, $rtot, $utot, $stot, $cutot, $cstot );
+ my ($n, $tc);
# First find the minimum $n that gives a significant timing.
-
- my $nmin;
+ for ($n = 1; ; $n *= 2 ) {
+ my $td = timeit($n, $code);
+ $tc = $td->[1] + $td->[2];
+ last if $tc > 0.1;
+ }
- for ($n = 1, $tc = 0; ; $n *= 2 ) {
- $td = timeit($n, $code);
+ my $nmin = $n;
+
+ # Get $n high enough that we can guess the final $n with some accuracy.
+ my $tpra = 0.1 * $tmax; # Target/time practice.
+ while ( $tc < $tpra ) {
+ # The 5% fudge is to keep us from iterating again all
+ # that often (this speeds overall responsiveness when $tmax is big
+ # and we guess a little low). This does not noticably affect
+ # accuracy since we're not couting these times.
+ $n = int( $tpra * 1.05 * $n / $tc ); # Linear approximation.
+ my $td = timeit($n, $code);
$tc = $td->[1] + $td->[2];
- last if $tc > 0.1 ;
}
- $nmin = $n;
-
- my $ttot = 0;
- my $tpra = 0.05 * $tmax; # Target/time practice.
- # Double $n until we have think we have practiced enough.
- for ( ; $ttot < $tpra; $n *= 2 ) {
- $td = timeit($n, $code);
- $ntot += $n;
- $rtot += $td->[0];
- $utot += $td->[1];
- $stot += $td->[2];
- $ttot = $utot + $stot;
+ # Now, do the 'for real' timing(s), repeating until we exceed
+ # the max.
+ my $ntot = 0;
+ my $rtot = 0;
+ my $utot = 0.0;
+ my $stot = 0.0;
+ my $cutot = 0.0;
+ my $cstot = 0.0;
+ my $ttot = 0.0;
+
+ # The 5% fudge is because $n is often a few % low even for routines
+ # with stable times and avoiding extra timeit()s is nice for
+ # accuracy's sake.
+ $n = int( $n * ( 1.05 * $tmax / $tc ) );
+
+ while () {
+ my $td = timeit($n, $code);
+ $ntot += $n;
+ $rtot += $td->[0];
+ $utot += $td->[1];
+ $stot += $td->[2];
$cutot += $td->[3];
$cstot += $td->[4];
- }
-
- my $r;
+ $ttot = $utot + $stot;
+ last if $ttot >= $tmax;
- # Then iterate towards the $tmax.
- while ( $ttot < $tmax ) {
- $r = $tmax / $ttot - 1; # Linear approximation.
+ my $r = $tmax / $ttot - 1; # Linear approximation.
$n = int( $r * $ntot );
$n = $nmin if $n < $nmin;
- $td = timeit($n, $code);
- $ntot += $n;
- $rtot += $td->[0];
- $utot += $td->[1];
- $stot += $td->[2];
- $ttot = $utot + $stot;
- $cutot += $td->[3];
- $cstot += $td->[4];
}
return bless [ $rtot, $utot, $stot, $cutot, $cstot, $ntot ];
@@ -486,14 +606,14 @@ sub timethis{
$title = "timethis $n" unless defined $title;
} else {
$fort = n_to_for( $n );
- $t = runfor($code, $fort);
+ $t = countit( $fort, $code );
$title = "timethis for $fort" unless defined $title;
$forn = $t->[-1];
}
local $| = 1;
$style = "" unless defined $style;
- printf("%10s: ", $title);
- print timestr($t, $style, $defaultfmt),"\n";
+ printf("%10s: ", $title) unless $style eq 'none';
+ print timestr($t, $style, $defaultfmt),"\n" unless $style eq 'none';
$n = $forn if defined $forn;
@@ -513,25 +633,163 @@ sub timethese{
unless ref $alt eq HASH;
my @names = sort keys %$alt;
$style = "" unless defined $style;
- print "Benchmark: ";
+ print "Benchmark: " unless $style eq 'none';
if ( $n > 0 ) {
croak "non-integer loopcount $n, stopped" if int($n)<$n;
- print "timing $n iterations of";
+ print "timing $n iterations of" unless $style eq 'none';
} else {
- print "running";
+ print "running" unless $style eq 'none';
}
- print " ", join(', ',@names);
+ print " ", join(', ',@names) unless $style eq 'none';
unless ( $n > 0 ) {
my $for = n_to_for( $n );
- print ", each for at least $for CPU seconds";
+ print ", each for at least $for CPU seconds" unless $style eq 'none';
}
- print "...\n";
+ print "...\n" unless $style eq 'none';
# we could save the results in an array and produce a summary here
# sum, min, max, avg etc etc
+ my %results;
foreach my $name (@names) {
- timethis ($n, $alt -> {$name}, $name, $style);
+ $results{$name} = timethis ($n, $alt -> {$name}, $name, $style);
}
+
+ return \%results;
}
+sub cmpthese{
+ my $results = ref $_[0] ? $_[0] : timethese( @_ );
+
+ return $results
+ if defined $_[2] && $_[2] eq 'none';
+
+ # Flatten in to an array of arrays with the name as the first field
+ my @vals = map{ [ $_, @{$results->{$_}} ] } keys %$results;
+
+ for (@vals) {
+ # The epsilon fudge here is to prevent div by 0. Since clock
+ # resolutions are much larger, it's below the noise floor.
+ my $rate = $_->[6] / ( $_->[2] + $_->[3] + 0.000000000000001 );
+ $_->[7] = $rate;
+ }
+
+ # Sort by rate
+ @vals = sort { $a->[7] <=> $b->[7] } @vals;
+
+ # If more than half of the rates are greater than one...
+ my $display_as_rate = $vals[$#vals>>1]->[7] > 1;
+
+ my @rows;
+ my @col_widths;
+
+ my @top_row = (
+ '',
+ $display_as_rate ? 'Rate' : 's/iter',
+ map { $_->[0] } @vals
+ );
+
+ push @rows, \@top_row;
+ @col_widths = map { length( $_ ) } @top_row;
+
+ # Build the data rows
+ # We leave the last column in even though it never has any data. Perhaps
+ # it should go away. Also, perhaps a style for a single column of
+ # percentages might be nice.
+ for my $row_val ( @vals ) {
+ my @row;
+
+ # Column 0 = test name
+ push @row, $row_val->[0];
+ $col_widths[0] = length( $row_val->[0] )
+ if length( $row_val->[0] ) > $col_widths[0];
+
+ # Column 1 = performance
+ my $row_rate = $row_val->[7];
+
+ # We assume that we'll never get a 0 rate.
+ my $a = $display_as_rate ? $row_rate : 1 / $row_rate;
+
+ # Only give a few decimal places before switching to sci. notation,
+ # since the results aren't usually that accurate anyway.
+ my $format =
+ $a >= 100 ?
+ "%0.0f" :
+ $a >= 10 ?
+ "%0.1f" :
+ $a >= 1 ?
+ "%0.2f" :
+ $a >= 0.1 ?
+ "%0.3f" :
+ "%0.2e";
+
+ $format .= "/s"
+ if $display_as_rate;
+ # Using $b here due to optimizing bug in _58 through _61
+ my $b = sprintf( $format, $a );
+ push @row, $b;
+ $col_widths[1] = length( $b )
+ if length( $b ) > $col_widths[1];
+
+ # Columns 2..N = performance ratios
+ my $skip_rest = 0;
+ for ( my $col_num = 0 ; $col_num < @vals ; ++$col_num ) {
+ my $col_val = $vals[$col_num];
+ my $out;
+ if ( $skip_rest ) {
+ $out = '';
+ }
+ elsif ( $col_val->[0] eq $row_val->[0] ) {
+ $out = "--";
+ # $skip_rest = 1;
+ }
+ else {
+ my $col_rate = $col_val->[7];
+ $out = sprintf( "%.0f%%", 100*$row_rate/$col_rate - 100 );
+ }
+ push @row, $out;
+ $col_widths[$col_num+2] = length( $out )
+ if length( $out ) > $col_widths[$col_num+2];
+
+ # A little wierdness to set the first column width properly
+ $col_widths[$col_num+2] = length( $col_val->[0] )
+ if length( $col_val->[0] ) > $col_widths[$col_num+2];
+ }
+ push @rows, \@row;
+ }
+
+ # Equalize column widths in the chart as much as possible without
+ # exceeding 80 characters. This does not use or affect cols 0 or 1.
+ my @sorted_width_refs =
+ sort { $$a <=> $$b } map { \$_ } @col_widths[2..$#col_widths];
+ my $max_width = ${$sorted_width_refs[-1]};
+
+ my $total = @col_widths - 1 ;
+ for ( @col_widths ) { $total += $_ }
+
+ STRETCHER:
+ while ( $total < 80 ) {
+ my $min_width = ${$sorted_width_refs[0]};
+ last
+ if $min_width == $max_width;
+ for ( @sorted_width_refs ) {
+ last
+ if $$_ > $min_width;
+ ++$$_;
+ ++$total;
+ last STRETCHER
+ if $total >= 80;
+ }
+ }
+
+ # Dump the output
+ my $format = join( ' ', map { "%${_}s" } @col_widths ) . "\n";
+ substr( $format, 1, 0 ) = '-';
+ for ( @rows ) {
+ printf $format, @$_;
+ }
+
+ return $results;
+}
+
+
1;
diff --git a/lib/CPAN.pm b/lib/CPAN.pm
index 432e72da05..2f22b773c7 100644
--- a/lib/CPAN.pm
+++ b/lib/CPAN.pm
@@ -3325,7 +3325,8 @@ sub perl {
$perl ||= $candidate if MM->maybe_command($candidate);
unless ($perl) {
my ($component,$perl_name);
- DIST_PERLNAME: foreach $perl_name ($^X, 'perl', 'perl5', "perl$]") {
+ DIST_PERLNAME:
+ foreach $perl_name ($^X, 'perl', 'perl5', "perl$Config::Config{version}") {
PATH_COMPONENT: foreach $component (MM->path(),
$Config::Config{'binexp'}) {
next unless defined($component) && $component;
diff --git a/lib/CPAN/FirstTime.pm b/lib/CPAN/FirstTime.pm
index 731d3ff2e4..289984956c 100644
--- a/lib/CPAN/FirstTime.pm
+++ b/lib/CPAN/FirstTime.pm
@@ -78,7 +78,8 @@ dialog anytime later by typing 'o conf init' at the cpan prompt.)
} else {
$fastread = 1;
$CPAN::Config->{urllist} ||= [];
- *prompt = sub {
+ # prototype should match that of &MakeMaker::prompt
+ *prompt = sub ($;$) {
my($q,$a) = @_;
my($ret) = defined $a ? $a : "";
printf qq{%s [%s]\n\n}, $q, $ret;
diff --git a/lib/Cwd.pm b/lib/Cwd.pm
index 8a99da975a..ee1bc28367 100644
--- a/lib/Cwd.pm
+++ b/lib/Cwd.pm
@@ -372,7 +372,7 @@ sub _qnx_abs_path {
*abs_path = \&_qnx_abs_path;
*fast_abs_path = \&_qnx_abs_path;
}
- elsif ($^O =~ /cygwin/) {
+ elsif ($^O eq 'cygwin') {
*getcwd = \&cwd;
*fastgetcwd = \&cwd;
*fastcwd = \&cwd;
diff --git a/lib/DB.pm b/lib/DB.pm
index 1395c81b5a..2575423593 100644
--- a/lib/DB.pm
+++ b/lib/DB.pm
@@ -794,7 +794,7 @@ highly experimental and subject to change.
=head1 AUTHOR
-Gurusamy Sarathy gsar@umich.edu
+Gurusamy Sarathy gsar@activestate.com
This code heavily adapted from an early version of perl5db.pl attributable
to Larry Wall and the Perl Porters.
diff --git a/lib/Exporter.pm b/lib/Exporter.pm
index bc07e9b2be..585109e7d0 100644
--- a/lib/Exporter.pm
+++ b/lib/Exporter.pm
@@ -4,6 +4,7 @@ require 5.001;
$ExportLevel = 0;
$Verbose ||= 0;
+$VERSION = '5.562';
sub export_to_level {
require Exporter::Heavy;
@@ -118,6 +119,18 @@ in L<perlfunc> and L<perlmod>. Understanding the concept of
modules and how the C<use> statement operates is important to
understanding the Exporter.
+=head2 How to Export
+
+The arrays C<@EXPORT> and C<@EXPORT_OK> in a module hold lists of
+symbols that are going to be exported into the users name space by
+default, or which they can request to be exported, respectively. The
+symbols can represent functions, scalars, arrays, hashes, or typeglobs.
+The symbols must be given by full name with the exception that the
+ampersand in front of a function is optional, e.g.
+
+ @EXPORT = qw(afunc $scalar @array); # afunc is a function
+ @EXPORT_OK = qw(&bfunc %hash *typeglob); # explicit prefix on &bfunc
+
=head2 Selecting What To Export
Do B<not> export method names!
@@ -196,11 +209,12 @@ Exporter has a special method, 'export_to_level' which is used in situations
where you can't directly call Export's import method. The export_to_level
method looks like:
-MyPackage->export_to_level($where_to_export, @what_to_export);
+MyPackage->export_to_level($where_to_export, $package, @what_to_export);
where $where_to_export is an integer telling how far up the calling stack
to export your symbols, and @what_to_export is an array telling what
-symbols *to* export (usually this is @_).
+symbols *to* export (usually this is @_). The $package argument is
+currently unused.
For example, suppose that you have a module, A, which already has an
import function:
diff --git a/lib/Exporter/Heavy.pm b/lib/Exporter/Heavy.pm
index 95ffc554be..1f9b432514 100644
--- a/lib/Exporter/Heavy.pm
+++ b/lib/Exporter/Heavy.pm
@@ -213,7 +213,8 @@ sub require_version {
my $version = ${"${pkg}::VERSION"};
if (!$version or $version < $wanted) {
$version ||= "(undef)";
- my $file = $INC{"$pkg.pm"};
+ # %INC contains slashes, but $pkg contains double-colons.
+ my $file = (map {s,::,/,g; $INC{$_}} "$pkg.pm")[0];
$file &&= " ($file)";
require Carp;
Carp::croak("$pkg $wanted required--this is only version $version$file")
diff --git a/lib/ExtUtils/Embed.pm b/lib/ExtUtils/Embed.pm
index e0ea0685f0..b649b6b77b 100644
--- a/lib/ExtUtils/Embed.pm
+++ b/lib/ExtUtils/Embed.pm
@@ -332,7 +332,7 @@ B<[@modules]> is an array ref, same as additional arguments mentioned above.
This will generate code with an B<xs_init> function that glues the perl B<Socket::bootstrap> function
-to the C B<boot_Socket> function and writes it to a file named "xsinit.c".
+to the C B<boot_Socket> function and writes it to a file named F<xsinit.c>.
Note that B<DynaLoader> is a special case where it must call B<boot_DynaLoader> directly.
@@ -378,7 +378,7 @@ we should find B<auto/Socket/Socket.a>
When looking for B<DBD::Oracle> relative to a search path,
we should find B<auto/DBD/Oracle/Oracle.a>
-Keep in mind, you can always supply B</my/own/path/ModuleName.a>
+Keep in mind that you can always supply B</my/own/path/ModuleName.a>
as an additional linker argument.
B<--> E<lt>list of linker argsE<gt>
@@ -392,7 +392,7 @@ When invoked with parameters the following are accepted and optional:
C<ldopts($std,[@modules],[@link_args],$path)>
-Where,
+Where:
B<$std> is boolean, equivalent to the B<-std> option.
diff --git a/lib/ExtUtils/Install.pm b/lib/ExtUtils/Install.pm
index 47bde0deb0..d6b1375fb6 100644
--- a/lib/ExtUtils/Install.pm
+++ b/lib/ExtUtils/Install.pm
@@ -67,7 +67,6 @@ sub install {
}
$packlist->read($pack{"read"}) if (-f $pack{"read"});
my $cwd = cwd();
- my $umask = umask 0 unless $Is_VMS;
my($source);
MOD_INSTALL: foreach $source (sort keys %hash) {
@@ -140,7 +139,6 @@ sub install {
print "Writing $pack{'write'}\n";
$packlist->write($pack{'write'});
}
- umask $umask unless $Is_VMS;
}
sub directory_not_empty ($) {
@@ -193,7 +191,6 @@ sub uninstall {
forceunlink($_) unless $nonono;
}
print "unlink $fil\n" if $verbose;
- close P;
forceunlink($fil) unless $nonono;
}
@@ -259,7 +256,6 @@ sub pm_to_blib {
close(FROMTO);
}
- my $umask = umask 0022 unless $Is_VMS;
mkpath($autodir,0,0755);
foreach (keys %$fromto) {
next if -f $fromto->{$_} && -M $fromto->{$_} < -M $_;
@@ -280,7 +276,6 @@ sub pm_to_blib {
next unless /\.pm$/;
autosplit($fromto->{$_},$autodir);
}
- umask $umask unless $Is_VMS;
}
package ExtUtils::Install::Warn;
@@ -343,7 +338,7 @@ There are two keys with a special meaning in the hash: "read" and
target files to the file named by C<$hashref-E<gt>{write}>. If there is
another file named by C<$hashref-E<gt>{read}>, the contents of this file will
be merged into the written file. The read and the written file may be
-identical, but on AFS it is quite likely, people are installing to a
+identical, but on AFS it is quite likely that people are installing to a
different directory than the one where the files later appear.
install_default() takes one or less arguments. If no arguments are
@@ -356,7 +351,7 @@ The argument-less form is convenient for install scripts like
perl -MExtUtils::Install -e install_default Tk/Canvas
-Assuming this command is executed in a directory with populated F<blib>
+Assuming this command is executed in a directory with a populated F<blib>
directory, it will proceed as if the F<blib> was build by MakeMaker on
this machine. This is useful for binary distributions.
diff --git a/lib/ExtUtils/Installed.pm b/lib/ExtUtils/Installed.pm
index dda594e784..41f3c9b3b8 100644
--- a/lib/ExtUtils/Installed.pm
+++ b/lib/ExtUtils/Installed.pm
@@ -56,7 +56,7 @@ my $self = {};
# Read the core packlist
$self->{Perl}{packlist} =
ExtUtils::Packlist->new("$Config{installarchlib}/.packlist");
-$self->{Perl}{version} = $];
+$self->{Perl}{version} = $Config{version};
# Read the module packlists
my $sub = sub
diff --git a/lib/ExtUtils/Liblist.pm b/lib/ExtUtils/Liblist.pm
index 13e4e29e88..b992ec0116 100644
--- a/lib/ExtUtils/Liblist.pm
+++ b/lib/ExtUtils/Liblist.pm
@@ -540,7 +540,7 @@ below.
=head2 EXTRALIBS
List of libraries that need to be linked with when linking a perl
-binary which includes this extension Only those libraries that
+binary which includes this extension. Only those libraries that
actually exist are included. These are written to a file and used
when linking perl.
@@ -562,7 +562,7 @@ object file. This list is used to create a .bs (bootstrap) file.
=head1 PORTABILITY
This module deals with a lot of system dependencies and has quite a
-few architecture specific B<if>s in the code.
+few architecture specific C<if>s in the code.
=head2 VMS implementation
@@ -682,7 +682,7 @@ enable searching for default libraries specified by C<$Config{libs}>.
The libraries specified may be a mixture of static libraries and
import libraries (to link with DLLs). Since both kinds are used
-pretty transparently on the win32 platform, we do not attempt to
+pretty transparently on the Win32 platform, we do not attempt to
distinguish between them.
=item *
diff --git a/lib/ExtUtils/MM_Unix.pm b/lib/ExtUtils/MM_Unix.pm
index 88240764d4..f4329e13d7 100644
--- a/lib/ExtUtils/MM_Unix.pm
+++ b/lib/ExtUtils/MM_Unix.pm
@@ -377,10 +377,22 @@ sub cflags {
if ($Is_PERL_OBJECT) {
$self->{CCFLAGS} =~ s/-DPERL_OBJECT(\b|$)/-DPERL_CAPI/g;
- if ($Is_Win32 && $Config{'cc'} =~ /^cl/i) {
- # Turn off C++ mode of the MSC compiler
- $self->{CCFLAGS} =~ s/-TP(\s|$)//;
- $self->{OPTIMIZE} =~ s/-TP(\s|$)//;
+ if ($Is_Win32) {
+ if ($Config{'cc'} =~ /^cl/i) {
+ # Turn off C++ mode of the MSC compiler
+ $self->{CCFLAGS} =~ s/-TP(\s|$)//g;
+ $self->{OPTIMIZE} =~ s/-TP(\s|$)//g;
+ }
+ elsif ($Config{'cc'} =~ /^bcc32/i) {
+ # Turn off C++ mode of the Borland compiler
+ $self->{CCFLAGS} =~ s/-P(\s|$)//g;
+ $self->{OPTIMIZE} =~ s/-P(\s|$)//g;
+ }
+ elsif ($Config{'cc'} =~ /^gcc/i) {
+ # Turn off C++ mode of the GCC compiler
+ $self->{CCFLAGS} =~ s/-xc\+\+(\s|$)//g;
+ $self->{OPTIMIZE} =~ s/-xc\+\+(\s|$)//g;
+ }
}
}
@@ -425,7 +437,19 @@ clean ::
');
# clean subdirectories first
for $dir (@{$self->{DIR}}) {
- push @m, "\t-cd $dir && \$(TEST_F) $self->{MAKEFILE} && \$(MAKE) clean\n";
+ if ($Is_Win32 && Win32::IsWin95()) {
+ push @m, <<EOT;
+ cd $dir
+ \$(TEST_F) $self->{MAKEFILE}
+ \$(MAKE) clean
+ cd ..
+EOT
+ }
+ else {
+ push @m, <<EOT;
+ -cd $dir && \$(TEST_F) $self->{MAKEFILE} && \$(MAKE) clean
+EOT
+ }
}
my(@otherfiles) = values %{$self->{XS}}; # .c files from *.xs files
@@ -1980,7 +2004,8 @@ usually solves this kind of problem.
push @defpath, $component if defined $component;
}
$self->{PERL} ||=
- $self->find_perl(5.0, [ $self->canonpath($^X), 'miniperl','perl','perl5',"perl$]" ],
+ $self->find_perl(5.0, [ $self->canonpath($^X), 'miniperl',
+ 'perl','perl5',"perl$Config{version}" ],
\@defpath, $Verbose );
# don't check if perl is executable, maybe they have decided to
# supply switches with perl
@@ -2123,6 +2148,7 @@ pure_site_install ::
}.$self->catdir('$(PERL_ARCHLIB)','auto','$(FULLEXT)').q{
doc_perl_install ::
+ -}.$self->{NOECHO}.q{$(MKPATH) $(INSTALLARCHLIB)
-}.$self->{NOECHO}.q{$(DOC_INSTALL) \
"Module" "$(NAME)" \
"installed into" "$(INSTALLPRIVLIB)" \
@@ -2132,6 +2158,7 @@ doc_perl_install ::
>> }.$self->catfile('$(INSTALLARCHLIB)','perllocal.pod').q{
doc_site_install ::
+ -}.$self->{NOECHO}.q{$(MKPATH) $(INSTALLARCHLIB)
-}.$self->{NOECHO}.q{$(DOC_INSTALL) \
"Module" "$(NAME)" \
"installed into" "$(INSTALLSITELIB)" \
@@ -2498,6 +2525,7 @@ $tmp/perlmain.c: $makefilename}, q{
push @m, q{
doc_inst_perl:
}.$self->{NOECHO}.q{echo Appending installation info to $(INSTALLARCHLIB)/perllocal.pod
+ -}.$self->{NOECHO}.q{$(MKPATH) $(INSTALLARCHLIB)
-}.$self->{NOECHO}.q{$(DOC_INSTALL) \
"Perl binary" "$(MAP_TARGET)" \
MAP_STATIC "$(MAP_STATIC)" \
@@ -3071,7 +3099,9 @@ sub realclean {
realclean purge :: clean
');
# realclean subdirectories first (already cleaned)
- my $sub = "\t-cd %s && \$(TEST_F) %s && \$(MAKE) %s realclean\n";
+ my $sub = ($Is_Win32 && Win32::IsWin95()) ?
+ "\tcd %s\n\t\$(TEST_F) %s\n\t\$(MAKE) %s realclean\n\tcd ..\n" :
+ "\t-cd %s && \$(TEST_F) %s && \$(MAKE) %s realclean\n";
foreach(@{$self->{DIR}}){
push(@m, sprintf($sub,$_,"$self->{MAKEFILE}.old","-f $self->{MAKEFILE}.old"));
push(@m, sprintf($sub,$_,"$self->{MAKEFILE}",''));
@@ -3215,12 +3245,25 @@ Helper subroutine for subdirs
sub subdir_x {
my($self, $subdir) = @_;
my(@m);
- qq{
+ if ($Is_Win32 && Win32::IsWin95()) {
+ # XXX: dmake-specific, like rest of Win95 port
+ return <<EOT;
+subdirs ::
+@[
+ cd $subdir
+ \$(MAKE) all \$(PASTHRU)
+ cd ..
+]
+EOT
+ }
+ else {
+ return <<EOT;
subdirs ::
$self->{NOECHO}cd $subdir && \$(MAKE) all \$(PASTHRU)
-};
+EOT
+ }
}
=item subdirs (o)
@@ -3471,7 +3514,7 @@ sub tool_xsubpp {
XSUBPPDIR = $xsdir
XSUBPP = \$(XSUBPPDIR)/$xsubpp
XSPROTOARG = $self->{XSPROTOARG}
-XSUBPPDEPS = @tmdeps
+XSUBPPDEPS = @tmdeps \$(XSUBPP)
XSUBPPARGS = @tmargs
};
};
@@ -3569,12 +3612,6 @@ config :: $(INST_AUTODIR)/.exists
'.$self->{NOECHO}.'$(NOOP)
';
- push @m, qq{
-config :: Version_check
- $self->{NOECHO}\$(NOOP)
-
-} unless $self->{PARENT} or ($self->{PERL_SRC} && $self->{INSTALLDIRS} eq "perl") or $self->{NO_VC};
-
push @m, $self->dir_target(qw[$(INST_AUTODIR) $(INST_LIBDIR) $(INST_ARCHAUTODIR)]);
if (%{$self->{HTMLLIBPODS}}) {
diff --git a/lib/ExtUtils/MM_VMS.pm b/lib/ExtUtils/MM_VMS.pm
index 31ca69067e..f3de323e53 100644
--- a/lib/ExtUtils/MM_VMS.pm
+++ b/lib/ExtUtils/MM_VMS.pm
@@ -1106,13 +1106,6 @@ config :: $(INST_AUTODIR).exists
$(NOECHO) $(NOOP)
';
- push @m, q{
-config :: Version_check
- $(NOECHO) $(NOOP)
-
-} unless $self->{PARENT} or ($self->{PERL_SRC} && $self->{INSTALLDIRS} eq "perl") or $self->{NO_VC};
-
-
push @m, $self->dir_target(qw[$(INST_AUTODIR) $(INST_LIBDIR) $(INST_ARCHAUTODIR)]);
if (%{$self->{MAN1PODS}}) {
push @m, q[
diff --git a/lib/ExtUtils/MM_Win32.pm b/lib/ExtUtils/MM_Win32.pm
index f6d19a26c5..534f26d823 100644
--- a/lib/ExtUtils/MM_Win32.pm
+++ b/lib/ExtUtils/MM_Win32.pm
@@ -36,6 +36,49 @@ $NMAKE = 1 if $Config{'make'} =~ /^nmake/i;
$PERLMAKE = 1 if $Config{'make'} =~ /^pmake/i;
$OBJ = 1 if $Config{'ccflags'} =~ /PERL_OBJECT/i;
+# a few workarounds for command.com (very basic)
+{
+ package ExtUtils::MM_Win95;
+
+ # the $^O test may be overkill, but we want to be sure Win32::IsWin95()
+ # exists before we try it
+
+ unshift @MM::ISA, 'ExtUtils::MM_Win95'
+ if ($^O =~ /Win32/ && Win32::IsWin95());
+
+ sub xs_c {
+ my($self) = shift;
+ return '' unless $self->needs_linking();
+ '
+.xs.c:
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) $(XSUBPP) \\
+ $(XSPROTOARG) $(XSUBPPARGS) $*.xs > $*.c
+ '
+ }
+
+ sub xs_cpp {
+ my($self) = shift;
+ return '' unless $self->needs_linking();
+ '
+.xs.cpp:
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) $(XSUBPP) \\
+ $(XSPROTOARG) $(XSUBPPARGS) $*.xs > $*.cpp
+ ';
+ }
+
+ # many makes are too dumb to use xs_c then c_o
+ sub xs_o {
+ my($self) = shift;
+ return '' unless $self->needs_linking();
+ '
+.xs$(OBJ_EXT):
+ $(PERL) -I$(PERL_ARCHLIB) -I$(PERL_LIB) $(XSUBPP) \\
+ $(XSPROTOARG) $(XSUBPPARGS) $*.xs > $*.c
+ $(CCCMD) $(CCCDLFLAGS) -I$(PERL_INC) $(DEFINE) $*.c
+ ';
+ }
+} # end of command.com workarounds
+
sub dlsyms {
my($self,%attribs) = @_;
@@ -441,6 +484,18 @@ sub dynamic_lib {
my($inst_dynamic_dep) = $attribs{INST_DYNAMIC_DEP} || "";
my($ldfrom) = '$(LDFROM)';
my(@m);
+
+# one thing for GCC/Mingw32:
+# we try to overcome non-relocateable-DLL problems by generating
+# a (hopefully unique) image-base from the dll's name
+# -- BKS, 10-19-1999
+ if ($GCC) {
+ my $dllname = $self->{BASEEXT} . "." . $self->{DLEXT};
+ $dllname =~ /(....)(.{0,4})/;
+ my $baseaddr = unpack("n", $1 ^ $2);
+ $otherldflags .= sprintf("-Wl,--image-base,0x%x0000 ", $baseaddr);
+ }
+
push(@m,'
# This section creates the dynamically loadable $(INST_DYNAMIC)
# from $(OBJECT) and possibly $(MYEXTLIB).
@@ -694,12 +749,6 @@ config :: $(INST_AUTODIR)\.exists
'.$self->{NOECHO}.'$(NOOP)
';
- push @m, qq{
-config :: Version_check
- $self->{NOECHO}\$(NOOP)
-
-} unless $self->{PARENT} or ($self->{PERL_SRC} && $self->{INSTALLDIRS} eq "perl") or $self->{NO_VC};
-
push @m, $self->dir_target(qw[$(INST_AUTODIR) $(INST_LIBDIR) $(INST_ARCHAUTODIR)]);
if (%{$self->{HTMLLIBPODS}}) {
diff --git a/lib/ExtUtils/MakeMaker.pm b/lib/ExtUtils/MakeMaker.pm
index 0f00e39afc..0426575f87 100644
--- a/lib/ExtUtils/MakeMaker.pm
+++ b/lib/ExtUtils/MakeMaker.pm
@@ -2,7 +2,7 @@ BEGIN {require 5.002;} # MakeMaker 5.17 was the last MakeMaker that was compatib
package ExtUtils::MakeMaker;
-$VERSION = "5.4302";
+$VERSION = "5.44";
$Version_OK = "5.17"; # Makefiles older than $Version_OK will die
# (Will be checked from MakeMaker version 4.13 onwards)
($Revision = substr(q$Revision: 1.222 $, 10)) =~ s/\s+$//;
@@ -17,7 +17,7 @@ use Carp ();
use vars qw(
@ISA @EXPORT @EXPORT_OK $AUTOLOAD
- $ISA_TTY $Is_Mac $Is_OS2 $Is_VMS $Revision $Setup_done
+ $ISA_TTY $Is_Mac $Is_OS2 $Is_VMS $Revision
$VERSION $Verbose $Version_OK %Config %Keep_after_flush
%MM_Sections %Prepend_dot_dot %Recognized_Att_Keys
@Get_from_Config @MM_Sections @Overridable @Parent
@@ -70,7 +70,7 @@ $Is_VMS = $^O eq 'VMS';
$Is_OS2 = $^O eq 'os2';
$Is_Mac = $^O eq 'MacOS';
$Is_Win32 = $^O eq 'MSWin32';
-$Is_Cygwin= $^O =~ /cygwin/i;
+$Is_Cygwin= $^O eq 'cygwin';
require ExtUtils::MM_Unix;
@@ -91,35 +91,11 @@ if ($Is_Cygwin) {
require ExtUtils::MM_Cygwin;
}
-# The SelfLoader would bring a lot of overhead for MakeMaker, because
-# we know for sure we will use most of the autoloaded functions once
-# we have to use one of them. So we write our own loader
-
-sub AUTOLOAD {
- my $code;
- if (defined fileno(DATA)) {
- my $fh = select DATA;
- my $o = $/; # For future reads from the file.
- $/ = "\n__END__\n";
- $code = <DATA>;
- $/ = $o;
- select $fh;
- close DATA;
- eval $code;
- if ($@) {
- $@ =~ s/ at .*\n//;
- Carp::croak $@;
- }
- } else {
- warn "AUTOLOAD called unexpectedly for $AUTOLOAD";
- }
- defined(&$AUTOLOAD) or die "Myloader inconsistency error";
- goto &$AUTOLOAD;
-}
+full_setup();
-# The only subroutine we do not SelfLoad is Version_Check because it's
-# called so often. Loading this minimum still requires 1.2 secs on my
-# Indy :-(
+# The use of the Version_check target has been dropped between perl
+# 5.5.63 and 5.5.64. We must keep the subroutine for a while so that
+# old Makefiles can satisfy the Version_check target.
sub Version_check {
my($checkversion) = @_;
@@ -140,38 +116,10 @@ sub warnhandler {
warn @_;
}
-sub ExtUtils::MakeMaker::eval_in_subdirs ;
-sub ExtUtils::MakeMaker::eval_in_x ;
-sub ExtUtils::MakeMaker::full_setup ;
-sub ExtUtils::MakeMaker::writeMakefile ;
-sub ExtUtils::MakeMaker::new ;
-sub ExtUtils::MakeMaker::check_manifest ;
-sub ExtUtils::MakeMaker::parse_args ;
-sub ExtUtils::MakeMaker::check_hints ;
-sub ExtUtils::MakeMaker::mv_all_methods ;
-sub ExtUtils::MakeMaker::skipcheck ;
-sub ExtUtils::MakeMaker::flush ;
-sub ExtUtils::MakeMaker::mkbootstrap ;
-sub ExtUtils::MakeMaker::mksymlists ;
-sub ExtUtils::MakeMaker::neatvalue ;
-sub ExtUtils::MakeMaker::selfdocument ;
-sub ExtUtils::MakeMaker::WriteMakefile ;
-sub ExtUtils::MakeMaker::prompt ($;$) ;
-
-1;
-
-__DATA__
-
-package ExtUtils::MakeMaker;
-
sub WriteMakefile {
Carp::croak "WriteMakefile: Need even number of args" if @_ % 2;
local $SIG{__WARN__} = \&warnhandler;
- unless ($Setup_done++){
- full_setup();
- undef &ExtUtils::MakeMaker::full_setup; #safe memory
- }
my %att = @_;
MM->new(\%att)->flush;
}
@@ -382,9 +330,13 @@ sub ExtUtils::MakeMaker::new {
my($prereq);
foreach $prereq (sort keys %{$self->{PREREQ_PM}}) {
- my $eval = "use $prereq $self->{PREREQ_PM}->{$prereq}";
+ my $eval = "require $prereq";
eval $eval;
- if ($@){
+
+ if ($@) {
+ warn "Warning: prerequisite $prereq failed to load: $@";
+ }
+ elsif ($prereq->VERSION < $self->{PREREQ_PM}->{$prereq} ){
warn "Warning: prerequisite $prereq $self->{PREREQ_PM}->{$prereq} not found";
# Why is/was this 'delete' here? We need PREREQ_PM later to make PPDs.
# } else {
@@ -1183,7 +1135,7 @@ MakeMaker gives you much more freedom than needed to configure
internal variables and get different results. It is worth to mention,
that make(1) also lets you configure most of the variables that are
used in the Makefile. But in the majority of situations this will not
-be necessary, and should only be done, if the author of a package
+be necessary, and should only be done if the author of a package
recommends it (or you know what you're doing).
=head2 Using Attributes and Parameters
@@ -1598,9 +1550,9 @@ Makefile.PL.
=item NEEDS_LINKING
-MakeMaker will figure out, if an extension contains linkable code
+MakeMaker will figure out if an extension contains linkable code
anywhere down the directory tree, and will set this variable
-accordingly, but you can speed it up a very little bit, if you define
+accordingly, but you can speed it up a very little bit if you define
this boolean variable yourself.
=item NOECHO
@@ -1615,7 +1567,7 @@ Boolean. Attribute to inhibit descending into subdirectories.
=item NO_VC
-In general any generated Makefile checks for the current version of
+In general, any generated Makefile checks for the current version of
MakeMaker and the version the Makefile was built under. If NO_VC is
set, the version check is neglected. Do not write this into your
Makefile.PL, use it interactively instead.
@@ -1642,7 +1594,7 @@ to $(CC).
=item PERL_ARCHLIB
-Same as above for architecture dependent files
+Same as above for architecture dependent files.
=item PERL_LIB
@@ -1699,14 +1651,14 @@ Defining PM in the Makefile.PL will override PMLIBDIRS.
=item POLLUTE
Release 5.005 grandfathered old global symbol names by providing preprocessor
-macros for extension source compatibility. As of release 5.006, these
+macros for extension source compatibility. As of release 5.6, these
preprocessor definitions are not available by default. The POLLUTE flag
specifies that the old names should still be defined:
perl Makefile.PL POLLUTE=1
Please inform the module author if this is necessary to successfully install
-a module under 5.006 or later.
+a module under 5.6 or later.
=item PPM_INSTALL_EXEC
@@ -1736,8 +1688,8 @@ only check if any version is installed already.
=item SKIP
Arryref. E.g. [qw(name1 name2)] skip (do not write) sections of the
-Makefile. Caution! Do not use the SKIP attribute for the neglectible
-speedup. It may seriously damage the resulting Makefile. Only use it,
+Makefile. Caution! Do not use the SKIP attribute for the negligible
+speedup. It may seriously damage the resulting Makefile. Only use it
if you really need it.
=item TYPEMAPS
@@ -1860,7 +1812,7 @@ NB: Extensions that have nothing but *.pm files had to say
{LINKTYPE => ''}
with Pre-5.0 MakeMakers. Since version 5.00 of MakeMaker such a line
-can be deleted safely. MakeMaker recognizes, when there's nothing to
+can be deleted safely. MakeMaker recognizes when there's nothing to
be linked.
=item macro
@@ -1963,7 +1915,7 @@ details)
=item make distclean
does a realclean first and then the distcheck. Note that this is not
-needed to build a new distribution as long as you are sure, that the
+needed to build a new distribution as long as you are sure that the
MANIFEST file is ok.
=item make manifest
diff --git a/lib/ExtUtils/Manifest.pm b/lib/ExtUtils/Manifest.pm
index 52cfc2a80a..58c91bc44b 100644
--- a/lib/ExtUtils/Manifest.pm
+++ b/lib/ExtUtils/Manifest.pm
@@ -187,7 +187,6 @@ sub manicopy {
require File::Basename;
my(%dirs,$file);
$target = VMS::Filespec::unixify($target) if $Is_VMS;
- umask 0 unless $Is_VMS;
File::Path::mkpath([ $target ],1,$Is_VMS ? undef : 0755);
foreach $file (keys %$read){
$file = VMS::Filespec::unixify($file) if $Is_VMS;
diff --git a/lib/ExtUtils/Mkbootstrap.pm b/lib/ExtUtils/Mkbootstrap.pm
index 25c374c153..323c3ab6ba 100644
--- a/lib/ExtUtils/Mkbootstrap.pm
+++ b/lib/ExtUtils/Mkbootstrap.pm
@@ -81,8 +81,8 @@ C<mkbootstrap>
Mkbootstrap typically gets called from an extension Makefile.
-There is no C<*.bs> file supplied with the extension. Instead a
-C<*_BS> file which has code for the special cases, like posix for
+There is no C<*.bs> file supplied with the extension. Instead, there may
+be a C<*_BS> file which has code for the special cases, like posix for
berkeley db on the NeXT.
This file will get parsed, and produce a maybe empty
diff --git a/lib/ExtUtils/Mksymlists.pm b/lib/ExtUtils/Mksymlists.pm
index cfc1e7dff8..9dcedbf35e 100644
--- a/lib/ExtUtils/Mksymlists.pm
+++ b/lib/ExtUtils/Mksymlists.pm
@@ -78,7 +78,7 @@ sub _write_os2 {
}
my $distname = $data->{DISTNAME} || $data->{NAME};
$distname = "Distribution $distname";
- my $comment = "Perl (v$]$threaded) module $data->{NAME}";
+ my $comment = "Perl (v$Config::Config{version}$threaded) module $data->{NAME}";
if ($data->{INSTALLDIRS} and $data->{INSTALLDIRS} eq 'perl') {
$distname = 'perl5-porters@perl.org';
$comment = "Core $comment";
diff --git a/lib/ExtUtils/typemap b/lib/ExtUtils/typemap
index d84435e50f..a34cd4f9ea 100644
--- a/lib/ExtUtils/typemap
+++ b/lib/ExtUtils/typemap
@@ -251,7 +251,7 @@ T_REFOBJ
T_OPAQUE
sv_setpvn($arg, (char *)&$var, sizeof($var));
T_OPAQUEPTR
- sv_setpvn($arg, (char *)$var, sizeof(*$var)), XFree((char *)$var);
+ sv_setpvn($arg, (char *)$var, sizeof(*$var));
T_PACKED
XS_pack_$ntype($arg, $var);
T_PACKEDARRAY
diff --git a/lib/ExtUtils/xsubpp b/lib/ExtUtils/xsubpp
index e5c7e0989e..ff9b452caf 100755
--- a/lib/ExtUtils/xsubpp
+++ b/lib/ExtUtils/xsubpp
@@ -6,10 +6,12 @@ xsubpp - compiler to convert Perl XS code into C code
=head1 SYNOPSIS
-B<xsubpp> [B<-v>] [B<-C++>] [B<-except>] [B<-s pattern>] [B<-prototypes>] [B<-noversioncheck>] [B<-nolinenumbers>] [B<-typemap typemap>] ... file.xs
+B<xsubpp> [B<-v>] [B<-C++>] [B<-except>] [B<-s pattern>] [B<-prototypes>] [B<-noversioncheck>] [B<-nolinenumbers>] [B<-nooptimize>] [B<-typemap typemap>] ... file.xs
=head1 DESCRIPTION
+This compiler is typically run by the makefiles created by L<ExtUtils::MakeMaker>.
+
I<xsubpp> will compile XS code into C code by embedding the constructs
necessary to let C functions manipulate Perl values and creates the glue
necessary to let Perl access those functions. The compiler uses typemaps to
@@ -23,13 +25,15 @@ typemap taking precedence.
=head1 OPTIONS
+Note that the C<XSOPT> MakeMaker option may be used to add these options to
+any makefiles generated by MakeMaker.
+
=over 5
=item B<-C++>
Adds ``extern "C"'' to the C code.
-
=item B<-except>
Adds exception handling stubs to the C code.
@@ -59,6 +63,13 @@ number.
Prevents the inclusion of `#line' directives in the output.
+=item B<-nooptimize>
+
+Disables certain optimizations. The only optimization that is currently
+affected is the use of I<target>s by the output C code (see L<perlguts>).
+This may significantly slow down the generated code, but this is the way
+B<xsubpp> of 5.005 and earlier operated.
+
=back
=head1 ENVIRONMENT
@@ -103,7 +114,7 @@ if ($^O eq 'VMS') {
$FH = 'File0000' ;
-$usage = "Usage: xsubpp [-v] [-C++] [-except] [-prototypes] [-noversioncheck] [-nolinenumbers] [-s pattern] [-typemap typemap]... file.xs\n";
+$usage = "Usage: xsubpp [-v] [-C++] [-except] [-prototypes] [-noversioncheck] [-nolinenumbers] [-nooptimize] [-s pattern] [-typemap typemap]... file.xs\n";
$proto_re = "[" . quotemeta('\$%&*@;') . "]" ;
# mjn
@@ -114,6 +125,7 @@ $WantPrototypes = -1 ;
$WantVersionChk = 1 ;
$ProtoUsed = 0 ;
$WantLineNumbers = 1 ;
+$WantOptimize = 1 ;
SWITCH: while (@ARGV and $ARGV[0] =~ /^-./) {
$flag = shift @ARGV;
$flag =~ s/^-// ;
@@ -129,7 +141,9 @@ SWITCH: while (@ARGV and $ARGV[0] =~ /^-./) {
push(@tm,shift), next SWITCH if $flag eq 'typemap';
$WantLineNumbers = 0, next SWITCH if $flag eq 'nolinenumbers';
$WantLineNumbers = 1, next SWITCH if $flag eq 'linenumbers';
- (print "xsubpp version $XSUBPP_version\n"), exit
+ $WantOptimize = 0, next SWITCH if $flag eq 'nooptimize';
+ $WantOptimize = 1, next SWITCH if $flag eq 'optimize';
+ (print "xsubpp version $XSUBPP_version\n"), exit
if $flag eq 'v';
die $usage;
}
@@ -235,6 +249,24 @@ foreach $key (keys %input_expr) {
$input_expr{$key} =~ s/\n+$//;
}
+$bal = qr[(?:(?>[^()]+)|\((?p{ $bal })\))*]; # ()-balanced
+$cast = qr[(?:\(\s*SV\s*\*\s*\)\s*)?]; # Optional (SV*) cast
+$size = qr[,\s* (?p{ $bal }) ]x; # Third arg (to setpvn)
+
+foreach $key (keys %output_expr) {
+ use re 'eval';
+
+ my ($t, $with_size, $arg, $sarg) =
+ ($output_expr{$key} =~
+ m[^ \s+ sv_set ( [iunp] ) v (n)? # Type, is_setpvn
+ \s* \( \s* $cast \$arg \s* ,
+ \s* ( (?p{ $bal }) ) # Set from
+ ( (?p{ $size }) )? # Possible sizeof set-from
+ \) \s* ; \s* $
+ ]x);
+ $targetable{$key} = [$t, $with_size, $arg, $sarg] if $t;
+}
+
$END = "!End!\n\n"; # "impossible" keyword (multiple newline)
# Match an XS keyword
@@ -367,7 +399,17 @@ sub INPUT_handler {
$thisdone |= $var_name eq "THIS";
$retvaldone |= $var_name eq "RETVAL";
$var_types{$var_name} = $var_type;
- print "\t" . &map_type($var_type);
+ # XXXX This check is a safeguard against the unfinished conversion of
+ # generate_init(). When generate_init() is fixed,
+ # one can use 2-args map_type() unconditionally.
+ if ($var_type =~ / \( \s* \* \s* \) /x) {
+ # Function pointers are not yet supported with &output_init!
+ print "\t" . &map_type($var_type, $var_name);
+ $name_printed = 1;
+ } else {
+ print "\t" . &map_type($var_type);
+ $name_printed = 0;
+ }
$var_num = $args_match{$var_name};
$proto_arg[$var_num] = ProtoString($var_type)
@@ -377,12 +419,16 @@ sub INPUT_handler {
$func_args =~ s/\b($var_name)\b/&$1/;
}
if ($var_init =~ /^[=;]\s*NO_INIT\s*;?\s*$/) {
+ if ($name_printed) {
+ print ";\n";
+ } else {
print "\t$var_name;\n";
+ }
} elsif ($var_init =~ /\S/) {
- &output_init($var_type, $var_num, $var_name, $var_init);
+ &output_init($var_type, $var_num, $var_name, $var_init, $name_printed);
} elsif ($var_num) {
# generate initialization code
- &generate_init($var_type, $var_num, $var_name);
+ &generate_init($var_type, $var_num, $var_name, $name_printed);
} else {
print ";\n";
}
@@ -1081,10 +1127,12 @@ EOF
$_ = '' ;
} else {
if ($ret_type ne "void") {
- print "\t" . &map_type($ret_type) . "\tRETVAL;\n"
+ print "\t" . &map_type($ret_type, 'RETVAL') . ";\n"
if !$retvaldone;
$args_match{"RETVAL"} = 0;
$var_types{"RETVAL"} = $ret_type;
+ print "\tdXSTARG;\n"
+ if $WantOptimize and $targetable{$type_kind{$ret_type}};
}
print $deferred;
@@ -1137,8 +1185,32 @@ EOF
if ($gotRETVAL && $RETVAL_code) {
print "\t$RETVAL_code\n";
} elsif ($gotRETVAL || $wantRETVAL) {
- # RETVAL almost never needs SvSETMAGIC()
- &generate_output($ret_type, 0, 'RETVAL', 0);
+ my $t = $WantOptimize && $targetable{$type_kind{$ret_type}};
+ my $var = 'RETVAL';
+ my $type = $ret_type;
+
+ # 0: type, 1: with_size, 2: how, 3: how_size
+ if ($t and not $t->[1] and $t->[0] eq 'p') {
+ # PUSHp corresponds to setpvn. Treate setpv directly
+ my $what = eval qq("$t->[2]");
+ warn $@ if $@;
+
+ print "\tsv_setpv(TARG, $what); XSprePUSH; PUSHTARG;\n";
+ }
+ elsif ($t) {
+ my $what = eval qq("$t->[2]");
+ warn $@ if $@;
+
+ my $size = $t->[3];
+ $size = '' unless defined $size;
+ $size = eval qq("$size");
+ warn $@ if $@;
+ print "\tXSprePUSH; PUSH$t->[0]($what$size);\n";
+ }
+ else {
+ # RETVAL almost never needs SvSETMAGIC()
+ &generate_output($ret_type, 0, 'RETVAL', 0);
+ }
}
# do cleanup
@@ -1305,15 +1377,22 @@ warn("Please specify prototyping behavior for $filename (see perlxs manual)\n")
&Exit;
sub output_init {
- local($type, $num, $var, $init) = @_;
+ local($type, $num, $var, $init, $name_printed) = @_;
local($arg) = "ST(" . ($num - 1) . ")";
if( $init =~ /^=/ ) {
- eval qq/print "\\t$var $init\\n"/;
+ if ($name_printed) {
+ eval qq/print " $init\\n"/;
+ } else {
+ eval qq/print "\\t$var $init\\n"/;
+ }
warn $@ if $@;
} else {
if( $init =~ s/^\+// && $num ) {
- &generate_init($type, $num, $var);
+ &generate_init($type, $num, $var, $name_printed);
+ } elsif ($name_printed) {
+ print ";\n";
+ $init =~ s/^;//;
} else {
eval qq/print "\\t$var;\\n"/;
warn $@ if $@;
@@ -1382,16 +1461,26 @@ sub generate_init {
if (defined($defaults{$var})) {
$expr =~ s/(\t+)/$1 /g;
$expr =~ s/ /\t/g;
- eval qq/print "\\t$var;\\n"/;
- warn $@ if $@;
+ if ($name_printed) {
+ print ";\n";
+ } else {
+ eval qq/print "\\t$var;\\n"/;
+ warn $@ if $@;
+ }
$deferred .= eval qq/"\\n\\tif (items < $num)\\n\\t $var = $defaults{$var};\\n\\telse {\\n$expr;\\n\\t}\\n"/;
warn $@ if $@;
} elsif ($ScopeThisXSUB or $expr !~ /^\t\$var =/) {
- eval qq/print "\\t$var;\\n"/;
- warn $@ if $@;
+ if ($name_printed) {
+ print ";\n";
+ } else {
+ eval qq/print "\\t$var;\\n"/;
+ warn $@ if $@;
+ }
$deferred .= eval qq/"\\n$expr;\\n"/;
warn $@ if $@;
} else {
+ die "panic: do not know how to handle this branch for function pointers"
+ if $name_printed;
eval qq/print "$expr;\\n"/;
warn $@ if $@;
}
@@ -1405,7 +1494,7 @@ sub generate_output {
$type = TidyType($type) ;
if ($type =~ /^array\(([^,]*),(.*)\)/) {
- print "\tsv_setpvn($arg, (char *)$var, $2 * sizeof($1)), XFree((char *)$var);\n";
+ print "\tsv_setpvn($arg, (char *)$var, $2 * sizeof($1));\n";
print "\tSvSETMAGIC($arg);\n" if $do_setmagic;
} else {
blurt("Error: '$type' not in typemap"), return
@@ -1468,10 +1557,17 @@ sub generate_output {
}
sub map_type {
- my($type) = @_;
+ my($type, $varname) = @_;
$type =~ tr/:/_/;
$type =~ s/^array\(([^,]*),(.*)\).*/$1 */s;
+ if ($varname) {
+ if ($varname && $type =~ / \( \s* \* (?= \s* \) ) /xg) {
+ (substr $type, pos $type, 0) = " $varname ";
+ } else {
+ $type .= "\t$varname";
+ }
+ }
$type;
}
diff --git a/lib/File/Copy.pm b/lib/File/Copy.pm
index fd812bc721..8df54e55a8 100644
--- a/lib/File/Copy.pm
+++ b/lib/File/Copy.pm
@@ -10,14 +10,14 @@ package File::Copy;
use strict;
use Carp;
use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION $Too_Big
- &copy &syscopy &cp &mv);
+ &copy &syscopy &cp &mv $Syscopy_is_copy);
# Note that this module implements only *part* of the API defined by
# the File/Copy.pm module of the File-Tools-2.0 package. However, that
# package has not yet been updated to work with Perl 5.004, and so it
# would be a Bad Thing for the CPAN module to grab it and replace this
# module. Therefore, we set this module's version higher than 2.0.
-$VERSION = '2.02';
+$VERSION = '2.03';
require Exporter;
@ISA = qw(Exporter);
@@ -60,12 +60,12 @@ sub copy {
$to = _catname($from, $to);
}
- if (defined &syscopy && \&syscopy != \&copy
+ if (defined &syscopy && !$Syscopy_is_copy
&& !$to_a_handle
&& !($from_a_handle && $^O eq 'os2' ) # OS/2 cannot handle handles
&& !($from_a_handle && $^O eq 'mpeix') # and neither can MPE/iX.
&& !($from_a_handle && $^O eq 'MSWin32')
- )
+ )
{
return syscopy($from, $to);
}
@@ -83,16 +83,16 @@ sub copy {
open(FROM, "< $from\0") or goto fail_open1;
binmode FROM or die "($!,$^E)";
$closefrom = 1;
- }
-
+ }
+
if ($to_a_handle) {
*TO = *$to{FILEHANDLE};
- } else {
+ } else {
$to = "./$to" if $to =~ /^\s/;
open(TO,"> $to\0") or goto fail_open2;
binmode TO or die "($!,$^E)";
$closeto = 1;
- }
+ }
if (@_) {
$size = shift(@_) + 0;
@@ -120,7 +120,7 @@ sub copy {
# Use this idiom to avoid uninitialized value warning.
return 1;
-
+
# All of these contortions try to preserve error messages...
fail_inner:
if ($closeto) {
@@ -163,10 +163,10 @@ sub move {
(($tosz2,$tomt2) = (stat($to))[7,9]) && # $to's there
($tosz1 != $tosz2 or $tomt1 != $tomt2) && # and changed
$tosz2 == $fromsz; # it's all there
-
+
($tosz1,$tomt1) = (stat($to))[7,9]; # just in case rename did something
return 1 if ($copied = copy($from,$to)) && unlink($from);
-
+
($tosz2,$tomt2) = ((stat($to))[7,9],0,0) if defined $tomt1;
unlink($to) if !defined($tomt1) or $tomt1 != $tomt2 or $tosz1 != $tosz2;
($!,$^E) = ($sts,$ossts);
@@ -193,6 +193,7 @@ unless (defined &syscopy) {
return Win32::CopyFile(@_, 1);
};
} else {
+ $Syscopy_is_copy = 1;
*syscopy = \&copy;
}
}
diff --git a/lib/File/DosGlob.pm b/lib/File/DosGlob.pm
index 594ee2ec84..e6fc311e32 100644
--- a/lib/File/DosGlob.pm
+++ b/lib/File/DosGlob.pm
@@ -206,7 +206,7 @@ pandering to DOS habits. Needs a dose of optimizium too.
=head1 AUTHOR
-Gurusamy Sarathy <gsar@umich.edu>
+Gurusamy Sarathy <gsar@activestate.com>
=head1 HISTORY
diff --git a/lib/File/Find.pm b/lib/File/Find.pm
index 28e2e90e44..c674b2c5f6 100644
--- a/lib/File/Find.pm
+++ b/lib/File/Find.pm
@@ -1,5 +1,5 @@
package File::Find;
-require 5.000;
+require 5.005;
require Exporter;
require Cwd;
@@ -12,70 +12,163 @@ finddepth - traverse a directory structure depth-first
=head1 SYNOPSIS
use File::Find;
- find(\&wanted, '/foo','/bar');
+ find(\&wanted, '/foo', '/bar');
sub wanted { ... }
use File::Find;
- finddepth(\&wanted, '/foo','/bar');
+ finddepth(\&wanted, '/foo', '/bar');
sub wanted { ... }
+
+ use File::Find;
+ find({ wanted => \&process, follow => 1 }, '.');
=head1 DESCRIPTION
The first argument to find() is either a hash reference describing the
-operations to be performed for each file, a code reference, or a string
-that contains a subroutine name. If it is a hash reference, then the
-value for the key C<wanted> should be a code reference. This code
-reference is called I<the wanted() function> below.
+operations to be performed for each file, or a code reference.
-Currently the only other supported key for the above hash is
-C<bydepth>, in presense of which the walk over directories is
-performed depth-first. Entry point finddepth() is a shortcut for
-specifying C<{ bydepth => 1}> in the first argument of find().
+Here are the possible keys for the hash:
+
+=over 3
+
+=item C<wanted>
+
+The value should be a code reference. This code reference is called
+I<the wanted() function> below.
+
+=item C<bydepth>
+
+Reports the name of a directory only AFTER all its entries
+have been reported. Entry point finddepth() is a shortcut for
+specifying C<{ bydepth => 1 }> in the first argument of find().
+
+=item C<follow>
+
+Causes symbolic links to be followed. Since directory trees with symbolic
+links (followed) may contain files more than once and may even have
+cycles, a hash has to be built up with an entry for each file.
+This might be expensive both in space and time for a large
+directory tree. See I<follow_fast> and I<follow_skip> below.
+If either I<follow> or I<follow_fast> is in effect:
+
+=over 6
+
+=item
+
+It is guarantueed that an I<lstat> has been called before the user's
+I<wanted()> function is called. This enables fast file checks involving S< _>.
+
+=item
+
+There is a variable C<$File::Find::fullname> which holds the absolute
+pathname of the file with all symbolic links resolved
+
+=back
+
+=item C<follow_fast>
+
+This is similar to I<follow> except that it may report some files
+more than once. It does detect cycles however.
+Since only symbolic links have to be hashed, this is
+much cheaper both in space and time.
+If processing a file more than once (by the user's I<wanted()> function)
+is worse than just taking time, the option I<follow> should be used.
+
+=item C<follow_skip>
+
+C<follow_skip==1>, which is the default, causes all files which are
+neither directories nor symbolic links to be ignored if they are about
+to be processed a second time. If a directory or a symbolic link
+are about to be processed a second time, File::Find dies.
+C<follow_skip==0> causes File::Find to die if any file is about to be
+processed a second time.
+C<follow_skip==2> causes File::Find to ignore any duplicate files and
+dirctories but to proceed normally otherwise.
-The wanted() function does whatever verifications you want.
-$File::Find::dir contains the current directory name, and $_ the
-current filename within that directory. $File::Find::name contains
-C<"$File::Find::dir/$_">. You are chdir()'d to $File::Find::dir when
-the function is called. The function may set $File::Find::prune to
-prune the tree.
-File::Find assumes that you don't alter the $_ variable. If you do then
-make sure you return it to its original value before exiting your function.
+=item C<no_chdir>
+
+Does not C<chdir()> to each directory as it recurses. The wanted()
+function will need to be aware of this, of course. In this case,
+C<$_> will be the same as C<$File::Find::name>.
+
+=item C<untaint>
+
+If find is used in taint-mode (-T command line switch or if EUID != UID
+or if EGID != GID) then internally directory names have to be untainted
+before they can be cd'ed to. Therefore they are checked against a regular
+expression I<untaint_pattern>. Note, that all names passed to the
+user's I<wanted()> function are still tainted.
+
+=item C<untaint_pattern>
+
+See above. This should be set using the C<qr> quoting operator.
+The default is set to C<qr|^([-+@\w./]+)$|>.
+Note that the paranthesis which are vital.
+
+=item C<untaint_skip>
+
+If set, directories (subtrees) which fail the I<untaint_pattern>
+are skipped. The default is to 'die' in such a case.
+
+=back
+
+The wanted() function does whatever verifications you want.
+C<$File::Find::dir> contains the current directory name, and C<$_> the
+current filename within that directory. C<$File::Find::name> contains
+the complete pathname to the file. You are chdir()'d to C<$File::Find::dir> when
+the function is called, unless C<no_chdir> was specified.
+When <follow> or <follow_fast> are in effect there is also a
+C<$File::Find::fullname>.
+The function may set C<$File::Find::prune> to prune the tree
+unless C<bydepth> was specified.
+Unless C<follow> or C<follow_fast> is specified, for compatibility
+reasons (find.pl, find2perl) there are in addition the following globals
+available: C<$File::Find::topdir>, C<$File::Find::topdev>, C<$File::Find::topino>,
+C<$File::Find::topmode> and C<$File::Find::topnlink>.
This library is useful for the C<find2perl> tool, which when fed,
find2perl / -name .nfs\* -mtime +7 \
- -exec rm -f {} \; -o -fstype nfs -prune
+ -exec rm -f {} \; -o -fstype nfs -prune
produces something like:
sub wanted {
/^\.nfs.*$/ &&
- (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
+ (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_)) &&
int(-M _) > 7 &&
unlink($_)
||
- ($nlink || (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_))) &&
+ ($nlink || (($dev, $ino, $mode, $nlink, $uid, $gid) = lstat($_))) &&
$dev < 0 &&
($File::Find::prune = 1);
}
-Set the variable $File::Find::dont_use_nlink if you're using AFS,
+Set the variable C<$File::Find::dont_use_nlink> if you're using AFS,
since AFS cheats.
-C<finddepth> is just like C<find>, except that it does a depth-first
-search.
Here's another interesting wanted function. It will find all symlinks
that don't resolve:
sub wanted {
- -l && !-e && print "bogus link: $File::Find::name\n";
+ -l && !-e && print "bogus link: $File::Find::name\n";
}
-=head1 BUGS
+See also the script C<pfind> on CPAN for a nice application of this
+module.
+
+=head1 CAVEAT
+
+Be aware that the option to follow symblic links can be dangerous.
+Depending on the structure of the directory tree (including symbolic
+links to directories) you might traverse a given (physical) directory
+more than once (only if C<follow_fast> is in effect).
+Furthermore, deleting or changing files in a symbolically linked directory
+might cause very unpleasant surprises, since you delete or change files
+in an unknown directory.
-There is no way to make find or finddepth follow symlinks.
=cut
@@ -83,151 +176,522 @@ There is no way to make find or finddepth follow symlinks.
@EXPORT = qw(find finddepth);
-sub find_opt {
- my $wanted = shift;
- my $bydepth = $wanted->{bydepth};
- my $cwd = $bydepth ? Cwd::fastcwd() : Cwd::cwd();
- # Localize these rather than lexicalizing them for backwards
- # compatibility.
- local($topdir,$topdev,$topino,$topmode,$topnlink);
- foreach $topdir (@_) {
- (($topdev,$topino,$topmode,$topnlink) =
- ($Is_VMS ? stat($topdir) : lstat($topdir)))
- || (warn("Can't stat $topdir: $!\n"), next);
- if (-d _) {
- if (chdir($topdir)) {
- $prune = 0;
- unless ($bydepth) {
- ($dir,$_) = ($topdir,'.');
- $name = $topdir;
- $wanted->{wanted}->();
- }
- next if $prune;
- my $fixtopdir = $topdir;
- $fixtopdir =~ s,/$,, ;
- $fixtopdir =~ s/\.dir$// if $Is_VMS;
- &finddir($wanted,$fixtopdir,$topnlink, $bydepth);
- if ($bydepth) {
- ($dir,$_) = ($fixtopdir,'.');
- $name = $fixtopdir;
- $wanted->{wanted}->();
- }
+use strict;
+my $Is_VMS;
+
+require File::Basename;
+
+my %SLnkSeen;
+my ($wanted_callback, $avoid_nlink, $bydepth, $no_chdir, $follow,
+ $follow_skip, $full_check, $untaint, $untaint_skip, $untaint_pat);
+
+sub contract_name {
+ my ($cdir,$fn) = @_;
+
+ return substr($cdir,0,rindex($cdir,'/')) if $fn eq '.';
+
+ $cdir = substr($cdir,0,rindex($cdir,'/')+1);
+
+ $fn =~ s|^\./||;
+
+ my $abs_name= $cdir . $fn;
+
+ if (substr($fn,0,3) eq '../') {
+ do 1 while ($abs_name=~ s|/(?>[^/]+)/\.\./|/|);
+ }
+
+ return $abs_name;
+}
+
+
+sub PathCombine($$) {
+ my ($Base,$Name) = @_;
+ my $AbsName;
+
+ if (substr($Name,0,1) eq '/') {
+ $AbsName= $Name;
+ }
+ else {
+ $AbsName= contract_name($Base,$Name);
+ }
+
+ # (simple) check for recursion
+ my $newlen= length($AbsName);
+ if ($newlen <= length($Base)) {
+ if (($newlen == length($Base) || substr($Base,$newlen,1) eq '/')
+ && $AbsName eq substr($Base,0,$newlen))
+ {
+ return undef;
+ }
+ }
+ return $AbsName;
+}
+
+sub Follow_SymLink($) {
+ my ($AbsName) = @_;
+
+ my ($NewName,$DEV, $INO);
+ ($DEV, $INO)= lstat $AbsName;
+
+ while (-l _) {
+ if ($SLnkSeen{$DEV, $INO}++) {
+ if ($follow_skip < 2) {
+ die "$AbsName is encountered a second time";
}
else {
- warn "Can't cd to $topdir: $!\n";
+ return undef;
}
}
- else {
- require File::Basename;
- unless (($_,$dir) = File::Basename::fileparse($topdir)) {
- ($dir,$_) = ('.', $topdir);
+ $NewName= PathCombine($AbsName, readlink($AbsName));
+ unless(defined $NewName) {
+ if ($follow_skip < 2) {
+ die "$AbsName is a recursive symbolic link";
+ }
+ else {
+ return undef;
}
- if (chdir($dir)) {
- $name = $topdir;
- $wanted->{wanted}->();
+ }
+ else {
+ $AbsName= $NewName;
+ }
+ ($DEV, $INO) = lstat($AbsName);
+ return undef unless defined $DEV; # dangling symbolic link
+ }
+
+ if ($full_check && $SLnkSeen{$DEV, $INO}++) {
+ if ($follow_skip < 1) {
+ die "$AbsName encountered a second time";
+ }
+ else {
+ return undef;
+ }
+ }
+
+ return $AbsName;
+}
+
+use vars qw/ $dir $name $fullname $prune /;
+sub _find_dir_symlnk($$$);
+sub _find_dir($$$);
+
+sub _find_opt {
+ my $wanted = shift;
+ die "invalid top directory" unless defined $_[0];
+
+ my $cwd = $wanted->{bydepth} ? Cwd::fastcwd() : Cwd::cwd();
+ my $cwd_untainted = $cwd;
+ $wanted_callback = $wanted->{wanted};
+ $bydepth = $wanted->{bydepth};
+ $no_chdir = $wanted->{no_chdir};
+ $full_check = $wanted->{follow};
+ $follow = $full_check || $wanted->{follow_fast};
+ $follow_skip = $wanted->{follow_skip};
+ $untaint = $wanted->{untaint};
+ $untaint_pat = $wanted->{untaint_pattern};
+ $untaint_skip = $wanted->{untaint_skip};
+
+ # for compatability reasons (find.pl, find2perl)
+ our ($topdir, $topdev, $topino, $topmode, $topnlink);
+
+ # a symbolic link to a directory doesn't increase the link count
+ $avoid_nlink = $follow || $File::Find::dont_use_nlink;
+
+ if ( $untaint ) {
+ $cwd_untainted= $1 if $cwd_untainted =~ m|$untaint_pat|;
+ die "insecure cwd in find(depth)" unless defined($cwd_untainted);
+ }
+
+ my ($abs_dir, $Is_Dir);
+
+ Proc_Top_Item:
+ foreach my $TOP (@_) {
+ my $top_item = $TOP;
+ $top_item =~ s|/$|| unless $top_item eq '/';
+ $Is_Dir= 0;
+
+ if ($follow) {
+ if (substr($top_item,0,1) eq '/') {
+ $abs_dir = $top_item;
+ }
+ elsif ($top_item eq '.') {
+ $abs_dir = $cwd;
}
+ else { # care about any ../
+ $abs_dir = contract_name("$cwd/",$top_item);
+ }
+ $abs_dir= Follow_SymLink($abs_dir);
+ unless (defined $abs_dir) {
+ warn "$top_item is a dangling symbolic link\n";
+ next Proc_Top_Item;
+ }
+ if (-d _) {
+ _find_dir_symlnk($wanted, $abs_dir, $top_item);
+ $Is_Dir= 1;
+ }
+ }
+ else { # no follow
+ $topdir = $top_item;
+ ($topdev,$topino,$topmode,$topnlink) = lstat $top_item;
+ unless (defined $topnlink) {
+ warn "Can't stat $top_item: $!\n";
+ next Proc_Top_Item;
+ }
+ if (-d _) {
+ $top_item =~ s/\.dir$// if $Is_VMS;
+ _find_dir($wanted, $top_item, $topnlink);
+ $Is_Dir= 1;
+ }
else {
- warn "Can't cd to $dir: $!\n";
+ $abs_dir= $top_item;
+ }
+ }
+
+ unless ($Is_Dir) {
+ unless (($_,$dir) = File::Basename::fileparse($abs_dir)) {
+ ($dir,$_) = ('.', $top_item);
+ }
+
+ $abs_dir = $dir;
+ if ($untaint) {
+ my $abs_dir_save = $abs_dir;
+ $abs_dir = $1 if $abs_dir =~ m|$untaint_pat|;
+ unless (defined $abs_dir) {
+ if ($untaint_skip == 0) {
+ die "directory $abs_dir_save is still tainted";
+ }
+ else {
+ next Proc_Top_Item;
+ }
+ }
+ }
+
+ unless ($no_chdir or chdir $abs_dir) {
+ warn "Couldn't chdir $abs_dir: $!\n";
+ next Proc_Top_Item;
+ }
+
+ $name = $abs_dir;
+
+ &$wanted_callback;
+
+ }
+
+ $no_chdir or chdir $cwd_untainted;
+ }
+}
+
+# API:
+# $wanted
+# $p_dir : "parent directory"
+# $nlink : what came back from the stat
+# preconditions:
+# chdir (if not no_chdir) to dir
+
+sub _find_dir($$$) {
+ my ($wanted, $p_dir, $nlink) = @_;
+ my ($CdLvl,$Level) = (0,0);
+ my @Stack;
+ my @filenames;
+ my ($subcount,$sub_nlink);
+ my $SE= [];
+ my $dir_name= $p_dir;
+ my $dir_pref= ( $p_dir eq '/' ? '/' : "$p_dir/" );
+ my $dir_rel= '.'; # directory name relative to current directory
+
+ local ($dir, $name, $prune, *DIR);
+
+ unless ($no_chdir or $p_dir eq '.') {
+ my $udir = $p_dir;
+ if ($untaint) {
+ $udir = $1 if $p_dir =~ m|$untaint_pat|;
+ unless (defined $udir) {
+ if ($untaint_skip == 0) {
+ die "directory $p_dir is still tainted";
+ }
+ else {
+ return;
+ }
}
}
+ unless (chdir $udir) {
+ warn "Can't cd to $udir: $!\n";
+ return;
+ }
+ }
+
+ while (defined $SE) {
+ unless ($bydepth) {
+ $dir= $p_dir;
+ $name= $dir_name;
+ $_= ($no_chdir ? $dir_name : $dir_rel );
+ # prune may happen here
+ $prune= 0;
+ &$wanted_callback;
+ next if $prune;
+ }
+
+ # change to that directory
+ unless ($no_chdir or $dir_rel eq '.') {
+ my $udir= $dir_rel;
+ if ($untaint) {
+ $udir = $1 if $dir_rel =~ m|$untaint_pat|;
+ unless (defined $udir) {
+ if ($untaint_skip == 0) {
+ die "directory ("
+ . ($p_dir ne '/' ? $p_dir : '')
+ . "/) $dir_rel is still tainted";
+ }
+ }
+ }
+ unless (chdir $udir) {
+ warn "Can't cd to ("
+ . ($p_dir ne '/' ? $p_dir : '')
+ . "/) $udir : $!\n";
+ next;
+ }
+ $CdLvl++;
+ }
+
+ $dir= $dir_name;
+
+ # Get the list of files in the current directory.
+ unless (opendir DIR, ($no_chdir ? $dir_name : '.')) {
+ warn "Can't opendir($dir_name): $!\n";
+ next;
+ }
+ @filenames = readdir DIR;
+ closedir(DIR);
+
+ if ($nlink == 2 && !$avoid_nlink) {
+ # This dir has no subdirectories.
+ for my $FN (@filenames) {
+ next if $FN =~ /^\.{1,2}$/;
+
+ $name = $dir_pref . $FN;
+ $_ = ($no_chdir ? $name : $FN);
+ &$wanted_callback;
+ }
+
+ }
+ else {
+ # This dir has subdirectories.
+ $subcount = $nlink - 2;
+
+ for my $FN (@filenames) {
+ next if $FN =~ /^\.{1,2}$/;
+ if ($subcount > 0 || $avoid_nlink) {
+ # Seen all the subdirs?
+ # check for directoriness.
+ # stat is faster for a file in the current directory
+ $sub_nlink = (lstat ($no_chdir ? $dir_pref . $FN : $FN))[3];
+
+ if (-d _) {
+ --$subcount;
+ $FN =~ s/\.dir$// if $Is_VMS;
+ push @Stack,[$CdLvl,$dir_name,$FN,$sub_nlink];
+ }
+ else {
+ $name = $dir_pref . $FN;
+ $_= ($no_chdir ? $name : $FN);
+ &$wanted_callback;
+ }
+ }
+ else {
+ $name = $dir_pref . $FN;
+ $_= ($no_chdir ? $name : $FN);
+ &$wanted_callback;
+ }
+ }
+ }
+ if ($bydepth) {
+ $name = $dir_name;
+ $dir = $p_dir;
+ $_ = ($no_chdir ? $dir_name : $dir_rel );
+ &$wanted_callback;
+ }
}
continue {
- chdir $cwd;
+ if ( defined ($SE = pop @Stack) ) {
+ ($Level, $p_dir, $dir_rel, $nlink) = @$SE;
+ if ($CdLvl > $Level && !$no_chdir) {
+ die "Can't cd to $dir_name" . '../' x ($CdLvl-$Level)
+ unless chdir '../' x ($CdLvl-$Level);
+ $CdLvl = $Level;
+ }
+ $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
+ $dir_pref = "$dir_name/";
+ }
}
}
-sub finddir {
- my($wanted, $nlink, $bydepth);
- local($dir, $name);
- ($wanted, $dir, $nlink, $bydepth) = @_;
-
- my($dev, $ino, $mode, $subcount);
-
- # Get the list of files in the current directory.
- opendir(DIR,'.') || (warn("Can't open $dir: $!\n"), $bydepth || return);
- my(@filenames) = readdir(DIR);
- closedir(DIR);
-
- if ($nlink == 2 && !$dont_use_nlink) { # This dir has no subdirectories.
- for (@filenames) {
- next if $_ eq '.';
- next if $_ eq '..';
- $name = "$dir/$_";
- $nlink = 0;
- $wanted->{wanted}->();
- }
- }
- else { # This dir has subdirectories.
- $subcount = $nlink - 2;
- for (@filenames) {
- next if $_ eq '.';
- next if $_ eq '..';
- $nlink = 0;
- $prune = 0 unless $bydepth;
- $name = "$dir/$_";
- $wanted->{wanted}->() unless $bydepth;
- if ($subcount > 0 || $dont_use_nlink) { # Seen all the subdirs?
-
- # Get link count and check for directoriness.
-
- $_ = "" if (!defined($_));
- ($dev,$ino,$mode,$nlink) = ($Is_VMS ? stat($_) : lstat($_));
- # unless ($nlink || $dont_use_nlink);
-
- if (-d _) {
-
- # It really is a directory, so do it recursively.
-
- --$subcount;
- next if $prune;
- if (chdir $_) {
- $name =~ s/\.dir$// if $Is_VMS;
- &finddir($wanted,$name,$nlink, $bydepth);
- chdir '..';
+
+# API:
+# $wanted
+# $dir_loc : absolute location of a dir
+# $p_dir : "parent directory"
+# preconditions:
+# chdir (if not no_chdir) to dir
+
+sub _find_dir_symlnk($$$) {
+ my ($wanted, $dir_loc, $p_dir) = @_;
+ my @Stack;
+ my @filenames;
+ my $new_loc;
+ my $SE = [];
+ my $dir_name = $p_dir;
+ my $dir_pref = ( $p_dir eq '/' ? '/' : "$p_dir/" );
+ my $loc_pref = ( $dir_loc eq '/' ? '/' : "$dir_loc/" );
+ my $dir_rel = '.'; # directory name relative to current directory
+
+ local ($dir, $name, $fullname, $prune, *DIR);
+
+ unless ($no_chdir or $p_dir eq '.') {
+ my $udir = $dir_loc;
+ if ($untaint) {
+ $udir = $1 if $dir_loc =~ m|$untaint_pat|;
+ unless (defined $udir) {
+ if ($untaint_skip == 0) {
+ die "directory $dir_loc is still tainted";
+ }
+ else {
+ return;
+ }
+ }
+ }
+ unless (chdir $udir) {
+ warn "Can't cd to $udir: $!\n";
+ return;
+ }
+ }
+
+ while (defined $SE) {
+
+ unless ($bydepth) {
+ $dir= $p_dir;
+ $name= $dir_name;
+ $_= ($no_chdir ? $dir_name : $dir_rel );
+ $fullname= $dir_loc;
+ # prune may happen here
+ $prune= 0;
+ &$wanted_callback;
+ next if $prune;
+ }
+
+ # change to that directory
+ unless ($no_chdir or $dir_rel eq '.') {
+ my $udir = $dir_loc;
+ if ($untaint) {
+ $udir = $1 if $dir_loc =~ m|$untaint_pat|;
+ unless (defined $udir ) {
+ if ($untaint_skip == 0) {
+ die "directory $dir_loc is still tainted";
}
else {
- warn "Can't cd to $_: $!\n";
+ next;
}
}
}
- $wanted->{wanted}->() if $bydepth;
+ unless (chdir $udir) {
+ warn "Can't cd to $udir: $!\n";
+ next;
+ }
+ }
+
+ $dir = $dir_name;
+
+ # Get the list of files in the current directory.
+ unless (opendir DIR, ($no_chdir ? $dir_loc : '.')) {
+ warn "Can't opendir($dir_loc): $!\n";
+ next;
+ }
+ @filenames = readdir DIR;
+ closedir(DIR);
+
+ for my $FN (@filenames) {
+ next if $FN =~ /^\.{1,2}$/;
+
+ # follow symbolic links / do an lstat
+ $new_loc = Follow_SymLink($loc_pref.$FN);
+
+ # ignore if invalid symlink
+ next unless defined $new_loc;
+
+ if (-d _) {
+ push @Stack,[$new_loc,$dir_name,$FN];
+ }
+ else {
+ $fullname = $new_loc;
+ $name = $dir_pref . $FN;
+ $_ = ($no_chdir ? $name : $FN);
+ &$wanted_callback;
+ }
+ }
+
+ if ($bydepth) {
+ $fullname = $dir_loc;
+ $name = $dir_name;
+ $_ = ($no_chdir ? $dir_name : $dir_rel);
+ &$wanted_callback;
+ }
+ }
+ continue {
+ if (defined($SE = pop @Stack)) {
+ ($dir_loc, $p_dir, $dir_rel) = @$SE;
+ $dir_name = ($p_dir eq '/' ? "/$dir_rel" : "$p_dir/$dir_rel");
+ $dir_pref = "$dir_name/";
+ $loc_pref = "$dir_loc/";
}
}
}
+
sub wrap_wanted {
- my $wanted = shift;
- ref($wanted) eq 'HASH' ? $wanted : { wanted => $wanted };
+ my $wanted = shift;
+ if ( ref($wanted) eq 'HASH' ) {
+ if ( $wanted->{follow} || $wanted->{follow_fast}) {
+ $wanted->{follow_skip} = 1 unless defined $wanted->{follow_skip};
+ }
+ if ( $wanted->{untaint} ) {
+ $wanted->{untaint_pattern} = qr|^([-+@\w./]+)$|
+ unless defined $wanted->{untaint_pattern};
+ $wanted->{untaint_skip} = 0 unless defined $wanted->{untaint_skip};
+ }
+ return $wanted;
+ }
+ else {
+ return { wanted => $wanted };
+ }
}
sub find {
- my $wanted = shift;
- find_opt(wrap_wanted($wanted), @_);
+ my $wanted = shift;
+ _find_opt(wrap_wanted($wanted), @_);
+ %SLnkSeen= (); # free memory
}
sub finddepth {
- my $wanted = wrap_wanted(shift);
- $wanted->{bydepth} = 1;
- find_opt($wanted, @_);
+ my $wanted = wrap_wanted(shift);
+ $wanted->{bydepth} = 1;
+ _find_opt($wanted, @_);
+ %SLnkSeen= (); # free memory
}
# These are hard-coded for now, but may move to hint files.
if ($^O eq 'VMS') {
- $Is_VMS = 1;
- $dont_use_nlink = 1;
+ $Is_VMS = 1;
+ $File::Find::dont_use_nlink = 1;
}
-$dont_use_nlink = 1
+$File::Find::dont_use_nlink = 1
if $^O eq 'os2' || $^O eq 'dos' || $^O eq 'amigaos' || $^O eq 'MSWin32';
# Set dont_use_nlink in your hint file if your system's stat doesn't
# report the number of links in a directory as an indication
# of the number of files.
# See, e.g. hints/machten.sh for MachTen 2.2.
-unless ($dont_use_nlink) {
- require Config;
- $dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
+unless ($File::Find::dont_use_nlink) {
+ require Config;
+ $File::Find::dont_use_nlink = 1 if ($Config::Config{'dont_use_nlink'});
}
1;
-
diff --git a/lib/File/Path.pm b/lib/File/Path.pm
index 729037294b..634b2cd108 100644
--- a/lib/File/Path.pm
+++ b/lib/File/Path.pm
@@ -2,15 +2,14 @@ package File::Path;
=head1 NAME
-File::Path - create or remove a series of directories
+File::Path - create or remove directory trees
=head1 SYNOPSIS
-C<use File::Path>
+ use File::Path;
-C<mkpath(['/foo/bar/baz', 'blurfl/quux'], 1, 0711);>
-
-C<rmtree(['foo/bar/baz', 'blurfl/quux'], 1, 1);>
+ mkpath(['/foo/bar/baz', 'blurfl/quux'], 1, 0711);
+ rmtree(['foo/bar/baz', 'blurfl/quux'], 1, 1);
=head1 DESCRIPTION
@@ -127,13 +126,15 @@ sub mkpath {
my $parent = File::Basename::dirname($path);
# Allow for creation of new logical filesystems under VMS
if (not $Is_VMS or $parent !~ m:/[^/]+/000000/?:) {
- push(@created,mkpath($parent, $verbose, $mode)) unless (-d $parent);
+ unless (-d $parent or $path eq $parent) {
+ push(@created,mkpath($parent, $verbose, $mode));
+ }
}
print "mkdir $path\n" if $verbose;
unless (mkdir($path,$mode)) {
- my $e = $!;
- # allow for another process to have created it meanwhile
- croak "mkdir $path: $e" unless -d $path;
+ my $e = $!;
+ # allow for another process to have created it meanwhile
+ croak "mkdir $path: $e" unless -d $path;
}
push(@created, $path);
}
diff --git a/lib/File/Spec.pm b/lib/File/Spec.pm
index b71e357cdc..40f5345140 100644
--- a/lib/File/Spec.pm
+++ b/lib/File/Spec.pm
@@ -86,4 +86,7 @@ Kenneth Albanowski <F<kjahds@kjahds.com>>, Andy Dougherty
<F<A.Koenig@franz.ww.TU-Berlin.DE>>, Tim Bunce <F<Tim.Bunce@ig.co.uk>>. VMS
support by Charles Bailey <F<bailey@newman.upenn.edu>>. OS/2 support by
Ilya Zakharevich <F<ilya@math.ohio-state.edu>>. Mac support by Paul Schinder
-<F<schinder@pobox.com>>.
+<F<schinder@pobox.com>>. abs2rel() and rel2abs() written by
+Shigio Yamaguchi <F<shigio@tamacom.com>>, modified by Barrie Slaymaker
+<F<barries@slaysys.com>>. splitpath(), splitdir(), catpath() and catdir()
+by Barrie Slaymaker.
diff --git a/lib/File/Spec/Unix.pm b/lib/File/Spec/Unix.pm
index 87ad643fe2..85df2c2d3b 100644
--- a/lib/File/Spec/Unix.pm
+++ b/lib/File/Spec/Unix.pm
@@ -41,7 +41,7 @@ ricochet (some scripts depend on it).
sub canonpath {
my ($self,$path,$reduce_ricochet) = @_;
- $path =~ s|/+|/|g unless($^O =~ /cygwin/); # xx////xx -> xx/xx
+ $path =~ s|/+|/|g unless($^O eq 'cygwin'); # xx////xx -> xx/xx
$path =~ s|(/\.)+/|/|g; # xx/././xx -> xx/xx
$path =~ s|^(\./)+|| unless $path eq "./"; # ./xx -> xx
$path =~ s|^/(\.\./)+|/|; # /../../xx -> xx
diff --git a/lib/FindBin.pm b/lib/FindBin.pm
index 9e1c0a06bf..9d35f6f9c9 100644
--- a/lib/FindBin.pm
+++ b/lib/FindBin.pm
@@ -82,7 +82,7 @@ use File::Spec;
%EXPORT_TAGS = (ALL => [qw($Bin $Script $RealBin $RealScript $Dir $RealDir)]);
@ISA = qw(Exporter);
-$VERSION = $VERSION = "1.42";
+$VERSION = "1.42";
BEGIN
{
diff --git a/lib/Getopt/Std.pm b/lib/Getopt/Std.pm
index 390bf14e96..e027bad3d2 100644
--- a/lib/Getopt/Std.pm
+++ b/lib/Getopt/Std.pm
@@ -42,7 +42,7 @@ the argument or 1 if no argument is specified.
@ISA = qw(Exporter);
@EXPORT = qw(getopt getopts);
-$VERSION = $VERSION = '1.01';
+$VERSION = '1.01';
# Process single-character switches with switch clustering. Pass one argument
# which is a string containing all switches that take an argument. For each
diff --git a/lib/Math/BigFloat.pm b/lib/Math/BigFloat.pm
index 8aa6a6604b..1a9195e185 100644
--- a/lib/Math/BigFloat.pm
+++ b/lib/Math/BigFloat.pm
@@ -240,12 +240,13 @@ sub fcmp #(fnum_str, fnum_str) return cond_code
if ($x eq "NaN" || $y eq "NaN") {
undef;
} else {
+ local($xm,$xe,$ym,$ye) = split('E', $x."E$y");
+ if ($xm eq '+0' || $ym eq '+0') {
+ return $xm <=> $ym;
+ }
ord($y) <=> ord($x)
- ||
- ( local($xm,$xe,$ym,$ye) = split('E', $x."E$y"),
- (($xe <=> $ye) * (substr($x,$[,1).'1')
- || Math::BigInt::cmp($xm,$ym))
- );
+ || ($xe <=> $ye) * (substr($x,$[,1).'1')
+ || Math::BigInt::cmp($xm,$ym);
}
}
diff --git a/lib/Math/Complex.pm b/lib/Math/Complex.pm
index 5b69039afc..b339573bc9 100644
--- a/lib/Math/Complex.pm
+++ b/lib/Math/Complex.pm
@@ -1746,7 +1746,7 @@ Whatever it is, it does not manifest itself anywhere else where Perl runs.
=head1 AUTHORS
-Raphael Manfredi <F<Raphael_Manfredi@grenoble.hp.com>> and
+Raphael Manfredi <F<Raphael_Manfredi@pobox.com>> and
Jarkko Hietaniemi <F<jhi@iki.fi>>.
Extensive patches by Daniel S. Lewart <F<d-lewart@uiuc.edu>>.
diff --git a/lib/Math/Trig.pm b/lib/Math/Trig.pm
index d987b5cc76..c659137eba 100644
--- a/lib/Math/Trig.pm
+++ b/lib/Math/Trig.pm
@@ -435,7 +435,7 @@ an answer instead of giving a fatal runtime error.
=head1 AUTHORS
Jarkko Hietaniemi <F<jhi@iki.fi>> and
-Raphael Manfredi <F<Raphael_Manfredi@grenoble.hp.com>>.
+Raphael Manfredi <F<Raphael_Manfredi@pobox.com>>.
=cut
diff --git a/lib/Net/Ping.pm b/lib/Net/Ping.pm
index 495b82f95b..54540601d3 100644
--- a/lib/Net/Ping.pm
+++ b/lib/Net/Ping.pm
@@ -4,7 +4,7 @@ package Net::Ping;
#
# Authors of the original pingecho():
# karrer@bernina.ethz.ch (Andreas Karrer)
-# pmarquess@bfsec.bt.co.uk (Paul Marquess)
+# Paul.Marquess@btinternet.com (Paul Marquess)
#
# Copyright (c) 1996 Russell Mosemann. All rights reserved. This
# program is free software; you may redistribute it and/or modify it
diff --git a/lib/Pod/Checker.pm b/lib/Pod/Checker.pm
index 6607ad9375..aa5c5490ae 100644
--- a/lib/Pod/Checker.pm
+++ b/lib/Pod/Checker.pm
@@ -1,10 +1,7 @@
#############################################################################
# Pod/Checker.pm -- check pod documents for syntax errors
#
-# Based on Tom Christiansen's Pod::Text::pod2text() function
-# (with modifications).
-#
-# Copyright (C) 1994-1999 Tom Christiansen. All rights reserved.
+# Copyright (C) 1994-1999 by Bradford Appleton. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
@@ -13,7 +10,7 @@
package Pod::Checker;
use vars qw($VERSION);
-$VERSION = 1.081; ## Current version of this package
+$VERSION = 1.090; ## Current version of this package
require 5.004; ## requires this Perl version or later
=head1 NAME
@@ -24,7 +21,7 @@ Pod::Checker, podchecker() - check pod documents for syntax errors
use Pod::Checker;
- $syntax_okay = podchecker($filepath, $outputpath);
+ $syntax_okay = podchecker($filepath, $outputpath, %options);
=head1 OPTIONS/ARGUMENTS
@@ -34,6 +31,15 @@ indcating a file-path, or else a reference to an open filehandle.
If unspecified, the input-file it defaults to C<\*STDIN>, and
the output-file defaults to C<\*STDERR>.
+=head2 Options
+
+=over 4
+
+=item B<-warnings> =E<gt> I<val>
+
+Turn warnings on/off. See L<"Warnings">.
+
+=back
=head1 DESCRIPTION
@@ -46,13 +52,83 @@ unknown 'X<...>' interior-sequences, and unterminated interior sequences.
It is hoped that curious/ambitious user will help flesh out and add the
additional features they wish to see in B<Pod::Checker> and B<podchecker>.
+The following additional checks are preformed:
+
+=over 4
+
+=item *
+
+Check for proper balancing of C<=begin> and C<=end>.
+
+=item *
+
+Check for proper nesting and balancing of C<=over>, C<=item> and C<=back>.
+
+=item *
+
+Check for same nested interior-sequences (e.g. C<LE<lt>...LE<lt>...E<gt>...E<gt>>).
+
+=item *
+
+Check for malformed entities.
+
+=item *
+
+Check for correct syntax of hyperlinks C<LE<lt>E<gt>>. See L<perlpod> for
+details.
+
+=item *
+
+Check for unresolved document-internal links.
+
+=back
+
+=head2 Warnings
+
+The following warnings are printed. These may not necessarily cause trouble,
+but indicate mediocre style.
+
+=over 4
+
+=item *
+
+Spurious characters after C<=back> and C<=end>.
+
+=item *
+
+Unescaped C<E<lt>> and C<E<gt>> in the text.
+
+=item *
+
+Missing arguments for C<=begin> and C<=over>.
+
+=item *
+
+Empty C<=over> / C<=back> list.
+
+=item *
+
+Hyperlinks: leading/trailing whitespace, brackets C<()> in the page name.
+
+=back
+
+=head1 DIAGNOSTICS
+
+I<[T.B.D.]>
+
+=head1 RETURN VALUE
+
+B<podchecker> returns the number of POD syntax errors found or -1 if
+there were no POD commands at all found in the file.
+
=head1 EXAMPLES
I<[T.B.D.]>
=head1 AUTHOR
-Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version)
+Brad Appleton E<lt>bradapp@enteract.comE<gt> (initial version),
+Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>
Based on code for B<Pod::Text::pod2text()> written by
Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
@@ -104,8 +180,8 @@ my %VALID_SEQUENCES = (
## Function definitions begin here
##---------------------------------
-sub podchecker( $ ; $ ) {
- my ($infile, $outfile) = @_;
+sub podchecker( $ ; $ % ) {
+ my ($infile, $outfile, %options) = @_;
local $_;
## Set defaults
@@ -113,7 +189,7 @@ sub podchecker( $ ; $ ) {
$outfile ||= \*STDERR;
## Now create a pod checker
- my $checker = new Pod::Checker();
+ my $checker = new Pod::Checker(%options);
## Now check the pod document for errors
$checker->parse_from_file($infile, $outfile);
@@ -140,45 +216,335 @@ sub new {
sub initialize {
my $self = shift;
- $self->num_errors(0);
+ ## Initialize number of errors, and setup an error function to
+ ## increment this number and then print to the designated output.
+ $self->{_NUM_ERRORS} = 0;
+ $self->errorsub('poderror');
+ $self->{_commands} = 0; # total number of POD commands encountered
+ $self->{_list_stack} = []; # stack for nested lists
+ $self->{_have_begin} = ''; # stores =begin
+ $self->{_links} = []; # stack for internal hyperlinks
+ $self->{_nodes} = []; # stack for =head/=item nodes
+ $self->{-warnings} = 1 unless(defined $self->{-warnings});
+}
+
+## Invoked as $self->poderror( @args ), or $self->poderror( {%opts}, @args )
+sub poderror {
+ my $self = shift;
+ my %opts = (ref $_[0]) ? %{shift()} : ();
+
+ ## Retrieve options
+ chomp( my $msg = ($opts{-msg} || "")."@_" );
+ my $line = (exists $opts{-line}) ? " at line $opts{-line}" : "";
+ my $file = (exists $opts{-file}) ? " in file $opts{-file}" : "";
+ my $severity = (exists $opts{-severity}) ? "*** $opts{-severity}: " : "";
+
+ ## Increment error count and print message "
+ ++($self->{_NUM_ERRORS})
+ if(!%opts || ($opts{-severity} && $opts{-severity} eq 'ERROR'));
+ my $out_fh = $self->output_handle();
+ print $out_fh ($severity, $msg, $line, $file, "\n");
}
sub num_errors {
return (@_ > 1) ? ($_[0]->{_NUM_ERRORS} = $_[1]) : $_[0]->{_NUM_ERRORS};
}
+## overrides for Pod::Parser
+
sub end_pod {
- ## Print the number of errors found
+ ## Do some final checks and
+ ## print the number of errors found
my $self = shift;
my $infile = $self->input_file();
my $out_fh = $self->output_handle();
+ if(@{$self->{_list_stack}}) {
+ # _TODO_ display, but don't count them for now
+ my $list;
+ while($list = shift(@{$self->{_list_stack}})) {
+ $self->poderror({ -line => 'EOF', -file => $infile,
+ -severity => 'ERROR', -msg => "=over on line " .
+ $list->start() . " without closing =back" }); #"
+ }
+ }
+
+ # check validity of document internal hyperlinks
+ # first build the node names from the paragraph text
+ my %nodes;
+ foreach($self->node()) {
+ #print "Have node: +$_+\n";
+ $nodes{$_} = 1;
+ if(/^(\S+)\s+/) {
+ # we have more than one word. Use the first as a node, too.
+ # This is used heavily in perlfunc.pod
+ $nodes{$1} ||= 2; # derived node
+ }
+ }
+ foreach($self->hyperlink()) {
+ #print "Seek node: +$_+\n";
+ my $line = '';
+ s/^(\d+):// && ($line = $1);
+ if($_ && !$nodes{$_}) {
+ $self->poderror({ -line => $line, -file => $infile,
+ -severity => 'ERROR',
+ -msg => "unresolved internal link `$_'"});
+ }
+ }
+
+ ## Print the number of errors found
my $num_errors = $self->num_errors();
if ($num_errors > 0) {
printf $out_fh ("$infile has $num_errors pod syntax %s.\n",
($num_errors == 1) ? "error" : "errors");
}
+ elsif($self->{_commands} == 0) {
+ print $out_fh "$infile does not contain any pod commands.\n";
+ $self->num_errors(-1);
+ }
else {
print $out_fh "$infile pod syntax OK.\n";
}
}
sub command {
- my ($self, $command, $paragraph, $line_num, $pod_para) = @_;
+ my ($self, $cmd, $paragraph, $line_num, $pod_para) = @_;
my ($file, $line) = $pod_para->file_line;
- my $out_fh = $self->output_handle();
## Check the command syntax
- if (! $VALID_COMMANDS{$command}) {
- ++($self->{_NUM_ERRORS});
- _invalid_cmd($out_fh, $command, $paragraph, $file, $line);
+ my $arg; # this will hold the command argument
+ if (! $VALID_COMMANDS{$cmd}) {
+ $self->poderror({ -line => $line, -file => $file, -severity => 'ERROR',
+ -msg => "Unknown command \"$cmd\"" });
}
else {
- ## check syntax of particular command
+ $self->{_commands}++; # found a valid command
+ ## check syntax of particular command
+ if($cmd eq 'over') {
+ # start a new list
+ unshift(@{$self->{_list_stack}},
+ Pod::List->new(
+ -indent => $paragraph,
+ -start => $line,
+ -file => $file));
+ }
+ elsif($cmd eq 'item') {
+ unless(@{$self->{_list_stack}}) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => "=item without previous =over" });
+ }
+ else {
+ # check for argument
+ $arg = $self->_interpolate_and_check($paragraph, $line, $file);
+ unless($arg && $arg =~ /(\S+)/) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => "No argument for =item" });
+ }
+ # add this item
+ $self->{_list_stack}[0]->item($arg || '');
+ # remember this node
+ $self->node($arg) if($arg);
+ }
+ }
+ elsif($cmd eq 'back') {
+ # check if we have an open list
+ unless(@{$self->{_list_stack}}) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => "=back without previous =over" });
+ }
+ else {
+ # check for spurious characters
+ $arg = $self->_interpolate_and_check($paragraph, $line,$file);
+ if($arg && $arg =~ /\S/) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => "Spurious character(s) after =back" });
+ }
+ # close list
+ my $list = shift @{$self->{_list_stack}};
+ # check for empty lists
+ if(!$list->item() && $self->{-warnings}) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => "No items in =over (at line " .
+ $list->start() . ") / =back list"}); #"
+ }
+ }
+ }
+ elsif($cmd =~ /^head/) {
+ # check if there is an open list
+ if(@{$self->{_list_stack}}) {
+ my $list;
+ while($list = shift(@{$self->{_list_stack}})) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => "unclosed =over (line ". $list->start() .
+ ") at $cmd" });
+ }
+ }
+ # remember this node
+ $arg = $self->_interpolate_and_check($paragraph, $line,$file);
+ $self->node($arg) if($arg);
+ }
+ elsif($cmd eq 'begin') {
+ if($self->{_have_begin}) {
+ # already have a begin
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => "Nested =begin's (first at line " .
+ $self->{_have_begin} . ")"});
+ }
+ else {
+ # check for argument
+ $arg = $self->_interpolate_and_check($paragraph, $line,$file);
+ unless($arg && $arg =~ /(\S+)/) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => "No argument for =begin"});
+ }
+ # remember the =begin
+ $self->{_have_begin} = "$line:$1";
+ }
+ }
+ elsif($cmd eq 'end') {
+ if($self->{_have_begin}) {
+ # close the existing =begin
+ $self->{_have_begin} = '';
+ # check for spurious characters
+ $arg = $self->_interpolate_and_check($paragraph, $line,$file);
+ if($arg && $arg =~ /\S/) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => "Spurious character(s) after =end" });
+ }
+ }
+ else {
+ # don't have a matching =begin
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => "=end without =begin" });
+ }
+ }
}
## Check the interior sequences in the command-text
- my $expansion = $self->interpolate($paragraph, $line_num);
+ $self->_interpolate_and_check($paragraph, $line,$file)
+ unless(defined $arg);
+}
+
+sub _interpolate_and_check {
+ my ($self, $paragraph, $line, $file) = @_;
+ ## Check the interior sequences in the command-text
+ # and return the text
+ $self->_check_ptree(
+ $self->parse_text($paragraph,$line), $line, $file, '');
+}
+
+sub _check_ptree {
+ my ($self,$ptree,$line,$file,$nestlist) = @_;
+ local($_);
+ my $text = '';
+ # process each node in the parse tree
+ foreach(@$ptree) {
+ # regular text chunk
+ unless(ref) {
+ my $count;
+ # count the unescaped angle brackets
+ my $i = $_;
+ if($count = $i =~ s/[<>]/$self->expand_unescaped_bracket($&)/ge) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => "$count unescaped <>" });
+ }
+ $text .= $i;
+ next;
+ }
+ # have an interior sequence
+ my $cmd = $_->cmd_name();
+ my $contents = $_->parse_tree();
+ ($file,$line) = $_->file_line();
+ # check for valid tag
+ if (! $VALID_SEQUENCES{$cmd}) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => qq(Unknown interior-sequence "$cmd")});
+ # expand it anyway
+ $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
+ next;
+ }
+ if($nestlist =~ /$cmd/) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => "nested commands $cmd<...$cmd<...>...>"});
+ # _TODO_ should we add the contents anyway?
+ # expand it anyway, see below
+ }
+ if($cmd eq 'E') {
+ # preserve entities
+ if(@$contents > 1 || ref $$contents[0] || $$contents[0] !~ /^\w+$/) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => "garbled entity " . $_->raw_text()});
+ next;
+ }
+ $text .= $self->expand_entity($$contents[0]);
+ }
+ elsif($cmd eq 'L') {
+ # try to parse the hyperlink
+ my $link = Pod::Hyperlink->new($contents->raw_text());
+ unless(defined $link) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'ERROR',
+ -msg => "malformed link L<>: $@"});
+ next;
+ }
+ $link->line($line); # remember line
+ if($self->{-warnings}) {
+ foreach my $w ($link->warning()) {
+ $self->poderror({ -line => $line, -file => $file,
+ -severity => 'WARNING',
+ -msg => $w });
+ }
+ }
+ # check the link text
+ $text .= $self->_check_ptree($self->parse_text($link->text(),
+ $line), $line, $file, "$nestlist$cmd");
+ my $node = '';
+ $node = $self->_check_ptree($self->parse_text($link->node(),
+ $line), $line, $file, "$nestlist$cmd")
+ if($link->node());
+ # store internal link
+ # _TODO_ what if there is a link to the page itself by the name,
+ # e.g. Tk::Pod : L<Tk::Pod/"DESCRIPTION">
+ $self->hyperlink("$line:$node") if($node && !$link->page());
+ }
+ elsif($cmd =~ /[BCFIS]/) {
+ # add the guts
+ $text .= $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
+ }
+ else {
+ # check, but add nothing to $text (X<>, Z<>)
+ $self->_check_ptree($contents, $line, $file, "$nestlist$cmd");
+ }
+ }
+ $text;
+}
+
+# default method - just return it
+sub expand_unescaped_bracket {
+ my ($self,$bracket) = @_;
+ $bracket;
+}
+
+# keep the entities
+sub expand_entity {
+ my ($self,$entity) = @_;
+ "E<$entity>";
}
+# _TODO_ overloadable methods for BC..Z<...> expansion
+
sub verbatim {
## Nothing to check
## my ($self, $paragraph, $line_num, $pod_para) = @_;
@@ -186,39 +552,376 @@ sub verbatim {
sub textblock {
my ($self, $paragraph, $line_num, $pod_para) = @_;
- my $out_fh = $self->output_handle();
- ## Check the interior sequences in the text (set $SIG{__WARN__} to
- ## send parse_text warnings about untermnated sequences to $out_fh)
- local $SIG{__WARN__} = sub {
- ++($self->{_NUM_ERRORS});
- print $out_fh @_
- };
- my $expansion = $self->interpolate($paragraph, $line_num);
-}
-
-sub interior_sequence {
- my ($self, $seq_cmd, $seq_arg, $pod_seq) = @_;
- my ($file, $line) = $pod_seq->file_line;
- my $out_fh = $self->output_handle();
- ## Check the sequence syntax
- if (! $VALID_SEQUENCES{$seq_cmd}) {
- ++($self->{_NUM_ERRORS});
- _invalid_seq($out_fh, $seq_cmd, $seq_arg, $file, $line);
+ my ($file, $line) = $pod_para->file_line;
+ $self->_interpolate_and_check($paragraph, $line,$file);
+}
+
+# set/return nodes of the current POD
+sub node {
+ my ($self,$text) = @_;
+ if(defined $text) {
+ $text =~ s/[\s\n]+$//; # strip trailing whitespace
+ # add node
+ push(@{$self->{_nodes}}, $text);
+ return $text;
+ }
+ @{$self->{_nodes}};
+}
+
+# set/return hyperlinks of the current POD
+sub hyperlink {
+ my $self = shift;
+ if($_[0]) {
+ push(@{$self->{_links}}, $_[0]);
+ return $_[0];
+ }
+ @{$self->{_links}};
+}
+
+#-----------------------------------------------------------------------------
+# Pod::List
+#
+# class to hold POD list info (=over, =item, =back)
+#-----------------------------------------------------------------------------
+
+package Pod::List;
+
+use Carp;
+
+sub new {
+ my $this = shift;
+ my $class = ref($this) || $this;
+ my %params = @_;
+ my $self = {%params};
+ bless $self, $class;
+ $self->initialize();
+ return $self;
+}
+
+sub initialize {
+ my $self = shift;
+ $self->{-file} ||= 'unknown';
+ $self->{-start} ||= 'unknown';
+ $self->{-indent} ||= 4; # perlpod: "should be the default"
+ $self->{_items} = [];
+}
+
+# The POD file name the list appears in
+sub file {
+ return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
+}
+
+# The line in the file the node appears
+sub start {
+ return (@_ > 1) ? ($_[0]->{-start} = $_[1]) : $_[0]->{-start};
+}
+
+# indent level
+sub indent {
+ return (@_ > 1) ? ($_[0]->{-indent} = $_[1]) : $_[0]->{-indent};
+}
+
+# The individual =items of this list
+sub item {
+ my ($self,$item) = @_;
+ if(defined $item) {
+ push(@{$self->{_items}}, $item);
+ return $item;
}
else {
- ## check syntax of the particular sequence
+ return @{$self->{_items}};
}
}
-sub _invalid_cmd {
- my ($fh, $cmd, $text, $file, $line) = @_;
- print $fh "*** ERROR: Unknown command \"$cmd\""
- . " at line $line of file $file\n";
+#-----------------------------------------------------------------------------
+# Pod::Hyperlink
+#
+# class to hold hyperlinks (L<>)
+#-----------------------------------------------------------------------------
+
+package Pod::Hyperlink;
+
+=head1 NAME
+
+Pod::Hyperlink - class for manipulation of POD hyperlinks
+
+=head1 SYNOPSIS
+
+ my $link = Pod::Hyperlink->new('alternative text|page/"section in page"');
+
+=head1 DESCRIPTION
+
+The B<Pod::Hyperlink> class is mainly designed to parse the contents of the
+C<LE<lt>...E<gt>> sequence, providing a simple interface for accessing the
+different parts of a POD hyperlink.
+
+=head1 METHODS
+
+=over 4
+
+=item new()
+
+The B<new()> method can either be passed a set of key/value pairs or a single
+scalar value, namely the contents of a C<LE<lt>...E<gt>> sequence. An object
+of the class C<Pod::Hyperlink> is returned. The value C<undef> indicates a
+failure, the error message is stored in C<$@>.
+
+=item parse()
+
+This method can be used to (re)parse a (new) hyperlink. The result is stored
+in the current object.
+
+=item markup($on,$off,$pageon,$pageoff)
+
+The result of this method is a string the represents the textual value of the
+link, but with included arbitrary markers that highlight the active portion
+of the link. This will mainly be used by POD translators and saves the
+effort of determining which words have to be highlighted. Examples: Depending
+on the type of link, the following text will be returned, the C<*> represent
+the places where the section/item specific on/off markers will be placed
+(link to a specific node) and C<+> for the pageon/pageoff markers (link to the
+top of the page).
+
+ the +perl+ manpage
+ the *$|* entry in the +perlvar+ manpage
+ the section on *OPTIONS* in the +perldoc+ manpage
+ the section on *DESCRIPTION* elsewhere in this document
+
+This method is read-only.
+
+=item text()
+
+This method returns the textual representation of the hyperlink as above,
+but without markers (read only).
+
+=item warning()
+
+After parsing, this method returns any warnings ecountered during the
+parsing process.
+
+=item page()
+
+This method sets or returns the POD page this link points to.
+
+=item node()
+
+As above, but the destination node text of the link.
+
+=item type()
+
+The node type, either C<section> or C<item>.
+
+=item alttext()
+
+Sets or returns an alternative text specified in the link.
+
+=item line(), file()
+
+Just simple slots for storing information about the line and the file
+the link was incountered in. Has to be filled in manually.
+
+=back
+
+=head1 AUTHOR
+
+Marek Rouchal E<lt>marek@saftsack.fs.uni-bayreuth.deE<gt>, borrowing
+a lot of things from L<pod2man> and L<pod2roff>.
+
+=cut
+
+use Carp;
+
+sub new {
+ my $this = shift;
+ my $class = ref($this) || $this;
+ my $self = +{};
+ bless $self, $class;
+ $self->initialize();
+ if(defined $_[0]) {
+ if(ref($_[0])) {
+ # called with a list of parameters
+ %$self = %{$_[0]};
+ }
+ else {
+ # called with L<> contents
+ return undef unless($self->parse($_[0]));
+ }
+ }
+ return $self;
+}
+
+sub initialize {
+ my $self = shift;
+ $self->{-line} ||= 'undef';
+ $self->{-file} ||= 'undef';
+ $self->{-page} ||= '';
+ $self->{-node} ||= '';
+ $self->{-alttext} ||= '';
+ $self->{-type} ||= 'undef';
+ $self->{_warnings} = [];
+ $self->_construct_text();
+}
+
+sub parse {
+ my $self = shift;
+ local($_) = $_[0];
+ # syntax check the link and extract destination
+ my ($alttext,$page,$section,$item) = ('','','','');
+
+ # strip leading/trailing whitespace
+ if(s/^[\s\n]+//) {
+ $self->warning("ignoring leading whitespace in link");
+ }
+ if(s/[\s\n]+$//) {
+ $self->warning("ignoring trailing whitespace in link");
+ }
+
+ # collapse newlines with whitespace
+ s/\s*\n\s*/ /g;
+
+ # extract alternative text
+ if(s!^([^|/"\n]*)[|]!!) {
+ $alttext = $1;
+ }
+ # extract page
+ if(s!^([^|/"\s]*)(?=/|$)!!) {
+ $page = $1;
+ }
+ # extract section
+ if(s!^/?"([^"\n]+)"$!!) { # e.g. L</"blah blah">
+ $section = $1;
+ }
+ # extact item
+ if(s!^/(.*)$!!) {
+ $item = $1;
+ }
+ # last chance here
+ if(s!^([^|"\s\n/][^"\n/]*)$!!) { # e.g. L<lah di dah>
+ $section = $1;
+ }
+ # now there should be nothing left
+ if(length) {
+ _invalid_link("garbled entry (spurious characters `$_')");
+ return undef;
+ }
+ elsif(!(length($page) || length($section) || length($item))) {
+ _invalid_link("empty link");
+ return undef;
+ }
+ elsif($alttext =~ /[<>]/) {
+ _invalid_link("alternative text contains < or >");
+ return undef;
+ }
+ else { # no errors so far
+ if($page =~ /[(]\d\w*[)]$/) {
+ $self->warning("brackets in `$page'");
+ $page = $`; # strip that extension
+ }
+ if($page =~ /^(\s*)(\S+)(\s*)/ && (length($1) || length($3))) {
+ $self->warning("whitespace in `$page'");
+ $page = $2; # strip that extension
+ }
+ }
+ $self->page($page);
+ $self->node($section || $item); # _TODO_ do not distinguish for now
+ $self->alttext($alttext);
+ $self->type($item ? 'item' : 'section');
+ 1;
+}
+
+sub _construct_text {
+ my $self = shift;
+ my $alttext = $self->alttext();
+ my $type = $self->type();
+ my $section = $self->node();
+ my $page = $self->page();
+ $self->{_text} =
+ $alttext ? $alttext : (
+ !$section ? '' :
+ $type eq 'item' ? 'the ' . $section . ' entry' :
+ 'the section on ' . $section ) .
+ ($page ? ($section ? ' in ':''). 'the ' . $page . ' manpage' :
+ 'elsewhere in this document');
+ # for being marked up later
+ $self->{_markup} =
+ $alttext ? '<SECTON>' . $alttext . '<SECTOFF>' : (
+ !$section ? '' :
+ $type eq 'item' ? 'the <SECTON>' . $section . '<SECTOFF> entry' :
+ 'the section on <SECTON>' . $section . '<SECTOFF>' ) .
+ ($page ? ($section ? ' in ':'') . 'the <PAGEON>' .
+ $page . '<PAGEOFF> manpage' :
+ ' elsewhere in this document');
+}
+
+# include markup
+sub markup {
+ my ($self,$on,$off,$pageon,$pageoff) = @_;
+ $on ||= '';
+ $off ||= '';
+ $pageon ||= '';
+ $pageoff ||= '';
+ $_[0]->_construct_text;
+ my $str = $self->{_markup};
+ $str =~ s/<SECTON>/$on/;
+ $str =~ s/<SECTOFF>/$off/;
+ $str =~ s/<PAGEON>/$pageon/;
+ $str =~ s/<PAGEOFF>/$pageoff/;
+ return $str;
+}
+
+# The complete link's text
+sub text {
+ $_[0]->_construct_text();
+ $_[0]->{_text};
+}
+
+# The POD page the link appears on
+sub warning {
+ my $self = shift;
+ if(@_) {
+ push(@{$self->{_warnings}}, @_);
+ return @_;
+ }
+ return @{$self->{_warnings}};
+}
+
+# The POD file name the link appears in
+sub file {
+ return (@_ > 1) ? ($_[0]->{-file} = $_[1]) : $_[0]->{-file};
+}
+
+# The line in the file the link appears
+sub line {
+ return (@_ > 1) ? ($_[0]->{-line} = $_[1]) : $_[0]->{-line};
+}
+
+# The POD page the link appears on
+sub page {
+ return (@_ > 1) ? ($_[0]->{-page} = $_[1]) : $_[0]->{-page};
+}
+
+# The link destination
+sub node {
+ return (@_ > 1) ? ($_[0]->{-node} = $_[1]) : $_[0]->{-node};
+}
+
+# Potential alternative text
+sub alttext {
+ return (@_ > 1) ? ($_[0]->{-alttext} = $_[1]) : $_[0]->{-alttext};
+}
+
+# The type
+sub type {
+ return (@_ > 1) ? ($_[0]->{-type} = $_[1]) : $_[0]->{-type};
}
-sub _invalid_seq {
- my ($fh, $cmd, $text, $file, $line) = @_;
- print $fh "*** ERROR: Unknown interior-sequence \"$cmd\""
- . " at line $line of file $file\n";
+sub _invalid_link {
+ my ($msg) = @_;
+ # this sets @_
+ #eval { die "$msg\n" };
+ #chomp $@;
+ $@ = $msg; # this seems to work, too!
+ undef;
}
+1;
diff --git a/lib/Pod/Html.pm b/lib/Pod/Html.pm
index e9c640cf5d..15757ec80d 100644
--- a/lib/Pod/Html.pm
+++ b/lib/Pod/Html.pm
@@ -1487,7 +1487,7 @@ sub process_L {
if (m,^(.*?)/"?(.*?)"?$,) { # yes
($page, $section) = ($1, $2);
} else { # no
- ($page, $section) = ($str, "");
+ ($page, $section) = ($_, "");
}
# check if we know that this is a section in this page
diff --git a/lib/Pod/InputObjects.pm b/lib/Pod/InputObjects.pm
index 007fd74ebc..1432895e91 100644
--- a/lib/Pod/InputObjects.pm
+++ b/lib/Pod/InputObjects.pm
@@ -2,7 +2,7 @@
# Pod/InputObjects.pm -- package which defines objects for input streams
# and paragraphs and commands when parsing POD docs.
#
-# Copyright (C) 1996-1999 Tom Christiansen. All rights reserved.
+# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
@@ -11,7 +11,7 @@
package Pod::InputObjects;
use vars qw($VERSION);
-$VERSION = 1.081; ## Current version of this package
+$VERSION = 1.090; ## Current version of this package
require 5.004; ## requires this Perl version or later
#############################################################################
@@ -434,6 +434,9 @@ It has the following methods/attributes:
-file => $filename,
-line => $line_number);
+ my $pod_seq4 = new Pod::InteriorSequence(-name => $cmd, $ptree);
+ my $pod_seq5 = new Pod::InteriorSequence($cmd, $ptree);
+
This is a class method that constructs a C<Pod::InteriorSequence> object
and returns a reference to the new interior sequence object. It should
be given two keyword arguments. The C<-ldelim> keyword indicates the
@@ -441,7 +444,10 @@ corresponding left-delimiter of the interior sequence (e.g. 'E<lt>').
The C<-name> keyword indicates the name of the corresponding interior
sequence command, such as C<I> or C<B> or C<C>. The C<-file> and
C<-line> keywords indicate the filename and line number corresponding
-to the beginning of the interior sequence.
+to the beginning of the interior sequence. If the C<$ptree> argument is
+given, it must be the last argument, and it must be either string, or
+else an array-ref suitable for passing to B<Pod::ParseTree::new> (or
+it may be a reference to an Pod::ParseTree object).
=cut
@@ -450,6 +456,18 @@ sub new {
my $this = shift;
my $class = ref($this) || $this;
+ ## See if first argument has no keyword
+ if (((@_ <= 2) or (@_ % 2)) and $_[0] !~ /^-\w/) {
+ ## Yup - need an implicit '-name' before first parameter
+ unshift @_, '-name';
+ }
+
+ ## See if odd number of args
+ if ((@_ % 2) != 0) {
+ ## Yup - need an implicit '-ptree' before the last parameter
+ splice @_, $#_, 0, '-ptree';
+ }
+
## Any remaining arguments are treated as initial values for the
## hash that is used to represent this object. Note that we default
## certain values by specifying them *before* the arguments passed.
@@ -460,10 +478,18 @@ sub new {
-line => 0,
-ldelim => '<',
-rdelim => '>',
- -ptree => new Pod::ParseTree(),
@_
};
+ ## Initialize contents if they havent been already
+ my $ptree = $self->{'-ptree'} || new Pod::ParseTree();
+ if ( ref $ptree =~ /^(ARRAY)?$/ ) {
+ ## We have an array-ref, or a normal scalar. Pass it as an
+ ## an argument to the ptree-constructor
+ $ptree = new Pod::ParseTree($1 ? [$ptree] : $ptree);
+ }
+ $self->{'-ptree'} = $ptree;
+
## Bless ourselves into the desired class and perform any initialization
bless $self, $class;
return $self;
@@ -496,7 +522,7 @@ sub _set_child2parent_links {
my ($self, @children) = @_;
## Make sure any sequences know who their parent is
for (@children) {
- next unless ref;
+ next unless (ref || ref eq 'SCALAR');
if ($_->isa('Pod::InteriorSequence') or $_->can('nested')) {
$_->nested($self);
}
@@ -510,8 +536,8 @@ sub _unset_child2parent_links {
$self->{'-parent_sequence'} = undef;
my $ptree = $self->{'-ptree'};
for (@$ptree) {
- next unless (length and ref and $_->isa('Pod::InteriorSequence'));
- $_->_unset_child2parent_links();
+ next unless (length and ref and ref ne 'SCALAR');
+ $_->_unset_child2parent_links() if $_->isa('Pod::InteriorSequence');
}
}
@@ -718,7 +744,7 @@ itself contain a parse-tree (since interior sequences may be nested).
This is a class method that constructs a C<Pod::Parse_tree> object and
returns a reference to the new parse-tree. If a single-argument is given,
-it mist be a reference to an array, and is used to initialize the root
+it must be a reference to an array, and is used to initialize the root
(top) of the parse tree.
=cut
@@ -863,8 +889,8 @@ sub _unset_child2parent_links {
my $self = shift;
local *ptree = $self;
for (@ptree) {
- next unless (length and ref and $_->isa('Pod::InteriorSequence'));
- $_->_unset_child2parent_links();
+ next unless (length and ref and ref ne 'SCALAR');
+ $_->_unset_child2parent_links() if $_->isa('Pod::InteriorSequence');
}
}
diff --git a/lib/Pod/Man.pm b/lib/Pod/Man.pm
new file mode 100644
index 0000000000..9aadd42dea
--- /dev/null
+++ b/lib/Pod/Man.pm
@@ -0,0 +1,1194 @@
+# Pod::Man -- Convert POD data to formatted *roff input.
+# $Id: Man.pm,v 0.8 1999/10/07 09:39:37 eagle Exp $
+#
+# Copyright 1999 by Russ Allbery <rra@stanford.edu>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the same terms as Perl itself.
+#
+# This module is intended to be a replacement for pod2man, and attempts to
+# match its output except for some specific circumstances where other
+# decisions seemed to produce better output. It uses Pod::Parser and is
+# designed to be very easy to subclass.
+
+############################################################################
+# Modules and declarations
+############################################################################
+
+package Pod::Man;
+
+require 5.004;
+
+use Carp qw(carp croak);
+use Pod::Parser ();
+
+use strict;
+use subs qw(makespace);
+use vars qw(@ISA %ESCAPES $PREAMBLE $VERSION);
+
+@ISA = qw(Pod::Parser);
+
+($VERSION = (split (' ', q$Revision: 0.8 $ ))[1]) =~ s/\.(\d)$/.0$1/;
+
+
+############################################################################
+# Preamble and *roff output tables
+############################################################################
+
+# The following is the static preamble which starts all *roff output we
+# generate. It's completely static except for the font to use as a
+# fixed-width font, which is designed by @CFONT@. $PREAMBLE should
+# therefore be run through s/\@CFONT\@/<font>/g before output.
+$PREAMBLE = <<'----END OF PREAMBLE----';
+.de Sh \" Subsection heading
+.br
+.if t .Sp
+.ne 5
+.PP
+\fB\\$1\fR
+.PP
+..
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Ip \" List item
+.br
+.ie \\n(.$>=3 .ne \\$3
+.el .ne 3
+.IP "\\$1" \\$2
+..
+.de Vb \" Begin verbatim text
+.ft @CFONT@
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+
+.fi
+..
+.\" Set up some character translations and predefined strings. \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote. | will give a
+.\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used
+.\" to do unbreakable dashes and therefore won't be available. \*(C` and
+.\" \*(C' expand to `' in nroff, nothing in troff, for use with C<>
+.tr \(*W-|\(bv\*(Tr
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+. ds -- \(*W-
+. ds PI pi
+. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
+. ds L" ""
+. ds R" ""
+. ds C` `
+. ds C' '
+'br\}
+.el\{\
+. ds -- \|\(em\|
+. ds PI \(*p
+. ds L" ``
+. ds R" ''
+'br\}
+.\"
+.\" If the F register is turned on, we'll generate index entries on stderr
+.\" for titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and
+.\" index entries marked with X<> in POD. Of course, you'll have to process
+.\" the output yourself in some meaningful fashion.
+.if \nF \{\
+. de IX
+. tm Index:\\$1\t\\n%\t"\\$2"
+. .
+. nr % 0
+. rr F
+.\}
+.\"
+.\" For nroff, turn off justification. Always turn off hyphenation; it
+.\" makes way too many mistakes in technical documents.
+.hy 0
+.if n .na
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear. Run. Save yourself. No user-serviceable parts.
+.bd B 3
+. \" fudge factors for nroff and troff
+.if n \{\
+. ds #H 0
+. ds #V .8m
+. ds #F .3m
+. ds #[ \f1
+. ds #] \fP
+.\}
+.if t \{\
+. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+. ds #V .6m
+. ds #F 0
+. ds #[ \&
+. ds #] \&
+.\}
+. \" simple accents for nroff and troff
+.if n \{\
+. ds ' \&
+. ds ` \&
+. ds ^ \&
+. ds , \&
+. ds ~ ~
+. ds /
+.\}
+.if t \{\
+. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+. \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+. \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+. \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds : e
+. ds 8 ss
+. ds o a
+. ds d- d\h'-1'\(ga
+. ds D- D\h'-1'\(hy
+. ds th \o'bp'
+. ds Th \o'LP'
+. ds ae ae
+. ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+----END OF PREAMBLE----
+
+# This table is taken nearly verbatim from Tom Christiansen's pod2man. It
+# assumes that the standard preamble has already been printed, since that's
+# what defines all of the accent marks. Note that some of these are quoted
+# with double quotes since they contain embedded single quotes, so use \\
+# uniformly for backslash for readability.
+%ESCAPES = (
+ 'amp' => '&', # ampersand
+ 'lt' => '<', # left chevron, less-than
+ 'gt' => '>', # right chevron, greater-than
+ 'quot' => '"', # double quote
+
+ 'Aacute' => "A\\*'", # capital A, acute accent
+ 'aacute' => "a\\*'", # small a, acute accent
+ 'Acirc' => 'A\\*^', # capital A, circumflex accent
+ 'acirc' => 'a\\*^', # small a, circumflex accent
+ 'AElig' => '\*(AE', # capital AE diphthong (ligature)
+ 'aelig' => '\*(ae', # small ae diphthong (ligature)
+ 'Agrave' => "A\\*`", # capital A, grave accent
+ 'agrave' => "A\\*`", # small a, grave accent
+ 'Aring' => 'A\\*o', # capital A, ring
+ 'aring' => 'a\\*o', # small a, ring
+ 'Atilde' => 'A\\*~', # capital A, tilde
+ 'atilde' => 'a\\*~', # small a, tilde
+ 'Auml' => 'A\\*:', # capital A, dieresis or umlaut mark
+ 'auml' => 'a\\*:', # small a, dieresis or umlaut mark
+ 'Ccedil' => 'C\\*,', # capital C, cedilla
+ 'ccedil' => 'c\\*,', # small c, cedilla
+ 'Eacute' => "E\\*'", # capital E, acute accent
+ 'eacute' => "e\\*'", # small e, acute accent
+ 'Ecirc' => 'E\\*^', # capital E, circumflex accent
+ 'ecirc' => 'e\\*^', # small e, circumflex accent
+ 'Egrave' => 'E\\*`', # capital E, grave accent
+ 'egrave' => 'e\\*`', # small e, grave accent
+ 'ETH' => '\\*(D-', # capital Eth, Icelandic
+ 'eth' => '\\*(d-', # small eth, Icelandic
+ 'Euml' => 'E\\*:', # capital E, dieresis or umlaut mark
+ 'euml' => 'e\\*:', # small e, dieresis or umlaut mark
+ 'Iacute' => "I\\*'", # capital I, acute accent
+ 'iacute' => "i\\*'", # small i, acute accent
+ 'Icirc' => 'I\\*^', # capital I, circumflex accent
+ 'icirc' => 'i\\*^', # small i, circumflex accent
+ 'Igrave' => 'I\\*`', # capital I, grave accent
+ 'igrave' => 'i\\*`', # small i, grave accent
+ 'Iuml' => 'I\\*:', # capital I, dieresis or umlaut mark
+ 'iuml' => 'i\\*:', # small i, dieresis or umlaut mark
+ 'Ntilde' => 'N\*~', # capital N, tilde
+ 'ntilde' => 'n\*~', # small n, tilde
+ 'Oacute' => "O\\*'", # capital O, acute accent
+ 'oacute' => "o\\*'", # small o, acute accent
+ 'Ocirc' => 'O\\*^', # capital O, circumflex accent
+ 'ocirc' => 'o\\*^', # small o, circumflex accent
+ 'Ograve' => 'O\\*`', # capital O, grave accent
+ 'ograve' => 'o\\*`', # small o, grave accent
+ 'Oslash' => 'O\\*/', # capital O, slash
+ 'oslash' => 'o\\*/', # small o, slash
+ 'Otilde' => 'O\\*~', # capital O, tilde
+ 'otilde' => 'o\\*~', # small o, tilde
+ 'Ouml' => 'O\\*:', # capital O, dieresis or umlaut mark
+ 'ouml' => 'o\\*:', # small o, dieresis or umlaut mark
+ 'szlig' => '\*8', # small sharp s, German (sz ligature)
+ 'THORN' => '\\*(Th', # capital THORN, Icelandic
+ 'thorn' => '\\*(th', # small thorn, Icelandic
+ 'Uacute' => "U\\*'", # capital U, acute accent
+ 'uacute' => "u\\*'", # small u, acute accent
+ 'Ucirc' => 'U\\*^', # capital U, circumflex accent
+ 'ucirc' => 'u\\*^', # small u, circumflex accent
+ 'Ugrave' => 'U\\*`', # capital U, grave accent
+ 'ugrave' => 'u\\*`', # small u, grave accent
+ 'Uuml' => 'U\\*:', # capital U, dieresis or umlaut mark
+ 'uuml' => 'u\\*:', # small u, dieresis or umlaut mark
+ 'Yacute' => "Y\\*'", # capital Y, acute accent
+ 'yacute' => "y\\*'", # small y, acute accent
+ 'yuml' => 'y\\*:', # small y, dieresis or umlaut mark
+);
+
+
+############################################################################
+# Static helper functions
+############################################################################
+
+# Protect leading quotes and periods against interpretation as commands.
+sub protect { local $_ = shift; s/^([.\'])/\\&$1/mg; $_ }
+
+# Given a command and a single argument that may or may not contain double
+# quotes, handle double-quote formatting for it. If there are no double
+# quotes, just return the command followed by the argument in double quotes.
+# If there are double quotes, use an if statement to test for nroff, and for
+# nroff output the command followed by the argument in double quotes with
+# embedded double quotes doubled. For other formatters, remap paired double
+# quotes to `` and ''.
+sub switchquotes {
+ my $command = shift;
+ local $_ = shift;
+ my $extra = shift;
+ s/\\\*\([LR]\"/\"/g;
+ if (/\"/) {
+ s/\"/\"\"/g;
+ my $troff = $_;
+ $troff =~ s/\"\"([^\"]*)\"\"/\`\`$1\'\'/g;
+ s/\"/\"\"/g if $extra;
+ $troff =~ s/\"/\"\"/g if $extra;
+ $_ = qq("$_") . ($extra ? " $extra" : '');
+ $troff = qq("$troff") . ($extra ? " $extra" : '');
+ return ".if n $command $_\n.el $command $troff\n";
+ } else {
+ $_ = qq("$_") . ($extra ? " $extra" : '');
+ return "$command $_\n";
+ }
+}
+
+# Translate a font string into an escape.
+sub toescape { (length ($_[0]) > 1 ? '\f(' : '\f') . $_[0] }
+
+
+############################################################################
+# Initialization
+############################################################################
+
+# Initialize the object. Here, we also process any additional options
+# passed to the constructor or set up defaults if none were given. center
+# is the centered title, release is the version number, and date is the date
+# for the documentation. Note that we can't know what file name we're
+# processing due to the architecture of Pod::Parser, so that *has* to either
+# be passed to the constructor or set separately with Pod::Man::name().
+sub initialize {
+ my $self = shift;
+
+ # Figure out the fixed-width font. If user-supplied, make sure that
+ # they are the right length.
+ for (qw/fixed fixedbold fixeditalic fixedbolditalic/) {
+ if (defined $$self{$_}) {
+ if (length ($$self{$_}) < 1 || length ($$self{$_}) > 2) {
+ croak "roff font should be 1 or 2 chars, not `$$self{$_}'";
+ }
+ } else {
+ $$self{$_} = '';
+ }
+ }
+
+ # Set the default fonts. We can't be sure what fixed bold-italic is
+ # going to be called, so default to just bold.
+ $$self{fixed} ||= 'CW';
+ $$self{fixedbold} ||= 'CB';
+ $$self{fixeditalic} ||= 'CI';
+ $$self{fixedbolditalic} ||= 'CB';
+
+ # Set up a table of font escapes. First number is fixed-width, second
+ # is bold, third is italic.
+ $$self{FONTS} = { '000' => '\fR', '001' => '\fI',
+ '010' => '\fB', '011' => '\f(BI',
+ '100' => toescape ($$self{fixed}),
+ '101' => toescape ($$self{fixeditalic}),
+ '110' => toescape ($$self{fixedbold}),
+ '111' => toescape ($$self{fixedbolditalic})};
+
+ # Extra stuff for page titles.
+ $$self{center} = 'User Contributed Perl Documentation'
+ unless defined $$self{center};
+ $$self{indent} = 4 unless defined $$self{indent};
+
+ # We used to try first to get the version number from a local binary,
+ # but we shouldn't need that any more. Get the version from the running
+ # Perl.
+ if (!defined $$self{release}) {
+ my ($rev, $ver, $sver) = ($] =~ /^(\d+)\.(\d{3})(\d{0,3})$/);
+ $sver ||= 0; $sver *= 10 ** (3-length($sver));
+ $rev += 0; $ver += 0; $sver += 0;
+ $$self{release} = "perl v$rev.$ver.$sver";
+ }
+
+ # Double quotes in things that will be quoted.
+ for (qw/center date release/) { $$self{$_} =~ s/\"/\"\"/g }
+
+ $$self{INDENT} = 0; # Current indentation level.
+ $$self{INDENTS} = []; # Stack of indentations.
+ $$self{INDEX} = []; # Index keys waiting to be printed.
+
+ $self->SUPER::initialize;
+}
+
+# For each document we process, output the preamble first. Note that the
+# fixed width font is a global default; once we interpolate it into the
+# PREAMBLE, it ain't ever changing. Maybe fix this later.
+sub begin_pod {
+ my $self = shift;
+
+ # Try to figure out the name and section from the file name.
+ my $section = $$self{section} || 1;
+ my $name = $$self{name};
+ if (!defined $name) {
+ $name = $self->input_file;
+ $section = 3 if (!$$self{section} && $name =~ /\.pm$/i);
+ $name =~ s/\.p(od|[lm])$//i;
+ if ($section =~ /^1/) {
+ require File::Basename;
+ $name = uc File::Basename::basename ($name);
+ } else {
+ # Lose everything up to the first of
+ # */lib/*perl* standard or site_perl module
+ # */*perl*/lib from -D prefix=/opt/perl
+ # */*perl*/ random module hierarchy
+ # which works. Should be fixed to use File::Spec.
+ for ($name) {
+ s%//+%/%g;
+ if ( s%^.*?/lib/[^/]*perl[^/]*/%%i
+ or s%^.*?/[^/]*perl[^/]*/(?:lib/)?%%i) {
+ s%^site(_perl)?/%%; # site and site_perl
+ s%^(.*-$^O|$^O-.*)/%%o; # arch
+ s%^\d+\.\d+%%; # version
+ }
+ s%/%::%g;
+ }
+ }
+ }
+
+ # Modification date header. Try to use the modification time of our
+ # input.
+ if (!defined $$self{date}) {
+ my $time = (stat $self->input_file)[9] || time;
+ my ($day, $month, $year) = (localtime $time)[3,4,5];
+ $month++;
+ $year += 1900;
+ $$self{date} = join ('-', $year, $month, $day);
+ }
+
+ # Now, print out the preamble and the title.
+ $PREAMBLE =~ s/\@CFONT\@/$$self{fixed}/;
+ chomp $PREAMBLE;
+ print { $self->output_handle } <<"----END OF HEADER----";
+.\\" Automatically generated by Pod::Man version $VERSION
+.\\" @{[ scalar localtime ]}
+.\\"
+.\\" Standard preamble:
+.\\" ======================================================================
+$PREAMBLE
+.\\" ======================================================================
+.\\"
+.IX Title "$name $section"
+.TH $name $section "$$self{release}" "$$self{date}" "$$self{center}"
+.UC
+----END OF HEADER----
+#"# for cperl-mode
+
+ # Initialize a few per-file variables.
+ $$self{INDENT} = 0;
+ $$self{NEEDSPACE} = 0;
+}
+
+
+############################################################################
+# Core overrides
+############################################################################
+
+# Called for each command paragraph. Gets the command, the associated
+# paragraph, the line number, and a Pod::Paragraph object. Just dispatches
+# the command to a method named the same as the command. =cut is handled
+# internally by Pod::Parser.
+sub command {
+ my $self = shift;
+ my $command = shift;
+ return if $command eq 'pod';
+ return if ($$self{EXCLUDE} && $command ne 'end');
+ $command = 'cmd_' . $command;
+ $self->$command (@_);
+}
+
+# Called for a verbatim paragraph. Gets the paragraph, the line number, and
+# a Pod::Paragraph object. Rofficate backslashes, untabify, put a
+# zero-width character at the beginning of each line to protect against
+# commands, and wrap in .Vb/.Ve.
+sub verbatim {
+ my $self = shift;
+ return if $$self{EXCLUDE};
+ local $_ = shift;
+ return if /^\s+$/;
+ s/\s+$/\n/;
+ my $lines = tr/\n/\n/;
+ 1 while s/^(.*?)(\t+)/$1 . ' ' x (length ($2) * 8 - length ($1) % 8)/me;
+ s/\\/\\e/g;
+ s/^(\s*\S)/'\&' . $1/gme;
+ $self->makespace if $$self{NEEDSPACE};
+ $self->output (".Vb $lines\n$_.Ve\n");
+ $$self{NEEDSPACE} = 0;
+}
+
+# Called for a regular text block. Gets the paragraph, the line number, and
+# a Pod::Paragraph object. Perform interpolation and output the results.
+sub textblock {
+ my $self = shift;
+ return if $$self{EXCLUDE};
+ $self->output ($_[0]), return if $$self{VERBATIM};
+
+ # Perform a little magic to collapse multiple L<> references. We'll
+ # just rewrite the whole thing into actual text at this part, bypassing
+ # the whole internal sequence parsing thing.
+ s{
+ (L< # A link of the form L</something>.
+ /
+ (
+ [:\w]+ # The item has to be a simple word...
+ (\(\))? # ...or simple function.
+ )
+ >
+ (
+ ,?\s+(and\s+)? # Allow lots of them, conjuncted.
+ L<
+ /
+ ( [:\w]+ ( \(\) )? )
+ >
+ )+
+ )
+ } {
+ local $_ = $1;
+ s{ L< / ([^>]+ ) } {$1}g;
+ my @items = split /(?:,?\s+(?:and\s+)?)/;
+ my $string = "the ";
+ my $i;
+ for ($i = 0; $i < @items; $i++) {
+ $string .= $items[$i];
+ $string .= ", " if @items > 2 && $i != $#items;
+ $string .= " and " if ($i == $#items - 1);
+ }
+ $string .= " entries elsewhere in this document";
+ $string;
+ }gex;
+
+ # Parse the tree and output it. collapse knows about references to
+ # scalars as well as scalars and does the right thing with them.
+ local $_ = $self->parse (@_);
+ s/\n\s*$/\n/;
+ $self->makespace if $$self{NEEDSPACE};
+ $self->output (protect $self->mapfonts ($_));
+ $self->outindex;
+ $$self{NEEDSPACE} = 1;
+}
+
+# Called for an interior sequence. Takes a Pod::InteriorSequence object and
+# returns a reference to a scalar. This scalar is the final formatted text.
+# It's returned as a reference so that other interior sequences above us
+# know that the text has already been processed.
+sub sequence {
+ my ($self, $seq) = @_;
+ my $command = $seq->cmd_name;
+
+ # Zero-width characters.
+ if ($command eq 'Z') {
+ my $v = '\&'; return bless \ $v, 'Pod::Man::String';
+ }
+
+ # C<>, L<>, X<>, and E<> don't apply guesswork to their contents.
+ local $_ = $self->collapse ($seq->parse_tree, $command =~ /^[CELX]$/);
+
+ # Handle E<> escapes.
+ if ($command eq 'E') {
+ if (/^\d+$/) {
+ return bless \ chr ($_), 'Pod::Man::String';
+ } elsif (exists $ESCAPES{$_}) {
+ return bless \ "$ESCAPES{$_}", 'Pod::Man::String';
+ } else {
+ carp "Unknown escape E<$1>";
+ return bless \ "E<$_>", 'Pod::Man::String';
+ }
+ }
+
+ # For all the other sequences, empty content produces no output.
+ return '' if $_ eq '';
+
+ # Handle formatting sequences.
+ if ($command eq 'B') {
+ return bless \ ('\f(BS' . $_ . '\f(BE'), 'Pod::Man::String';
+ } elsif ($command eq 'F') {
+ return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
+ } elsif ($command eq 'I') {
+ return bless \ ('\f(IS' . $_ . '\f(IE'), 'Pod::Man::String';
+ } elsif ($command eq 'C') {
+ s/-/\\-/g;
+ s/__/_\\|_/g;
+ return bless \ ('\f(FS\*(C`' . $_ . "\\*(C'\\f(FE"),
+ 'Pod::Man::String';
+ }
+
+ # Handle links.
+ if ($command eq 'L') {
+ # XXX bug in lvalue subroutines prevents this from working
+ #return bless \ ($self->buildlink ($_)), 'Pod::Man::String';
+ my $v = $self->buildlink($_);
+ return bless \$v, 'Pod::Man::String';
+ }
+
+ # Whitespace protection replaces whitespace with "\ ".
+ if ($command eq 'S') {
+ s/\s+/\\ /g;
+ return bless \ "$_", 'Pod::Man::String';
+ }
+
+ # Add an index entry to the list of ones waiting to be output.
+ if ($command eq 'X') { push (@{ $$self{INDEX} }, $_); return '' }
+
+ # Anything else is unknown.
+ carp "Unknown sequence $command<$_>";
+}
+
+
+############################################################################
+# Command paragraphs
+############################################################################
+
+# All command paragraphs take the paragraph and the line number.
+
+# First level heading. We can't output .IX in the NAME section due to a bug
+# in some versions of catman, so don't output a .IX for that section. .SH
+# already uses small caps, so remove any E<> sequences that would cause
+# them.
+sub cmd_head1 {
+ my $self = shift;
+ local $_ = $self->parse (@_);
+ s/\s+$//;
+ s/\\s-?\d//g;
+ $self->output (switchquotes ('.SH', $self->mapfonts ($_)));
+ $self->outindex (($_ eq 'NAME') ? () : ('Header', $_));
+ $$self{NEEDSPACE} = 0;
+}
+
+# Second level heading.
+sub cmd_head2 {
+ my $self = shift;
+ local $_ = $self->parse (@_);
+ s/\s+$//;
+ $self->output (switchquotes ('.Sh', $self->mapfonts ($_)));
+ $self->outindex ('Subsection', $_);
+ $$self{NEEDSPACE} = 0;
+}
+
+# Start a list. For indents after the first, wrap the outside indent in .RS
+# so that hanging paragraph tags will be correct.
+sub cmd_over {
+ my $self = shift;
+ local $_ = shift;
+ unless (/^[-+]?\d+\s+$/) { $_ = $$self{indent} }
+ if (@{ $$self{INDENTS} } > 0) {
+ $self->output (".RS $$self{INDENT}\n");
+ }
+ push (@{ $$self{INDENTS} }, $$self{INDENT});
+ $$self{INDENT} = ($_ + 0);
+}
+
+# End a list. If we've closed an embedded indent, we've mangled the hanging
+# paragraph indent, so temporarily replace it with .RS and set WEIRDINDENT.
+# We'll close that .RS at the next =back or =item.
+sub cmd_back {
+ my $self = shift;
+ $$self{INDENT} = pop @{ $$self{INDENTS} };
+ unless (defined $$self{INDENT}) {
+ carp "Unmatched =back";
+ $$self{INDENT} = 0;
+ }
+ if ($$self{WEIRDINDENT}) {
+ $self->output (".RE\n");
+ $$self{WEIRDINDENT} = 0;
+ }
+ if (@{ $$self{INDENTS} } > 0) {
+ $self->output (".RE\n");
+ $self->output (".RS $$self{INDENT}\n");
+ $$self{WEIRDINDENT} = 1;
+ }
+ $$self{NEEDSPACE} = 1;
+}
+
+# An individual list item. Emit an index entry for anything that's
+# interesting, but don't emit index entries for things like bullets and
+# numbers. rofficate bullets too while we're at it (so for nice output, use
+# * for your lists rather than o or . or - or some other thing).
+sub cmd_item {
+ my $self = shift;
+ local $_ = $self->parse (@_);
+ s/\s+$//;
+ my $index;
+ if (/\w/ && !/^\w[.\)]\s*$/) {
+ $index = $_;
+ $index =~ s/^\s*[-*+o.]?\s*//;
+ }
+ s/^\*(\s|\Z)/\\\(bu$1/;
+ if ($$self{WEIRDINDENT}) {
+ $self->output (".RE\n");
+ $$self{WEIRDINDENT} = 0;
+ }
+ $_ = $self->mapfonts ($_);
+ $self->output (switchquotes ('.Ip', $_, $$self{INDENT}));
+ $self->outindex ($index ? ('Item', $index) : ());
+ $$self{NEEDSPACE} = 0;
+}
+
+# Begin a block for a particular translator. Setting VERBATIM triggers
+# special handling in textblock().
+sub cmd_begin {
+ my $self = shift;
+ local $_ = shift;
+ my ($kind) = /^(\S+)/ or return;
+ if ($kind eq 'man' || $kind eq 'roff') {
+ $$self{VERBATIM} = 1;
+ } else {
+ $$self{EXCLUDE} = 1;
+ }
+}
+
+# End a block for a particular translator. We assume that all =begin/=end
+# pairs are properly closed.
+sub cmd_end {
+ my $self = shift;
+ $$self{EXCLUDE} = 0;
+ $$self{VERBATIM} = 0;
+}
+
+# One paragraph for a particular translator. Ignore it unless it's intended
+# for man or roff, in which case we output it verbatim.
+sub cmd_for {
+ my $self = shift;
+ local $_ = shift;
+ my $line = shift;
+ return unless s/^(?:man|roff)\b[ \t]*\n?//;
+ $self->output ($_);
+}
+
+
+############################################################################
+# Link handling
+############################################################################
+
+# Handle links. We can't actually make real hyperlinks, so this is all to
+# figure out what text and formatting we print out.
+sub buildlink {
+ my $self = shift;
+ local $_ = shift;
+
+ # Smash whitespace in case we were split across multiple lines.
+ s/\s+/ /g;
+
+ # If we were given any explicit text, just output it.
+ if (m{ ^ ([^|]+) \| }x) { return $1 }
+
+ # Okay, leading and trailing whitespace isn't important.
+ s/^\s+//;
+ s/\s+$//;
+
+ # Default to using the whole content of the link entry as a section
+ # name. Note that L<manpage/> forces a manpage interpretation, as does
+ # something looking like L<manpage(section)>. Do the same thing to
+ # L<manpage(section)> as we would to manpage(section) without the L<>;
+ # see guesswork(). If we've added italics, don't add the "manpage"
+ # text; markup is sufficient.
+ my ($manpage, $section) = ('', $_);
+ if (/^"\s*(.*?)\s*"$/) {
+ $section = '"' . $1 . '"';
+ } elsif (m{ ^ [-:.\w]+ (?: \( \S+ \) )? $ }x) {
+ ($manpage, $section) = ($_, '');
+ $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|('/e;
+ } elsif (m%/%) {
+ ($manpage, $section) = split (/\s*\/\s*/, $_, 2);
+ if ($manpage =~ /^[-:.\w]+(?:\(\S+\))?$/) {
+ $manpage =~ s/^([^\(]+)\(/'\f(IS' . $1 . '\f(IE\|'/e;
+ }
+ $section =~ s/^\"\s*//;
+ $section =~ s/\s*\"$//;
+ }
+ if ($manpage && $manpage !~ /\\f\(IS/) {
+ $manpage = "the $manpage manpage";
+ }
+
+ # Now build the actual output text.
+ my $text = '';
+ if (!length ($section) && !length ($manpage)) {
+ carp "Invalid link $_";
+ } elsif (!length ($section)) {
+ $text = $manpage;
+ } elsif ($section =~ /^[:\w]+(?:\(\))?/) {
+ $text .= 'the ' . $section . ' entry';
+ $text .= (length $manpage) ? " in $manpage"
+ : " elsewhere in this document";
+ } else {
+ if ($section !~ /^".*"$/) { $section = '"' . $section . '"' }
+ $text .= 'the section on ' . $section;
+ $text .= " in $manpage" if length $manpage;
+ }
+ $text;
+}
+
+
+############################################################################
+# Escaping and fontification
+############################################################################
+
+# At this point, we'll have embedded font codes of the form \f(<font>[SE]
+# where <font> is one of B, I, or F. Turn those into the right font start
+# or end codes. B<someI<thing> else> should map to \fBsome\f(BIthing\fB
+# else\fR. The old pod2man didn't get this right; the second \fB was \fR,
+# so nested sequences didn't work right. We take care of this by using
+# variables as a combined pointer to our current font sequence, and set each
+# to the number of current nestings of start tags for that font. Use them
+# as a vector to look up what font sequence to use.
+sub mapfonts {
+ my $self = shift;
+ local $_ = shift;
+
+ my ($fixed, $bold, $italic) = (0, 0, 0);
+ my %magic = (F => \$fixed, B => \$bold, I => \$italic);
+ s { \\f\((.)(.) } {
+ ${ $magic{$1} } += ($2 eq 'S') ? 1 : -1;
+ $$self{FONTS}{($fixed && 1) . ($bold && 1) . ($italic && 1)};
+ }gxe;
+ $_;
+}
+
+
+############################################################################
+# *roff-specific parsing
+############################################################################
+
+# Called instead of parse_text, calls parse_text with the right flags.
+sub parse {
+ my $self = shift;
+ $self->parse_text ({ -expand_seq => 'sequence',
+ -expand_ptree => 'collapse' }, @_);
+}
+
+# Takes a parse tree and a flag saying whether or not to treat it as literal
+# text (not call guesswork on it), and returns the concatenation of all of
+# the text strings in that parse tree. If the literal flag isn't true,
+# guesswork() will be called on all plain scalars in the parse tree.
+# Assumes that everything in the parse tree is either a scalar or a
+# reference to a scalar.
+sub collapse {
+ my ($self, $ptree, $literal) = @_;
+ if ($literal) {
+ return join ('', map {
+ if (ref $_) {
+ $$_;
+ } else {
+ s/\\/\\e/g;
+ $_;
+ }
+ } $ptree->children);
+ } else {
+ return join ('', map {
+ ref ($_) ? $$_ : $self->guesswork ($_)
+ } $ptree->children);
+ }
+}
+
+# Takes a text block to perform guesswork on; this is guaranteed not to
+# contain any interior sequences. Returns the text block with remapping
+# done.
+sub guesswork {
+ my $self = shift;
+ local $_ = shift;
+
+ # rofficate backslashes.
+ s/\\/\\e/g;
+
+ # Ensure double underbars have a tiny space between them.
+ s/__/_\\|_/g;
+
+ # Make all caps a little smaller. Be careful here, since we don't want
+ # to make @ARGV into small caps, nor do we want to fix the MIME in
+ # MIME-Version, since it looks weird with the full-height V.
+ s{
+ ( ^ | [\s\(\"\'\`\[\{<>] )
+ ( [A-Z] [A-Z] [/A-Z+:\d_\$&-]* )
+ (?: (?= [\s>\}\]\)\'\".?!,;:] | -- ) | $ )
+ } { $1 . '\s-1' . $2 . '\s0' . $3 }egx;
+
+ # Turn PI into a pretty pi.
+ s{ (?: \\s-1 | \b ) PI (?: \\s0 | \b ) } {\\*\(PI}gx;
+
+ # Italize functions in the form func().
+ s{
+ \b
+ (
+ [:\w]+ (?:\\s-1)? \(\)
+ )
+ } { '\f(IS' . $1 . '\f(IE' }egx;
+
+ # func(n) is a reference to a manual page. Make it \fIfunc\fR\|(n).
+ s{
+ \b
+ (\w[-:.\w]+ (?:\\s-1)?)
+ (
+ \( [^\)] \)
+ )
+ } { '\f(IS' . $1 . '\f(IE\|' . $2 }egx;
+
+ # Convert simple Perl variable references to a fixed-width font.
+ s{
+ ( \s+ )
+ ( [\$\@%] [\w:]+ )
+ (?! \( )
+ } { $1 . '\f(FS' . $2 . '\f(FE'}egx;
+
+ # Translate -- into a real em dash if it's used like one and fix up
+ # dashes, but keep hyphens hyphens.
+ s{ (\G|^|.) (-+) (\b|.) } {
+ my ($pre, $dash, $post) = ($1, $2, $3);
+ if (length ($dash) == 1) {
+ ($pre =~ /[a-zA-Z]/) ? "$pre-$post" : "$pre\\-$post";
+ } elsif (length ($dash) == 2
+ && ((!$pre && !$post)
+ || ($pre =~ /\w/ && !$post)
+ || ($pre eq ' ' && $post eq ' ')
+ || ($pre eq '=' && $post ne '=')
+ || ($pre ne '=' && $post eq '='))) {
+ "$pre\\*(--$post";
+ } else {
+ $pre . ('\-' x length $dash) . $post;
+ }
+ }egxs;
+
+ # Fix up double quotes.
+ s{ \" ([^\"]+) \" } { '\*(L"' . $1 . '\*(R"' }egx;
+
+ # Make C++ into \*(C+, which is a squinched version.
+ s{ \b C\+\+ } {\\*\(C+}gx;
+
+ # All done.
+ $_;
+}
+
+
+############################################################################
+# Output formatting
+############################################################################
+
+# Make vertical whitespace.
+sub makespace {
+ my $self = shift;
+ $self->output ($$self{INDENT} > 0 ? ".Sp\n" : ".PP\n");
+}
+
+# Output any pending index entries, and optionally an index entry given as
+# an argument. Support multiple index entries in X<> separated by slashes,
+# and strip special escapes from index entries.
+sub outindex {
+ my ($self, $section, $index) = @_;
+ my @entries = map { split m%\s*/\s*% } @{ $$self{INDEX} };
+ return unless ($section || @entries);
+ $$self{INDEX} = [];
+ my $output;
+ if (@entries) {
+ my $output = '.IX Xref "'
+ . join (' ', map { s/\"/\"\"/; $_ } @entries)
+ . '"' . "\n";
+ }
+ if ($section) {
+ $index =~ s/\"/\"\"/;
+ $index =~ s/\\-/-/g;
+ $index =~ s/\\(?:s-?\d|.\(..|.)//g;
+ $output .= ".IX $section " . '"' . $index . '"' . "\n";
+ }
+ $self->output ($output);
+}
+
+# Output text to the output device.
+sub output { print { $_[0]->output_handle } $_[1] }
+
+__END__
+
+.\" These are some extra bits of roff that I don't want to lose track of
+.\" but that have been removed from the preamble to make it a bit shorter
+.\" since they're not currently being used. They're accents and special
+.\" characters we don't currently have escapes for.
+.if n \{\
+. ds ? ?
+. ds ! !
+. ds q
+.\}
+.if t \{\
+. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
+. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
+. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
+.\}
+.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
+.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
+.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
+.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
+.ds oe o\h'-(\w'o'u*4/10)'e
+.ds Oe O\h'-(\w'O'u*4/10)'E
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+. ds v \h'-1'\o'\(aa\(ga'
+. ds _ \h'-1'^
+. ds . \h'-1'.
+. ds 3 3
+. ds oe oe
+. ds Oe OE
+.\}
+
+############################################################################
+# Documentation
+############################################################################
+
+=head1 NAME
+
+Pod::Man - Convert POD data to formatted *roff input
+
+=head1 SYNOPSIS
+
+ use Pod::Man;
+ my $parser = Pod::Man->new (release => $VERSION, section => 8);
+
+ # Read POD from STDIN and write to STDOUT.
+ $parser->parse_from_filehandle;
+
+ # Read POD from file.pod and write to file.1.
+ $parser->parse_from_file ('file.pod', 'file.1');
+
+=head1 DESCRIPTION
+
+Pod::Man is a module to convert documentation in the POD format (the
+preferred language for documenting Perl) into *roff input using the man
+macro set. The resulting *roff code is suitable for display on a terminal
+using nroff(1), normally via man(1), or printing using troff(1). It is
+conventionally invoked using the driver script B<pod2man>, but it can also
+be used directly.
+
+As a derived class from Pod::Parser, Pod::Man supports the same methods and
+interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
+new parser with C<Pod::Man-E<gt>new()> and then calls either
+parse_from_filehandle() or parse_from_file().
+
+new() can take options, in the form of key/value pairs that control the
+behavior of the parser. See below for details.
+
+If no options are given, Pod::Man uses the name of the input file with any
+trailing C<.pod>, C<.pm>, or C<.pl> stripped as the man page title, to
+section 1 unless the file ended in C<.pm> in which case it defaults to
+section 3, to a centered title of "User Contributed Perl Documentation", to
+a centered footer of the Perl version it is run with, and to a left-hand
+footer of the modification date of its input (or the current date if given
+STDIN for input).
+
+Pod::Man assumes that your *roff formatters have a fixed-width font named
+CW. If yours is called something else (like CR), use the C<fixed> option to
+specify it. This generally only matters for troff output for printing.
+Similarly, you can set the fonts used for bold, italic, and bold italic
+fixed-width output.
+
+Besides the obvious pod conversions, Pod::Man also takes care of formatting
+func(), func(n), and simple variable references like $foo or @bar so you
+don't have to use code escapes for them; complex expressions like
+C<$fred{'stuff'}> will still need to be escaped, though. It also translates
+dashes that aren't used as hyphens into en dashes, makes long dashes--like
+this--into proper em dashes, fixes "paired quotes," makes C++ and PI look
+right, puts a little space between double underbars, makes ALLCAPS a teeny
+bit smaller in troff(1), and escapes stuff that *roff treats as special so
+that you don't have to.
+
+The recognized options to new() are as follows. All options take a single
+argument.
+
+=over 4
+
+=item center
+
+Sets the centered page header to use instead of "User Contributed Perl
+Documentation".
+
+=item date
+
+Sets the left-hand footer. By default, the modification date of the input
+file will be used, or the current date if stat() can't find that file (the
+case if the input is from STDIN), and the date will be formatted as
+YYYY-MM-DD.
+
+=item fixed
+
+The fixed-width font to use for vertabim text and code. Defaults to CW.
+Some systems may want CR instead. Only matters for troff(1) output.
+
+=item fixedbold
+
+Bold version of the fixed-width font. Defaults to CB. Only matters for
+troff(1) output.
+
+=item fixeditalic
+
+Italic version of the fixed-width font (actually, something of a misnomer,
+since most fixed-width fonts only have an oblique version, not an italic
+version). Defaults to CI. Only matters for troff(1) output.
+
+=item fixedbolditalic
+
+Bold italic (probably actually oblique) version of the fixed-width font.
+Pod::Man doesn't assume you have this, and defaults to CB. Some systems
+(such as Solaris) have this font available as CX. Only matters for troff(1)
+output.
+
+=item release
+
+Set the centered footer. By default, this is the version of Perl you run
+Pod::Man under. Note that some system an macro sets assume that the
+centered footer will be a modification date and will prepend something like
+"Last modified: "; if this is the case, you may want to set C<release> to
+the last modified date and C<date> to the version number.
+
+=item section
+
+Set the section for the C<.TH> macro. The standard section numbering
+convention is to use 1 for user commands, 2 for system calls, 3 for
+functions, 4 for devices, 5 for file formats, 6 for games, 7 for
+miscellaneous information, and 8 for administrator commands. There is a lot
+of variation here, however; some systems (like Solaris) use 4 for file
+formats, 5 for miscellaneous information, and 7 for devices. Still others
+use 1m instead of 8, or some mix of both. About the only section numbers
+that are reliably consistent are 1, 2, and 3.
+
+By default, section 1 will be used unless the file ends in .pm in which case
+section 3 will be selected.
+
+=back
+
+The standard Pod::Parser method parse_from_filehandle() takes up to two
+arguments, the first being the file handle to read POD from and the second
+being the file handle to write the formatted output to. The first defaults
+to STDIN if not given, and the second defaults to STDOUT. The method
+parse_from_file() is almost identical, except that its two arguments are the
+input and output disk files instead. See L<Pod::Parser> for the specific
+details.
+
+=head1 DIAGNOSTICS
+
+=over 4
+
+=item roff font should be 1 or 2 chars, not `%s'
+
+(F) You specified a *roff font (using C<fixed>, C<fixedbold>, etc.) that
+wasn't either one or two characters. Pod::Man doesn't support *roff fonts
+longer than two characters, although some *roff extensions do (the canonical
+versions of nroff(1) and troff(1) don't either).
+
+=item Invalid link %s
+
+(W) The POD source contained a C<LE<lt>E<gt>> sequence that Pod::Man was
+unable to parse. You should never see this error message; it probably
+indicates a bug in Pod::Man.
+
+=item Unknown escape EE<lt>%sE<gt>
+
+(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Man didn't
+know about. C<EE<lt>%sE<gt>> was printed verbatim in the output.
+
+=item Unknown sequence %s
+
+(W) The POD source contained a non-standard interior sequence (something of
+the form C<XE<lt>E<gt>>) that Pod::Man didn't know about. It was ignored.
+
+=item Unmatched =back
+
+(W) Pod::Man encountered a C<=back> command that didn't correspond to an
+C<=over> command.
+
+=back
+
+=head1 BUGS
+
+The lint-like features and strict POD format checking done by B<pod2man> are
+not yet implemented and should be, along with the corresponding C<lax>
+option.
+
+The NAME section should be recognized specially and index entries emitted
+for everything in that section. This would have to be deferred until the
+next section, since extraneous things in NAME tends to confuse various man
+page processors.
+
+The handling of hyphens, en dashes, and em dashes is somewhat fragile, and
+one may get the wrong one under some circumstances. This should only matter
+for troff(1) output.
+
+When and whether to use small caps is somewhat tricky, and Pod::Man doesn't
+necessarily get it right.
+
+Pod::Man doesn't handle font names longer than two characters. Neither do
+most troff(1) implementations, but GNU troff does as an extension. It would
+be nice to support as an option for those who want to use it.
+
+The preamble added to each output file is rather verbose, and most of it is
+only necessary in the presence of EE<lt>E<gt> escapes for non-ASCII
+characters. It would ideally be nice if all of those definitions were only
+output if needed, perhaps on the fly as the characters are used.
+
+Some of the automagic applied to file names assumes Unix directory
+separators.
+
+Pod::Man is excessively slow.
+
+=head1 NOTES
+
+The intention is for this module and its driver script to eventually replace
+B<pod2man> in Perl core.
+
+=head1 SEE ALSO
+
+L<Pod::Parser|Pod::Parser>, perlpod(1), pod2man(1), nroff(1), troff(1),
+man(1), man(7)
+
+Ossanna, Joseph F., and Brian W. Kernighan. "Troff User's Manual,"
+Computing Science Technical Report No. 54, AT&T Bell Laboratories. This is
+the best documentation of standard nroff(1) and troff(1). At the time of
+this writing, it's available at http://www.cs.bell-labs.com/cm/cs/cstr.html.
+
+The man page documenting the man macro set may be man(5) instead of man(7)
+on your system. Also, please see pod2man(1) for extensive documentation on
+writing manual pages if you've not done it before and aren't familiar with
+the conventions.
+
+=head1 AUTHOR
+
+Russ Allbery E<lt>rra@stanford.eduE<gt>, based I<very> heavily on the
+original B<pod2man> by Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>.
+
+=cut
diff --git a/lib/Pod/Parser.pm b/lib/Pod/Parser.pm
index cb1e3a61c1..c9c67bd8e2 100644
--- a/lib/Pod/Parser.pm
+++ b/lib/Pod/Parser.pm
@@ -1,10 +1,7 @@
#############################################################################
# Pod/Parser.pm -- package which defines a base class for parsing POD docs.
#
-# Based on Tom Christiansen's Pod::Text module
-# (with extensive modifications).
-#
-# Copyright (C) 1996-1999 Tom Christiansen. All rights reserved.
+# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
@@ -13,7 +10,7 @@
package Pod::Parser;
use vars qw($VERSION);
-$VERSION = 1.081; ## Current version of this package
+$VERSION = 1.091; ## Current version of this package
require 5.004; ## requires this Perl version or later
#############################################################################
@@ -74,7 +71,7 @@ Pod::Parser - base class for creating POD filters and translators
=head1 REQUIRES
-perl5.004, Pod::InputObjects, Exporter, FileHandle, Carp
+perl5.004, Pod::InputObjects, Exporter, Carp
=head1 EXPORTS
@@ -145,6 +142,50 @@ For the most part, the B<Pod::Parser> base class should be able to
do most of the input parsing for you and leave you free to worry about
how to intepret the commands and translate the result.
+Note that all we have described here in this quick overview overview is
+the simplest most striaghtforward use of B<Pod::Parser> to do stream-based
+parsing. It is also possible to use the B<Pod::Parser::parse_text> function
+to do more sophisticated tree-based parsing. See L<"TREE-BASED PARSING">.
+
+=head1 PARSING OPTIONS
+
+A I<parse-option> is simply a named option of B<Pod::Parser> with a
+value that corresponds to a certain specified behavior. These various
+behaviors of B<Pod::Parser> may be enabled/disabled by setting or
+or unsetting one or more I<parse-options> using the B<parseopts()> method.
+The set of currently accepted parse-options is as follows:
+
+=over 3
+
+=item B<-want_nonPODs> (default: unset)
+
+Normally (by default) B<Pod::Parser> will only provide access to
+the POD sections of the input. Input paragraphs that are not part
+of the POD-format documentation are not made available to the caller
+(not even using B<preprocess_paragraph()>). Setting this option to a
+non-empty, non-zero value will allow B<preprocess_paragraph()> to see
+non-POD sections of the input as well as POD sections. The B<cutting()>
+method can be used to determine if the corresponding paragraph is a POD
+paragraph, or some other input paragraph.
+
+=item B<-process_cut_cmd> (default: unset)
+
+Normally (by default) B<Pod::Parser> handles the C<=cut> POD directive
+by itself and does not pass it on to the caller for processing. Setting
+this option to non-empty, non-zero value will cause B<Pod::Parser> to
+pass the C<=cut> directive to the caller just like any other POD command
+(and hence it may be processed by the B<command()> method).
+
+B<Pod::Parser> will still interpret the C<=cut> directive to mean that
+"cutting mode" has been (re)entered, but the caller will get a chance
+to capture the actual C<=cut> paragraph itself for whatever purpose
+it desires.
+
+=back
+
+Please see L<"parseopts()"> for a complete description of the interface
+for the setting and unsetting of parse-options.
+
=cut
#############################################################################
@@ -154,12 +195,11 @@ use strict;
#use diagnostics;
use Pod::InputObjects;
use Carp;
-use FileHandle;
use Exporter;
@ISA = qw(Exporter);
## These "variables" are used as local "glob aliases" for performance
-use vars qw(%myData @input_stack);
+use vars qw(%myData %myOpts @input_stack);
#############################################################################
@@ -547,18 +587,20 @@ The value returned should correspond to the new text to use in its
place If the empty string is returned or an undefined value is
returned, then the given C<$text> is ignored (not processed).
-This method is invoked after gathering up all thelines in a paragraph
+This method is invoked after gathering up all the lines in a paragraph
+and after determining the cutting state of the paragraph,
but before trying to further parse or interpret them. After
B<preprocess_paragraph()> returns, the current cutting state (which
is returned by C<$self-E<gt>cutting()>) is examined. If it evaluates
-to false then input text (including the given C<$text>) is cut (not
+to true then input text (including the given C<$text>) is cut (not
processed) until the next POD directive is encountered.
Please note that the B<preprocess_line()> method is invoked I<before>
the B<preprocess_paragraph()> method. After all (possibly preprocessed)
-lines in a paragraph have been assembled together and it has been
+lines in a paragraph have been assembled together and either it has been
determined that the paragraph is part of the POD documentation from one
-of the selected sections, then B<preprocess_paragraph()> is invoked.
+of the selected sections or the C<-want_nonPODs> option is true,
+then B<preprocess_paragraph()> is invoked.
The base class implementation of this method returns the given text.
@@ -574,8 +616,9 @@ sub preprocess_paragraph {
=head1 METHODS FOR PARSING AND PROCESSING
B<Pod::Parser> provides several methods to process input text. These
-methods typically won't need to be overridden, but subclasses may want
-to invoke them to exploit their functionality.
+methods typically won't need to be overridden (and in some cases they
+can't be overridden), but subclasses may want to invoke them to exploit
+their functionality.
=cut
@@ -629,6 +672,31 @@ is a reference to the interior-sequence object.
[I<NOTE>: If the B<interior_sequence()> method is specified, then it is
invoked according to the interface specified in L<"interior_sequence()">].
+=item B<-expand_text> =E<gt> I<code-ref>|I<method-name>
+
+Normally, the parse-tree returned by B<parse_text()> will contain a
+text-string for each contiguous sequence of characters outside of an
+interior-sequence. Specifying B<-expand_text> tells B<parse_text()> to
+"preprocess" every such text-string it sees by invoking the referenced
+function (or named method of the parser object) and using the return value
+as the preprocessed (or "expanded") result. [Note that if the result is
+an interior-sequence, then it will I<not> be expanded as specified by the
+B<-expand_seq> option; Any such recursive expansion needs to be handled by
+the specified callback routine.]
+
+If a subroutine reference was given, it is invoked as:
+
+ &$code_ref( $parser, $text, $ptree_node )
+
+and if a method-name was given, it is invoked as:
+
+ $parser->method_name( $text, $ptree_node )
+
+where C<$parser> is a reference to the parser object, C<$text> is the
+text-string encountered, and C<$ptree_node> is a reference to the current
+node in the parse-tree (usually an interior-sequence object or else the
+top-level node of the parse-tree).
+
=item B<-expand_ptree> =E<gt> I<code-ref>|I<method-name>
Rather than returning a C<Pod::ParseTree>, pass the parse-tree as an
@@ -652,10 +720,10 @@ is a reference to the parse-tree object.
## This global regex is used to see if the text before a '>' inside
## an interior sequence looks like '-' or '=', but not '--', '==',
-## '$-', or '$='
+## '!=', '$-', '$=' or <<op>>=
use vars qw( $ARROW_RE );
-$ARROW_RE = join('', qw{ (?: [^-+*/=!&|%^x.<>$]= | [^$-]- )$ });
-#$ARROW_RE = qr/(?:[^=]+=|[^-]+-)$/; ## 5.005+ only!
+$ARROW_RE = join('', qw{ (?: [^-+*/=!&|%^x.<>$]= | [^-$]- )$ });
+#$ARROW_RE = qr/(?:[^-+*/=!&|%^x.<>$]+=|[^-$]+-)$/; ## 5.005+ only!
sub parse_text {
my $self = shift;
@@ -664,6 +732,7 @@ sub parse_text {
## Get options and set any defaults
my %opts = (ref $_[0]) ? %{ shift() } : ();
my $expand_seq = $opts{'-expand_seq'} || undef;
+ my $expand_text = $opts{'-expand_text'} || undef;
my $expand_ptree = $opts{'-expand_ptree'} || undef;
my $text = shift;
@@ -673,6 +742,7 @@ sub parse_text {
## Convert method calls into closures, for our convenience
my $xseq_sub = $expand_seq;
+ my $xtext_sub = $expand_text;
my $xptree_sub = $expand_ptree;
if (defined $expand_seq and $expand_seq eq 'interior_sequence') {
## If 'interior_sequence' is the method to use, we have to pass
@@ -685,6 +755,7 @@ sub parse_text {
};
}
ref $xseq_sub or $xseq_sub = sub { shift()->$expand_seq(@_) };
+ ref $xtext_sub or $xtext_sub = sub { shift()->$expand_text(@_) };
ref $xptree_sub or $xptree_sub = sub { shift()->$expand_ptree(@_) };
## Keep track of the "current" interior sequence, and maintain a stack
@@ -729,19 +800,24 @@ sub parse_text {
## Remember the current cmd-name
$cmd = (@seq_stack > 1) ? $seq_stack[-1]->name : '';
}
- else {
- ## In the middle of a sequence, append this text to it
- $seq->append($_) if length;
+ elsif (length) {
+ ## In the middle of a sequence, append this text to it, and
+ ## dont forget to "expand" it if that's what the caller wanted
+ $seq->append($expand_text ? &$xtext_sub($self,$_,$seq) : $_);
}
## Remember the "current" sequence and the previously seen token
($seq, $prev) = ( $seq_stack[-1], $_ );
}
## Handle unterminated sequences
+ my $errorsub = (@seq_stack > 1) ? $self->errorsub() : undef;
while (@seq_stack > 1) {
($cmd, $file, $line) = ($seq->name, $seq->file_line);
pop @seq_stack;
- warn "** Unterminated $cmd<...> at $file line $line\n";
+ my $errmsg = "** Unterminated $cmd<...> at $file line $line\n";
+ (ref $errorsub) and &{$errorsub}($errmsg)
+ or (defined $errorsub) and $self->$errorsub($errmsg)
+ or warn($errmsg);
$seq_stack[-1]->append($expand_seq ? &$xseq_sub($self,$seq) : $seq);
$seq = $seq_stack[-1];
}
@@ -788,7 +864,8 @@ This method takes the text of a POD paragraph to be processed, along
with its corresponding line number, and invokes the appropriate method
(one of B<command()>, B<verbatim()>, or B<textblock()>).
-This method does I<not> usually need to be overridden by subclasses.
+For performance reasons, this method is invoked directly without any
+dynamic lookup; Hence subclasses may I<not> override it!
=end __PRIVATE__
@@ -796,15 +873,21 @@ This method does I<not> usually need to be overridden by subclasses.
sub parse_paragraph {
my ($self, $text, $line_num) = @_;
- local *myData = $self; ## an alias to avoid deref-ing overhead
+ local *myData = $self; ## alias to avoid deref-ing overhead
+ local *myOpts = ($myData{_PARSEOPTS} ||= {}); ## get parse-options
local $_;
- ## This is the end of a non-empty paragraph
+ ## See if we want to preprocess nonPOD paragraphs as well as POD ones.
+ my $wantNonPods = $myOpts{'-want_nonPODs'};
+
+ ## Update cutting status
+ $myData{_CUTTING} = 0 if $text =~ /^={1,2}\S/;
+
+ ## Perform any desired preprocessing if we wanted it this early
+ $wantNonPods and $text = $self->preprocess_paragraph($text, $line_num);
+
## Ignore up until next POD directive if we are cutting
- if ($myData{_CUTTING}) {
- return unless ($text =~ /^={1,2}\S/);
- $myData{_CUTTING} = 0;
- }
+ return if $myData{_CUTTING};
## Now we know this is block of text in a POD section!
@@ -822,10 +905,13 @@ sub parse_paragraph {
$self->is_selected($text) or return ($myData{_CUTTING} = 1);
}
- ## Perform any desired preprocessing and re-check the "cutting" state
- $text = $self->preprocess_paragraph($text, $line_num);
- return 1 unless ((defined $text) and (length $text));
- return 1 if ($myData{_CUTTING});
+ ## If we havent already, perform any desired preprocessing and
+ ## then re-check the "cutting" state
+ unless ($wantNonPods) {
+ $text = $self->preprocess_paragraph($text, $line_num);
+ return 1 unless ((defined $text) and (length $text));
+ return 1 if ($myData{_CUTTING});
+ }
## Look for one of the three types of paragraphs
my ($pfx, $cmd, $arg, $sep) = ('', '', '', '');
@@ -842,7 +928,7 @@ sub parse_paragraph {
## except return to "cutting" mode.
if ($cmd eq 'cut') {
$myData{_CUTTING} = 1;
- return;
+ return unless $myOpts{'-process_cut_cmd'};
}
}
## Save the attributes indicating how the command was specified.
@@ -1012,7 +1098,7 @@ sub parse_from_file {
my $self = shift;
my %opts = (ref $_[0] eq 'HASH') ? %{ shift() } : ();
my ($infile, $outfile) = @_;
- my ($in_fh, $out_fh) = (undef, undef);
+ my ($in_fh, $out_fh);
my ($close_input, $close_output) = (0, 0);
local *myData = $self;
local $_;
@@ -1033,7 +1119,7 @@ sub parse_from_file {
else {
## We have a filename, open it for reading
$myData{_INFILE} = $infile;
- $in_fh = FileHandle->new("< $infile") or
+ open($in_fh, "< $infile") or
croak "Can't open $infile for reading: $!\n";
$close_input = 1;
}
@@ -1069,7 +1155,7 @@ sub parse_from_file {
else {
## We have a filename, open it for writing
$myData{_OUTFILE} = $outfile;
- $out_fh = FileHandle->new("> $outfile") or
+ open($out_fh, "> $outfile") or
croak "Can't open $outfile for writing: $!\n";
$close_output = 1;
}
@@ -1097,6 +1183,35 @@ instance data fields:
##---------------------------------------------------------------------------
+=head1 B<errorsub()>
+
+ $parser->errorsub("method_name");
+ $parser->errorsub(\&warn_user);
+ $parser->errorsub(sub { print STDERR, @_ });
+
+Specifies the method or subroutine to use when printing error messages
+about POD syntax. The supplied method/subroutine I<must> return TRUE upon
+successful printing of the message. If C<undef> is given, then the B<warn>
+builtin is used to issue error messages (this is the default behavior).
+
+ my $errorsub = $parser->errorsub()
+ my $errmsg = "This is an error message!\n"
+ (ref $errorsub) and &{$errorsub}($errmsg)
+ or (defined $errorsub) and $parser->$errorsub($errmsg)
+ or warn($errmsg);
+
+Returns a method name, or else a reference to the user-supplied subroutine
+used to print error messages. Returns C<undef> if the B<warn> builtin
+is used to issue error messages (this is the default behavior).
+
+=cut
+
+sub errorsub {
+ return (@_ > 1) ? ($_[0]->{_ERRORSUB} = $_[1]) : $_[0]->{_ERRORSUB};
+}
+
+##---------------------------------------------------------------------------
+
=head1 B<cutting()>
$boolean = $parser->cutting();
@@ -1118,6 +1233,58 @@ sub cutting {
##---------------------------------------------------------------------------
+##---------------------------------------------------------------------------
+
+=head1 B<parseopts()>
+
+When invoked with no additional arguments, B<parseopts> returns a hashtable
+of all the current parsing options.
+
+ ## See if we are parsing non-POD sections as well as POD ones
+ my %opts = $parser->parseopts();
+ $opts{'-want_nonPODs}' and print "-want_nonPODs\n";
+
+When invoked using a single string, B<parseopts> treats the string as the
+name of a parse-option and returns its corresponding value if it exists
+(returns C<undef> if it doesn't).
+
+ ## Did we ask to see '=cut' paragraphs?
+ my $want_cut = $parser->parseopts('-process_cut_cmd');
+ $want_cut and print "-process_cut_cmd\n";
+
+When invoked with multiple arguments, B<parseopts> treats them as
+key/value pairs and the specified parse-option names are set to the
+given values. Any unspecified parse-options are unaffected.
+
+ ## Set them back to the default
+ $parser->parseopts(-process_cut_cmd => 0);
+
+When passed a single hash-ref, B<parseopts> uses that hash to completely
+reset the existing parse-options, all previous parse-option values
+are lost.
+
+ ## Reset all options to default
+ $parser->parseopts( { } );
+
+See L<"PARSING OPTIONS"> for more for the name and meaning of each
+parse-option currently recognized.
+
+=cut
+
+sub parseopts {
+ local *myData = shift;
+ local *myOpts = ($myData{_PARSEOPTS} ||= {});
+ return %myOpts if (@_ == 0);
+ if (@_ == 1) {
+ local $_ = shift;
+ return ref($_) ? $myData{_PARSEOPTS} = $_ : $myOpts{$_};
+ }
+ my @newOpts = (%myOpts, @_);
+ $myData{_PARSEOPTS} = { @newOpts };
+}
+
+##---------------------------------------------------------------------------
+
=head1 B<output_file()>
$fname = $parser->output_file();
@@ -1361,6 +1528,159 @@ sub _pop_input_stream {
#############################################################################
+=head1 TREE-BASED PARSING
+
+If straightforward stream-based parsing wont meet your needs (as is
+likely the case for tasks such as translating PODs into structured
+markup languages like HTML and XML) then you may need to take the
+tree-based approach. Rather than doing everything in one pass and
+calling the B<interpolate()> method to expand sequences into text, it
+may be desirable to instead create a parse-tree using the B<parse_text()>
+method to return a tree-like structure which may contain an ordered list
+list of children (each of which may be a text-string, or a similar
+tree-like structure).
+
+Pay special attention to L<"METHODS FOR PARSING AND PROCESSING"> and
+to the objects described in L<Pod::InputObjects>. The former describes
+the gory details and parameters for how to customize and extend the
+parsing behavior of B<Pod::Parser>. B<Pod::InputObjects> provides
+several objects that may all be used interchangeably as parse-trees. The
+most obvious one is the B<Pod::ParseTree> object. It defines the basic
+interface and functionality that all things trying to be a POD parse-tree
+should do. A B<Pod::ParseTree> is defined such that each "node" may be a
+text-string, or a reference to another parse-tree. Each B<Pod::Paragraph>
+object and each B<Pod::InteriorSequence> object also supports the basic
+parse-tree interface.
+
+The B<parse_text()> method takes a given paragraph of text, and
+returns a parse-tree that contains one or more children, each of which
+may be a text-string, or an InteriorSequence object. There are also
+callback-options that may be passed to B<parse_text()> to customize
+the way it expands or transforms interior-sequences, as well as the
+returned result. These callbacks can be used to create a parse-tree
+with custom-made objects (which may or may not support the parse-tree
+interface, depending on how you choose to do it).
+
+If you wish to turn an entire POD document into a parse-tree, that process
+is fairly straightforward. The B<parse_text()> method is the key to doing
+this successfully. Every paragraph-callback (i.e. the polymorphic methods
+for B<command()>, B<verbatim()>, and B<textblock()> paragraphs) takes
+a B<Pod::Paragraph> object as an argument. Each paragraph object has a
+B<parse_tree()> method that can be used to get or set a corresponding
+parse-tree. So for each of those paragraph-callback methods, simply call
+B<parse_text()> with the options you desire, and then use the returned
+parse-tree to assign to the given paragraph object.
+
+That gives you a parse-tree for each paragraph - so now all you need is
+an ordered list of paragraphs. You can maintain that yourself as a data
+element in the object/hash. The most straightforward way would be simply
+to use an array-ref, with the desired set of custom "options" for each
+invocation of B<parse_text>. Let's assume the desired option-set is
+given by the hash C<%options>. Then we might do something like the
+following:
+
+ package MyPodParserTree;
+
+ @ISA = qw( Pod::Parser );
+
+ ...
+
+ sub begin_pod {
+ my $self = shift;
+ $self->{'-paragraphs'} = []; ## initialize paragraph list
+ }
+
+ sub command {
+ my ($parser, $command, $paragraph, $line_num, $pod_para) = @_;
+ my $ptree = $parser->parse_text({%options}, $paragraph, ...);
+ $pod_para->parse_tree( $ptree );
+ push @{ $self->{'-paragraphs'} }, $pod_para;
+ }
+
+ sub verbatim {
+ my ($parser, $paragraph, $line_num, $pod_para) = @_;
+ push @{ $self->{'-paragraphs'} }, $pod_para;
+ }
+
+ sub textblock {
+ my ($parser, $paragraph, $line_num, $pod_para) = @_;
+ my $ptree = $parser->parse_text({%options}, $paragraph, ...);
+ $pod_para->parse_tree( $ptree );
+ push @{ $self->{'-paragraphs'} }, $pod_para;
+ }
+
+ ...
+
+ package main;
+ ...
+ my $parser = new MyPodParserTree(...);
+ $parser->parse_from_file(...);
+ my $paragraphs_ref = $parser->{'-paragraphs'};
+
+Of course, in this module-author's humble opinion, I'd be more inclined to
+use the existing B<Pod::ParseTree> object than a simple array. That way
+everything in it, paragraphs and sequences, all respond to the same core
+interface for all parse-tree nodes. The result would look something like:
+
+ package MyPodParserTree2;
+
+ ...
+
+ sub begin_pod {
+ my $self = shift;
+ $self->{'-ptree'} = new Pod::ParseTree; ## initialize parse-tree
+ }
+
+ sub parse_tree {
+ ## convenience method to get/set the parse-tree for the entire POD
+ (@_ > 1) and $_[0]->{'-ptree'} = $_[1];
+ return $_[0]->{'-ptree'};
+ }
+
+ sub command {
+ my ($parser, $command, $paragraph, $line_num, $pod_para) = @_;
+ my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...);
+ $pod_para->parse_tree( $ptree );
+ $parser->parse_tree()->append( $pod_para );
+ }
+
+ sub verbatim {
+ my ($parser, $paragraph, $line_num, $pod_para) = @_;
+ $parser->parse_tree()->append( $pod_para );
+ }
+
+ sub textblock {
+ my ($parser, $paragraph, $line_num, $pod_para) = @_;
+ my $ptree = $parser->parse_text({<<options>>}, $paragraph, ...);
+ $pod_para->parse_tree( $ptree );
+ $parser->parse_tree()->append( $pod_para );
+ }
+
+ ...
+
+ package main;
+ ...
+ my $parser = new MyPodParserTree2(...);
+ $parser->parse_from_file(...);
+ my $ptree = $parser->parse_tree;
+ ...
+
+Now you have the entire POD document as one great big parse-tree. You
+can even use the B<-expand_seq> option to B<parse_text> to insert
+whole different kinds of objects. Just don't expect B<Pod::Parser>
+to know what to do with them after that. That will need to be in your
+code. Or, alternatively, you can insert any object you like so long as
+it conforms to the B<Pod::ParseTree> interface.
+
+One could use this to create subclasses of B<Pod::Paragraphs> and
+B<Pod::InteriorSequences> for specific commands (or to create your own
+custom node-types in the parse-tree) and add some kind of B<emit()>
+method to each custom node/subclass object in the tree. Then all you'd
+need to do is recursively walk the tree in the desired order, processing
+the children (most likely from left to right) by formatting them if
+they are text-strings, or by calling their B<emit()> method if they
+are objects/references.
+
=head1 SEE ALSO
L<Pod::InputObjects>, L<Pod::Select>
diff --git a/lib/Pod/PlainText.pm b/lib/Pod/PlainText.pm
deleted file mode 100644
index 3816badb7f..0000000000
--- a/lib/Pod/PlainText.pm
+++ /dev/null
@@ -1,650 +0,0 @@
-#############################################################################
-# Pod/PlainText.pm -- convert POD data to formatted ASCII text
-#
-# Derived from Tom Christiansen's Pod::PlainText module
-# (with extensive modifications).
-#
-# Copyright (C) 1994-1999 Tom Christiansen. All rights reserved.
-# This file is part of "PodParser". PodParser is free software;
-# you can redistribute it and/or modify it under the same terms
-# as Perl itself.
-#############################################################################
-
-package Pod::PlainText;
-
-use vars qw($VERSION);
-$VERSION = 1.081; ## Current version of this package
-require 5.004; ## requires this Perl version or later
-
-=head1 NAME
-
-pod2plaintext - function to convert POD data to formatted ASCII text
-
-Pod::PlainText - a class for converting POD data to formatted ASCII text
-
-=head1 SYNOPSIS
-
- use Pod::PlainText;
- pod2plaintext("perlfunc.pod");
-
-or
-
- use Pod::PlainText;
- package MyParser;
- @ISA = qw(Pod::PlainText);
-
- sub new {
- ## constructor code ...
- }
-
- ## implementation of appropriate subclass methods ...
-
- package main;
- $parser = new MyParser;
- @ARGV = ('-') unless (@ARGV > 0);
- for (@ARGV) {
- $parser->parse_from_file($_);
- }
-
-=head1 REQUIRES
-
-perl5.004, Pod::Select, Term::Cap, Exporter, Carp
-
-=head1 EXPORTS
-
-pod2plaintext()
-
-=head1 DESCRIPTION
-
-Pod::PlainText is a module that can convert documentation in the POD
-format (such as can be found throughout the Perl distribution) into
-formatted ASCII. Termcap is optionally supported for
-boldface/underline, and can be enabled via C<$Pod::PlainText::termcap=1>.
-If termcap has not been enabled, then backspaces will be used to
-simulate bold and underlined text.
-
-A separate F<pod2plaintext> program is included that is primarily a wrapper
-for C<Pod::PlainText::pod2plaintext()>.
-
-The single function C<pod2plaintext()> can take one or two arguments. The first
-should be the name of a file to read the pod from, or "<&STDIN" to read from
-STDIN. A second argument, if provided, should be a filehandle glob where
-output should be sent.
-
-=head1 SEE ALSO
-
-L<Pod::Parser>.
-
-=head1 AUTHOR
-
-Tom Christiansen E<lt>tchrist@mox.perl.comE<gt>
-
-Modified to derive from B<Pod::Parser> by
-Brad Appleton E<lt>bradapp@enteract.comE<gt>
-
-=cut
-
-#############################################################################
-
-use strict;
-#use diagnostics;
-use Carp;
-use Exporter;
-use Pod::Select;
-use Term::Cap;
-use vars qw(@ISA @EXPORT %HTML_Escapes);
-
-@ISA = qw(Exporter Pod::Select);
-@EXPORT = qw(&pod2plaintext);
-
-%HTML_Escapes = (
- 'amp' => '&', # ampersand
- 'lt' => '<', # left chevron, less-than
- 'gt' => '>', # right chevron, greater-than
- 'quot' => '"', # double quote
-
- "Aacute" => "\xC1", # capital A, acute accent
- "aacute" => "\xE1", # small a, acute accent
- "Acirc" => "\xC2", # capital A, circumflex accent
- "acirc" => "\xE2", # small a, circumflex accent
- "AElig" => "\xC6", # capital AE diphthong (ligature)
- "aelig" => "\xE6", # small ae diphthong (ligature)
- "Agrave" => "\xC0", # capital A, grave accent
- "agrave" => "\xE0", # small a, grave accent
- "Aring" => "\xC5", # capital A, ring
- "aring" => "\xE5", # small a, ring
- "Atilde" => "\xC3", # capital A, tilde
- "atilde" => "\xE3", # small a, tilde
- "Auml" => "\xC4", # capital A, dieresis or umlaut mark
- "auml" => "\xE4", # small a, dieresis or umlaut mark
- "Ccedil" => "\xC7", # capital C, cedilla
- "ccedil" => "\xE7", # small c, cedilla
- "Eacute" => "\xC9", # capital E, acute accent
- "eacute" => "\xE9", # small e, acute accent
- "Ecirc" => "\xCA", # capital E, circumflex accent
- "ecirc" => "\xEA", # small e, circumflex accent
- "Egrave" => "\xC8", # capital E, grave accent
- "egrave" => "\xE8", # small e, grave accent
- "ETH" => "\xD0", # capital Eth, Icelandic
- "eth" => "\xF0", # small eth, Icelandic
- "Euml" => "\xCB", # capital E, dieresis or umlaut mark
- "euml" => "\xEB", # small e, dieresis or umlaut mark
- "Iacute" => "\xCD", # capital I, acute accent
- "iacute" => "\xED", # small i, acute accent
- "Icirc" => "\xCE", # capital I, circumflex accent
- "icirc" => "\xEE", # small i, circumflex accent
- "Igrave" => "\xCD", # capital I, grave accent
- "igrave" => "\xED", # small i, grave accent
- "Iuml" => "\xCF", # capital I, dieresis or umlaut mark
- "iuml" => "\xEF", # small i, dieresis or umlaut mark
- "Ntilde" => "\xD1", # capital N, tilde
- "ntilde" => "\xF1", # small n, tilde
- "Oacute" => "\xD3", # capital O, acute accent
- "oacute" => "\xF3", # small o, acute accent
- "Ocirc" => "\xD4", # capital O, circumflex accent
- "ocirc" => "\xF4", # small o, circumflex accent
- "Ograve" => "\xD2", # capital O, grave accent
- "ograve" => "\xF2", # small o, grave accent
- "Oslash" => "\xD8", # capital O, slash
- "oslash" => "\xF8", # small o, slash
- "Otilde" => "\xD5", # capital O, tilde
- "otilde" => "\xF5", # small o, tilde
- "Ouml" => "\xD6", # capital O, dieresis or umlaut mark
- "ouml" => "\xF6", # small o, dieresis or umlaut mark
- "szlig" => "\xDF", # small sharp s, German (sz ligature)
- "THORN" => "\xDE", # capital THORN, Icelandic
- "thorn" => "\xFE", # small thorn, Icelandic
- "Uacute" => "\xDA", # capital U, acute accent
- "uacute" => "\xFA", # small u, acute accent
- "Ucirc" => "\xDB", # capital U, circumflex accent
- "ucirc" => "\xFB", # small u, circumflex accent
- "Ugrave" => "\xD9", # capital U, grave accent
- "ugrave" => "\xF9", # small u, grave accent
- "Uuml" => "\xDC", # capital U, dieresis or umlaut mark
- "uuml" => "\xFC", # small u, dieresis or umlaut mark
- "Yacute" => "\xDD", # capital Y, acute accent
- "yacute" => "\xFD", # small y, acute accent
- "yuml" => "\xFF", # small y, dieresis or umlaut mark
-
- "lchevron" => "\xAB", # left chevron (double less than)
- "rchevron" => "\xBB", # right chevron (double greater than)
-);
-
-##---------------------------------
-## Function definitions begin here
-##---------------------------------
-
- ## Try to find #columns for the tty
-my %NotUnix = map {($_ => 1)} qw(MacOS MSWin32 VMS MVS);
-sub get_screen {
- ((defined $ENV{TERMCAP}) && ($ENV{TERMCAP} =~ /co#(\d+)/)[0])
- or ((defined $ENV{COLUMNS}) && $ENV{COLUMNS})
- or (!$NotUnix{$^O} && (`stty -a 2>/dev/null` =~ /(\d+) columns/)[0])
- or 72;
-
-}
-
-sub pod2plaintext {
- my ($infile, $outfile) = @_;
- local $_;
- my $text_parser = new Pod::PlainText;
- $text_parser->parse_from_file($infile, $outfile);
-}
-
-##-------------------------------
-## Method definitions begin here
-##-------------------------------
-
-sub new {
- my $this = shift;
- my $class = ref($this) || $this;
- my %params = @_;
- my $self = {%params};
- bless $self, $class;
- $self->initialize();
- return $self;
-}
-
-sub initialize {
- my $self = shift;
- $self->SUPER::initialize();
- return;
-}
-
-sub makespace {
- my $self = shift;
- my $out_fh = $self->output_handle();
- if ($self->{NEEDSPACE}) {
- print $out_fh "\n";
- $self->{NEEDSPACE} = 0;
- }
-}
-
-sub bold {
- my $self = shift;
- my $line = shift;
- my $map = $self->{FONTMAP};
- return $line if $self->{USE_FORMAT};
- if ($self->{TERMCAP}) {
- $line = "$map->{BOLD}$line$map->{NORM}";
- }
- else {
- $line =~ s/(.)/$1\b$1/g;
- }
-# $line = "$map->{BOLD}$line$map->{NORM}" if $self->{ANSIFY};
- return $line;
-}
-
-sub italic {
- my $self = shift;
- my $line = shift;
- my $map = $self->{FONTMAP};
- return $line if $self->{USE_FORMAT};
- if ($self->{TERMCAP}) {
- $line = "$map->{UNDL}$line$map->{NORM}";
- }
- else {
- $line =~ s/(.)/$1\b_/g;
- }
-# $line = "$map->{UNDL}$line$map->{NORM}" if $self->{ANSIFY};
- return $line;
-}
-
-# Fill a paragraph including underlined and overstricken chars.
-# It's not perfect for words longer than the margin, and it's probably
-# slow, but it works.
-sub fill {
- my $self = shift;
- local $_ = shift;
- my $par = "";
- my $indent_space = " " x $self->{INDENT};
- my $marg = $self->{SCREEN} - $self->{INDENT};
- my $line = $indent_space;
- my $line_length;
- foreach (split) {
- my $word_length = length;
- $word_length -= 2 while /\010/g; # Subtract backspaces
-
- if ($line_length + $word_length > $marg) {
- $par .= $line . "\n";
- $line= $indent_space . $_;
- $line_length = $word_length;
- }
- else {
- if ($line_length) {
- $line_length++;
- $line .= " ";
- }
- $line_length += $word_length;
- $line .= $_;
- }
- }
- $par .= "$line\n" if length $line;
- $par .= "\n";
- return $par;
-}
-
-## Handle a pending "item" paragraph. The paragraph (if given) is the
-## corresponding item text. (the item tag should be in $self->{ITEM}).
-sub item {
- my $self = shift;
- my $cmd = shift;
- local $_ = shift;
- my $line = shift;
- $cmd = '' unless (defined $cmd);
- $_ = '' unless (defined $_);
- my $out_fh = $self->output_handle();
- return unless (defined $self->{ITEM});
- my $paratag = $self->{ITEM};
- my $prev_indent = $self->{INDENTS}->[-1] || $self->{DEF_INDENT};
- ## reset state
- undef $self->{ITEM};
- #$self->rm_callbacks('*');
-
- my $over = $self->{INDENT};
- $over -= $prev_indent if ($prev_indent < $over);
- if (length $cmd) { # tricked - this is another command
- $self->output($paratag, INDENT => $prev_indent);
- $self->command($cmd, $_);
- }
- elsif (/^\s+/o) { # verbatim
- $self->output($paratag, INDENT => $prev_indent);
- s/\s+\Z//;
- $self->verbatim($_);
- }
- else { # plain textblock
- $_ = $self->interpolate($_, $line);
- s/\s+\Z//;
- if ((length $_) && (length($paratag) <= $over)) {
- $self->IP_output($paratag, $_);
- }
- else {
- $self->output($paratag, INDENT => $prev_indent);
- $self->output($_, REFORMAT => 1);
- }
- }
-}
-
-sub remap_whitespace {
- my $self = shift;
- local($_) = shift;
- tr/\000-\177/\200-\377/;
- return $_;
-}
-
-sub unmap_whitespace {
- my $self = shift;
- local($_) = shift;
- tr/\200-\377/\000-\177/;
- return $_;
-}
-
-sub IP_output {
- my $self = shift;
- my $tag = shift;
- local($_) = @_;
- my $out_fh = $self->output_handle();
- my $tag_indent = $self->{INDENTS}->[-1] || $self->{DEF_INDENT};
- my $tag_cols = $self->{SCREEN} - $tag_indent;
- my $cols = $self->{SCREEN} - $self->{INDENT};
- $tag =~ s/\s*$//;
- s/\s+/ /g;
- s/^ //;
- my $fmt_name = '_Pod_Text_IP_output_format_';
- my $str = "format $fmt_name = \n"
- . (" " x ($tag_indent))
- . '@' . ('<' x ($self->{INDENT} - $tag_indent - 1))
- . "^" . ("<" x ($cols - 1)) . "\n"
- . '$tag, $_'
- . "\n~~"
- . (" " x ($self->{INDENT} - 2))
- . "^" . ("<" x ($cols - 5)) . "\n"
- . '$_' . "\n\n.\n1";
- #warn $str; warn "tag is $tag, _ is $_";
- {
- ## reset format (turn off warning about redefining a format)
- local($^W) = 0;
- eval $str;
- croak if ($@);
- }
- select((select($out_fh), $~ = $fmt_name)[0]);
- local($:) = ($self->curr_headings(1) eq 'SYNOPSIS') ? "\n " : $: ;
- write $out_fh;
-}
-
-sub output {
- my $self = shift;
- local $_ = shift;
- $_ = '' unless (defined $_);
- return unless (length $_);
- my $out_fh = $self->output_handle();
- my %options;
- if (@_ > 1) {
- ## usage was $self->output($text, NAME=>VALUE, ...);
- %options = @_;
- }
- elsif (@_ == 1) {
- if (ref $_[0]) {
- ## usage was $self->output($text, { NAME=>VALUE, ... } );
- %options = %{$_[0]};
- }
- else {
- ## usage was $self->output($text, $number);
- $options{"REFORMAT"} = shift;
- }
- }
- $options{"INDENT"} = $self->{INDENT} unless (defined $options{"INDENT"});
- if ((defined $options{"REFORMAT"}) && $options{"REFORMAT"}) {
- my $cols = $self->{SCREEN} - $options{"INDENT"};
- s/\s+/ /g;
- s/^ //;
- my $fmt_name = '_Pod_Text_output_format_';
- my $str = "format $fmt_name = \n~~"
- . (" " x ($options{"INDENT"} - 2))
- . "^" . ("<" x ($cols - 5)) . "\n"
- . '$_' . "\n\n.\n1";
- {
- ## reset format (turn off warning about redefining a format)
- local($^W) = 0;
- eval $str;
- croak if ($@);
- }
- select((select($out_fh), $~ = $fmt_name)[0]);
- local($:) = ($self->curr_headings(1) eq 'SYNOPSIS') ? "\n " : $: ;
- write $out_fh;
- }
- else {
- s/^/' ' x $options{"INDENT"}/gem;
- s/^\s+\n$/\n/gm;
- print $out_fh $_;
- }
-}
-
-sub internal_lrefs {
- my $self = shift;
- local $_ = shift;
- s{L</([^>]+)>}{$1}g;
- my(@items) = split( /(?:,?\s+(?:and\s+)?)/ );
- my $retstr = "the ";
- my $i;
- for ($i = 0; $i <= $#items; $i++) {
- $retstr .= "C<$items[$i]>";
- $retstr .= ", " if @items > 2 && $i != $#items;
- $retstr .= " and " if $i+2 == @items;
- }
-
- $retstr .= " entr" . ( @items > 1 ? "ies" : "y" )
- . " elsewhere in this document ";
-
- return $retstr;
-}
-
-sub begin_pod {
- my $self = shift;
-
- $self->{BEGUN} = [];
- $self->{TERMCAP} = 0;
- #$self->{USE_FORMAT} = 1;
-
- $self->{FONTMAP} = {
- UNDL => "\x1b[4m",
- INV => "\x1b[7m",
- BOLD => "\x1b[1m",
- NORM => "\x1b[0m",
- };
- if ($self->{TERMCAP} and (! defined $self->{SETUPTERMCAP})) {
- $self->{SETUPTERMCAP} = 1;
- my ($term) = Tgetent Term::Cap { TERM => undef, OSPEED => 9600 };
- $self->{FONTMAP}->{UNDL} = $term->{'_us'};
- $self->{FONTMAP}->{INV} = $term->{'_mr'};
- $self->{FONTMAP}->{BOLD} = $term->{'_md'};
- $self->{FONTMAP}->{NORM} = $term->{'_me'};
- }
-
- $self->{SCREEN} = &get_screen;
- $self->{FANCY} = 0;
- $self->{DEF_INDENT} = 4;
- $self->{INDENTS} = [];
- $self->{INDENT} = $self->{DEF_INDENT};
- $self->{NEEDSPACE} = 0;
-}
-
-sub end_pod {
- my $self = shift;
- $self->item('', '', '', 0) if (defined $self->{ITEM});
-}
-
-sub begun_excluded {
- my $self = shift;
- my @begun = @{ $self->{BEGUN} };
- return (@begun > 0) ? ($begun[-1] ne 'text') : 0;
-}
-
-sub command {
- my $self = shift;
- my $cmd = shift;
- local $_ = shift;
- my $line = shift;
- $cmd = '' unless (defined $cmd);
- $_ = '' unless (defined $_);
- my $out_fh = $self->output_handle();
-
- return if (($cmd ne 'end') and $self->begun_excluded());
- return $self->item($cmd, $_, $line) if (defined $self->{ITEM});
- $_ = $self->interpolate($_, $line);
- s/\s+\Z/\n/;
-
- return if ($cmd eq 'pod');
- if ($cmd eq 'head1') {
- $self->makespace();
- print $out_fh $_;
- # print $out_fh uc($_);
- }
- elsif ($cmd eq 'head2') {
- $self->makespace();
- # s/(\w+)/\u\L$1/g;
- #print ' ' x $self->{DEF_INDENT}, $_;
- # print "\xA7";
- s/(\w)/\xA7 $1/ if $self->{FANCY};
- print $out_fh ' ' x ($self->{DEF_INDENT}/2), $_, "\n";
- }
- elsif ($cmd eq 'over') {
- /^[-+]?\d+$/ or $_ = $self->{DEF_INDENT};
- push(@{$self->{INDENTS}}, $self->{INDENT});
- $self->{INDENT} += ($_ + 0);
- }
- elsif ($cmd eq 'back') {
- $self->{INDENT} = pop(@{$self->{INDENTS}});
- unless (defined $self->{INDENT}) {
- carp "Unmatched =back\n";
- $self->{INDENT} = $self->{DEF_INDENT};
- }
- }
- elsif ($cmd eq 'begin') {
- my ($kind) = /^(\S*)/;
- push( @{ $self->{BEGUN} }, $kind );
- }
- elsif ($cmd eq 'end') {
- pop( @{ $self->{BEGUN} } );
- }
- elsif ($cmd eq 'for') {
- $self->textblock($1) if /^text\b\s*(.*)$/s;
- }
- elsif ($cmd eq 'item') {
- $self->makespace();
- # s/\A(\s*)\*/$1\xb7/ if $self->{FANCY};
- # s/^(\s*\*\s+)/$1 /;
- $self->{ITEM} = $_;
- #$self->add_callbacks('*', SUB => \&item);
- }
- else {
- carp "Unrecognized directive: $cmd\n";
- }
-}
-
-sub verbatim {
- my $self = shift;
- local $_ = shift;
- my $line = shift;
- return if $self->begun_excluded();
- return $self->item('', $_, $line) if (defined $self->{ITEM});
- $self->output($_);
- #$self->{NEEDSPACE} = 1;
-}
-
-sub textblock {
- my $self = shift;
- my $text = shift;
- my $line = shift;
- return if $self->begun_excluded();
- return $self->item('', $text, $line) if (defined $self->{ITEM});
- local($_) = $self->interpolate($text, $line);
- s/\s*\Z/\n/;
- $self->makespace();
- $self->output($_, REFORMAT => 1);
-}
-
-sub interior_sequence {
- my $self = shift;
- my $cmd = shift;
- my $arg = shift;
- local($_) = $arg;
- if ($cmd eq 'C') {
- my ($pre, $post) = ("`", "'");
- ($pre, $post) = ($HTML_Escapes{"lchevron"}, $HTML_Escapes{"rchevron"})
- if ((defined $self->{FANCY}) && $self->{FANCY});
- $_ = $pre . $_ . $post;
- }
- elsif ($cmd eq 'E') {
- if (defined $HTML_Escapes{$_}) {
- $_ = $HTML_Escapes{$_};
- }
- else {
- carp "Unknown escape: E<$_>";
- $_ = "E<$_>";
- }
- # }
- # elsif ($cmd eq 'B') {
- # $_ = $self->bold($_);
- }
- elsif ($cmd eq 'I') {
- # $_ = $self->italic($_);
- $_ = "*" . $_ . "*";
- }
- elsif (($cmd eq 'X') || ($cmd eq 'Z')) {
- $_ = '';
- }
- elsif ($cmd eq 'S') {
- # Escape whitespace until we are ready to print
- #$_ = $self->remap_whitespace($_);
- }
- elsif ($cmd eq 'L') {
- s/\s+/ /g;
- my ($text, $manpage, $sec, $ref) = ('', $_, '', '');
- if (/\A(.*?)\|(.*)\Z/) {
- $text = $1;
- $manpage = $_ = $2;
- }
- if (/^\s*"\s*(.*)\s*"\s*$/o) {
- ($manpage, $sec) = ('', "\"$1\"");
- }
- elsif (m|\s*/\s*|s) {
- ($manpage, $sec) = split(/\s*\/\s*/, $_, 2);
- }
- if (! length $sec) {
- $ref .= "the $manpage manpage" if (length $manpage);
- }
- elsif ($sec =~ /^\s*"\s*(.*)\s*"\s*$/o) {
- $ref .= "the section on \"$1\"";
- $ref .= " in the $manpage manpage" if (length $manpage);
- }
- else {
- $ref .= "the \"$sec\" entry";
- $ref .= (length $manpage) ? " in the $manpage manpage"
- : " in this manpage"
- }
- $_ = $text || $ref;
- #if ( m{^ ([a-zA-Z][^\s\/]+) (\([^\)]+\))? $}x ) {
- # ## LREF: a manpage(3f)
- # $_ = "the $1$2 manpage";
- #}
- #elsif ( m{^ ([^/]+) / ([:\w]+(\(\))?) $}x ) {
- # ## LREF: an =item on another manpage
- # $_ = "the \"$2\" entry in the $1 manpage";
- #}
- #elsif ( m{^ / ([:\w]+(\(\))?) $}x ) {
- # ## LREF: an =item on this manpage
- # $_ = $self->internal_lrefs($1);
- #}
- #elsif ( m{^ (?: ([a-zA-Z]\S+?) / )? "?(.*?)"? $}x ) {
- # ## LREF: a =head2 (head1?), maybe on a manpage, maybe right here
- # ## the "func" can disambiguate
- # $_ = ((defined $1) && $1)
- # ? "the section on \"$2\" in the $1 manpage"
- # : "the section on \"$2\"";
- #}
- }
- return $_;
-}
-
-1;
diff --git a/lib/Pod/Select.pm b/lib/Pod/Select.pm
index 26cbe021ed..94ded8697a 100644
--- a/lib/Pod/Select.pm
+++ b/lib/Pod/Select.pm
@@ -1,10 +1,7 @@
#############################################################################
# Pod/Select.pm -- function to select portions of POD docs
#
-# Based on Tom Christiansen's pod2text() function
-# (with extensive modifications).
-#
-# Copyright (C) 1996-1999 Tom Christiansen. All rights reserved.
+# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
@@ -13,7 +10,7 @@
package Pod::Select;
use vars qw($VERSION);
-$VERSION = 1.081; ## Current version of this package
+$VERSION = 1.090; ## Current version of this package
require 5.004; ## requires this Perl version or later
#############################################################################
@@ -65,7 +62,7 @@ or
=head1 REQUIRES
-perl5.004, Pod::Parser, Exporter, FileHandle, Carp
+perl5.004, Pod::Parser, Exporter, Carp
=head1 EXPORTS
diff --git a/lib/Pod/Text.pm b/lib/Pod/Text.pm
index 88c594fdd4..1425ea2438 100644
--- a/lib/Pod/Text.pm
+++ b/lib/Pod/Text.pm
@@ -1,16 +1,15 @@
# Pod::Text -- Convert POD data to formatted ASCII text.
-# $Id: Text.pm,v 0.2 1999/06/13 02:44:01 eagle Exp $
+# $Id: Text.pm,v 2.3 1999/10/07 09:41:57 eagle Exp $
#
# Copyright 1999 by Russ Allbery <rra@stanford.edu>
#
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
#
-# This module may potentially be a replacement for Pod::Text, although it
-# does not (at the current time) attempt to match the output of Pod::Text
-# and makes several different formatting choices (mostly in the direction of
-# less markup). It uses Pod::Parser and is designed to be very easy to
-# subclass.
+# This module is intended to be a replacement for Pod::Text, and attempts to
+# match its output except for some specific circumstances where other
+# decisions seemed to produce better output. It uses Pod::Parser and is
+# designed to be very easy to subclass.
############################################################################
# Modules and declarations
@@ -20,15 +19,21 @@ package Pod::Text;
require 5.004;
-use Carp qw(carp);
-use Pod::Parser ();
+use Carp qw(carp croak);
+use Exporter ();
+use Pod::Select ();
use strict;
-use vars qw(@ISA %ESCAPES $VERSION);
+use vars qw(@ISA @EXPORT %ESCAPES $VERSION);
-@ISA = qw(Pod::Parser);
+# We inherit from Pod::Select instead of Pod::Parser so that we can be used
+# by Pod::Usage.
+@ISA = qw(Pod::Select Exporter);
-$VERSION = '0.01';
+# We have to export pod2text for backward compatibility.
+@EXPORT = qw(pod2text);
+
+($VERSION = (split (' ', q$Revision: 2.3 $ ))[1]) =~ s/\.(\d)$/.0$1/;
############################################################################
@@ -36,8 +41,8 @@ $VERSION = '0.01';
############################################################################
# This table is taken near verbatim from Pod::PlainText in Pod::Parser,
-# which got it near verbatim from Pod::Text. It is therefore credited to
-# Tom Christiansen, and I'm glad I didn't have to write it. :)
+# which got it near verbatim from the original Pod::Text. It is therefore
+# credited to Tom Christiansen, and I'm glad I didn't have to write it. :)
%ESCAPES = (
'amp' => '&', # ampersand
'lt' => '<', # left chevron, less-than
@@ -126,7 +131,6 @@ sub initialize {
$$self{sentence} = 0 unless defined $$self{sentence};
$$self{width} = 76 unless defined $$self{width};
- $$self{BEGUN} = []; # Stack of =begin blocks.
$$self{INDENTS} = []; # Stack of indentations.
$$self{MARGIN} = $$self{indent}; # Current left margin in spaces.
@@ -168,14 +172,16 @@ sub verbatim {
# Called for a regular text block. Gets the paragraph, the line number, and
# a Pod::Paragraph object. Perform interpolation and output the results.
sub textblock {
- my ($self, $text, $line) = @_;
+ my $self = shift;
return if $$self{EXCLUDE};
- local $_ = $text;
+ $self->output ($_[0]), return if $$self{VERBATIM};
+ local $_ = shift;
+ my $line = shift;
# Perform a little magic to collapse multiple L<> references. This is
- # here mostly for backwards-compatibility with Pod::Text. We'll just
- # rewrite the whole thing into actual text at this part, bypassing the
- # whole internal sequence parsing thing.
+ # here mostly for backwards-compatibility. We'll just rewrite the whole
+ # thing into actual text at this part, bypassing the whole internal
+ # sequence parsing thing.
s{
(
L< # A link of the form L</something>.
@@ -233,13 +239,17 @@ sub interior_sequence {
# Expand escapes into the actual character now, carping if invalid.
if ($command eq 'E') {
- return $ESCAPES{$_} if defined $ESCAPES{$_};
- carp "Unknown escape: E<$_>";
- return "E<$_>";
+ if (/^\d+$/) {
+ return chr;
+ } else {
+ return $ESCAPES{$_} if defined $ESCAPES{$_};
+ carp "Unknown escape: E<$_>";
+ return "E<$_>";
+ }
}
# For all the other sequences, empty content produces no output.
- return unless $_;
+ return if $_ eq '';
# For S<>, compress all internal whitespace and then map spaces to \01.
# When we output the text, we'll map this back.
@@ -279,6 +289,7 @@ sub cmd_head1 {
my $self = shift;
local $_ = shift;
s/\s+$//;
+ $_ = $self->interpolate ($_, shift);
if ($$self{alt}) {
$self->output ("\n==== $_ ====\n\n");
} else {
@@ -292,6 +303,7 @@ sub cmd_head2 {
my $self = shift;
local $_ = shift;
s/\s+$//;
+ $_ = $self->interpolate ($_, shift);
if ($$self{alt}) {
$self->output ("\n== $_ ==\n\n");
} else {
@@ -327,38 +339,35 @@ sub cmd_item {
$$self{ITEM} = $self->interpolate ($_);
}
-# Begin a block for a particular translator. To allow for weird nested
-# =begin blocks, keep track of how many blocks we were excluded from and
-# only unwind one level with each =end.
+# Begin a block for a particular translator. Setting VERBATIM triggers
+# special handling in textblock().
sub cmd_begin {
my $self = shift;
local $_ = shift;
my ($kind) = /^(\S+)/ or return;
- push (@{ $$self{BEGUN} }, $kind);
- $$self{EXCLUDE}++ unless $kind eq 'text';
+ if ($kind eq 'text') {
+ $$self{VERBATIM} = 1;
+ } else {
+ $$self{EXCLUDE} = 1;
+ }
}
# End a block for a particular translator. We assume that all =begin/=end
-# pairs are properly nested and just pop the previous one.
+# pairs are properly closed.
sub cmd_end {
my $self = shift;
- my $kind = pop @{ $$self{BEGUN} };
- $$self{EXCLUDE}-- if $$self{EXCLUDE};
+ $$self{EXCLUDE} = 0;
+ $$self{VERBATIM} = 0;
}
# One paragraph for a particular translator. Ignore it unless it's intended
-# for text, in which case we treat it as either a normal text block or a
-# verbatim text block, depending on whether it's indented.
+# for text, in which case we treat it as a verbatim text block.
sub cmd_for {
my $self = shift;
local $_ = shift;
my $line = shift;
- return unless s/^text\b[ \t]*//;
- if (/^\n\s+/) {
- $self->verbatim ($_, $line);
- } else {
- $self->textblock ($_, $line);
- }
+ return unless s/^text\b[ \t]*\n?//;
+ $self->verbatim ($_, $line);
}
@@ -368,9 +377,9 @@ sub cmd_for {
# The simple formatting ones. These are here mostly so that subclasses can
# override them and do more complicated things.
-sub seq_b { my $self = shift; return $$self{alt} ? "``$_[0]''" : $_[0] }
-sub seq_c { my $self = shift; return $$self{alt} ? "``$_[0]''" : "`$_[0]'" }
-sub seq_f { my $self = shift; return $$self{alt} ? "\"$_[0]\"" : $_[0] }
+sub seq_b { return $_[0]{alt} ? "``$_[1]''" : $_[1] }
+sub seq_c { return $_[0]{alt} ? "``$_[1]''" : "`$_[1]'" }
+sub seq_f { return $_[0]{alt} ? "\"$_[1]\"" : $_[1] }
sub seq_i { return '*' . $_[1] . '*' }
# The complicated one. Handle links. Since this is plain text, we can't
@@ -389,7 +398,6 @@ sub seq_l {
# Okay, leading and trailing whitespace isn't important; get rid of it.
s/^\s+//;
s/\s+$//;
- chomp;
# Default to using the whole content of the link entry as a section
# name. Note that L<manpage/> forces a manpage interpretation, as does
@@ -447,7 +455,12 @@ sub item {
my $space = ' ' x $indent;
$space =~ s/^ /:/ if $$self{alt};
if (!$_ || /^\s+$/ || ($$self{MARGIN} - $indent < length ($tag) + 1)) {
- $self->output ($space . $tag . "\n");
+ my $margin = $$self{MARGIN};
+ $$self{MARGIN} = $indent;
+ my $output = $self->reformat ($tag);
+ $output =~ s/\n*$/\n/;
+ $self->output ($output);
+ $$self{MARGIN} = $margin;
$self->output ($self->reformat ($_)) if /\S/;
} else {
$_ = $self->reformat ($_);
@@ -509,6 +522,49 @@ sub output { $_[1] =~ tr/\01/ /; print { $_[0]->output_handle } $_[1] }
############################################################################
+# Backwards compatibility
+############################################################################
+
+# The old Pod::Text module did everything in a pod2text() function. This
+# tries to provide the same interface for legacy applications.
+sub pod2text {
+ my @args;
+
+ # This is really ugly; I hate doing option parsing in the middle of a
+ # module. But the old Pod::Text module supported passing flags to its
+ # entry function, so handle -a and -<number>.
+ while ($_[0] =~ /^-/) {
+ my $flag = shift;
+ if ($flag eq '-a') { push (@args, alt => 1) }
+ elsif ($flag =~ /^-(\d+)$/) { push (@args, width => $1) }
+ else {
+ unshift (@_, $flag);
+ last;
+ }
+ }
+
+ # Now that we know what arguments we're using, create the parser.
+ my $parser = Pod::Text->new (@args);
+
+ # If two arguments were given, the second argument is going to be a file
+ # handle. That means we want to call parse_from_filehandle(), which
+ # means we need to turn the first argument into a file handle. Magic
+ # open will handle the <&STDIN case automagically.
+ if (defined $_[1]) {
+ local *IN;
+ unless (open (IN, $_[0])) {
+ croak ("Can't open $_[0] for reading: $!\n");
+ return;
+ }
+ $_[0] = \*IN;
+ return $parser->parse_from_filehandle (@_);
+ } else {
+ return $parser->parse_from_file (@_);
+ }
+}
+
+
+############################################################################
# Module return value and documentation
############################################################################
@@ -532,17 +588,17 @@ Pod::Text - Convert POD data to formatted ASCII text
=head1 DESCRIPTION
-Pod::Text is a module that can convert documentation in the POD format
-(such as can be found throughout the Perl distribution) into formatted
-ASCII. It uses no special formatting controls or codes whatsoever, and its
-output is therefore suitable for nearly any device.
+Pod::Text is a module that can convert documentation in the POD format (the
+preferred language for documenting Perl) into formatted ASCII. It uses no
+special formatting controls or codes whatsoever, and its output is therefore
+suitable for nearly any device.
-As a derived class from Pod::Parser, Pod::Text supports the same
-methods and interfaces. See L<Pod::Parser> for all the details; briefly,
-one creates a new parser with C<Pod::Text-E<gt>new()> and then calls
-either C<parse_from_filehandle()> or C<parse_from_file()>.
+As a derived class from Pod::Parser, Pod::Text supports the same methods and
+interfaces. See L<Pod::Parser> for all the details; briefly, one creates a
+new parser with C<Pod::Text-E<gt>new()> and then calls either
+parse_from_filehandle() or parse_from_file().
-C<new()> can take options, in the form of key/value pairs, that control the
+new() can take options, in the form of key/value pairs, that control the
behavior of the parser. The currently recognized options are:
=over 4
@@ -569,8 +625,8 @@ output.
=item sentence
-If set to a true value, Pod::Text will assume that each sentence ends
-in two spaces, and will try to preserve that spacing. If set to false, all
+If set to a true value, Pod::Text will assume that each sentence ends in two
+spaces, and will try to preserve that spacing. If set to false, all
consecutive whitespace in non-verbatim paragraphs is compressed into a
single space. Defaults to true.
@@ -580,49 +636,67 @@ The column at which to wrap text on the right-hand side. Defaults to 76.
=back
-The standard Pod::Parser method C<parse_from_filehandle()> takes up to two
+The standard Pod::Parser method parse_from_filehandle() takes up to two
arguments, the first being the file handle to read POD from and the second
being the file handle to write the formatted output to. The first defaults
to STDIN if not given, and the second defaults to STDOUT. The method
-C<parse_from_file()> is almost identical, except that its two arguments are
-the input and output disk files instead. See L<Pod::Parser> for the
-specific details.
+parse_from_file() is almost identical, except that its two arguments are the
+input and output disk files instead. See L<Pod::Parser> for the specific
+details.
=head1 DIAGNOSTICS
=over 4
+=item Bizarre space in item
+
+(W) Something has gone wrong in internal C<=item> processing. This message
+indicates a bug in Pod::Text; you should never see it.
+
+=item Can't open %s for reading: %s
+
+(F) Pod::Text was invoked via the compatibility mode pod2text() interface
+and the input file it was given could not be opened.
+
=item Unknown escape: %s
-The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text
-didn't know about.
+(W) The POD source contained an C<EE<lt>E<gt>> escape that Pod::Text didn't
+know about.
=item Unknown sequence: %s
-The POD source contained a non-standard internal sequence (something of the
-form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
+(W) The POD source contained a non-standard internal sequence (something of
+the form C<XE<lt>E<gt>>) that Pod::Text didn't know about.
=item Unmatched =back
-Pod::Text encountered a C<=back> command that didn't correspond to an
+(W) Pod::Text encountered a C<=back> command that didn't correspond to an
C<=over> command.
=back
+=head1 RESTRICTIONS
+
+Embedded Ctrl-As (octal 001) in the input will be mapped to spaces on
+output, due to an internal implementation detail.
+
=head1 NOTES
-I'm hoping this module will eventually replace Pod::Text in Perl core once
-Pod::Parser has been added to Perl core. Accordingly, don't be surprised if
-the name of this module changes to Pod::Text down the road.
+This is a replacement for an earlier Pod::Text module written by Tom
+Christiansen. It has a revamped interface, since it now uses Pod::Parser,
+but an interface roughly compatible with the old Pod::Text::pod2text()
+function is still available. Please change to the new calling convention,
+though.
The original Pod::Text contained code to do formatting via termcap
sequences, although it wasn't turned on by default and it was problematic to
-get it to work at all. This module doesn't even try to do that, but a
-subclass of it does. Look for Pod::Text::Termcap.
+get it to work at all. This rewrite doesn't even try to do that, but a
+subclass of it does. Look for L<Pod::Text::Termcap|Pod::Text::Termcap>.
=head1 SEE ALSO
-L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>
+L<Pod::Parser|Pod::Parser>, L<Pod::Text::Termcap|Pod::Text::Termcap>,
+pod2text(1)
=head1 AUTHOR
diff --git a/lib/Pod/Text/Color.pm b/lib/Pod/Text/Color.pm
index 5eac57ca9f..10e1d9fa30 100644
--- a/lib/Pod/Text/Color.pm
+++ b/lib/Pod/Text/Color.pm
@@ -1,5 +1,5 @@
# Pod::Text::Color -- Convert POD data to formatted color ASCII text
-# $Id: Color.pm,v 0.1 1999/06/13 02:41:06 eagle Exp $
+# $Id: Color.pm,v 0.5 1999/09/20 10:15:16 eagle Exp $
#
# Copyright 1999 by Russ Allbery <rra@stanford.edu>
#
@@ -27,7 +27,7 @@ use vars qw(@ISA $VERSION);
@ISA = qw(Pod::Text);
# Use the CVS revision of this file as its version number.
-($VERSION = (split (' ', q$Revision: 0.1 $ ))[1]) =~ s/\.(\d)$/.0$1/;
+($VERSION = (split (' ', q$Revision: 0.5 $ ))[1]) =~ s/\.(\d)$/.0$1/;
############################################################################
@@ -100,10 +100,19 @@ Pod::Text::Color - Convert POD data to formatted color ASCII text
=head1 DESCRIPTION
-Pod::Text::Color is a simple subclass of Pod::Text that highlights
-output text using ANSI color escape sequences. Apart from the color, it in
-all ways functions like Pod::Text. See L<Pod::Text> for details
-and available options.
+Pod::Text::Color is a simple subclass of Pod::Text that highlights output
+text using ANSI color escape sequences. Apart from the color, it in all
+ways functions like Pod::Text. See L<Pod::Text> for details and available
+options.
+
+Term::ANSIColor is used to get colors and therefore must be installed to use
+this module.
+
+=head1 BUGS
+
+This is just a basic proof of concept. It should be seriously expanded to
+support configurable coloration via options passed to the constructor, and
+B<pod2text> should be taught about those.
=head1 SEE ALSO
diff --git a/lib/Pod/Text/Termcap.pm b/lib/Pod/Text/Termcap.pm
index efb71a69ba..7e89ec61be 100644
--- a/lib/Pod/Text/Termcap.pm
+++ b/lib/Pod/Text/Termcap.pm
@@ -1,14 +1,14 @@
# Pod::Text::Termcap -- Convert POD data to ASCII text with format escapes.
-# $Id: Termcap.pm,v 0.1 1999/06/13 02:41:06 eagle Exp $
+# $Id: Termcap.pm,v 0.4 1999/09/20 10:17:45 eagle Exp $
#
# Copyright 1999 by Russ Allbery <rra@stanford.edu>
#
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
#
-# This is a simple subclass of Pod::Text that overrides a few key
-# methods to output the right termcap escape sequences for formatted text
-# on the current terminal type.
+# This is a simple subclass of Pod::Text that overrides a few key methods to
+# output the right termcap escape sequences for formatted text on the
+# current terminal type.
############################################################################
# Modules and declarations
@@ -21,13 +21,14 @@ require 5.004;
use Pod::Text ();
use POSIX ();
use Term::Cap;
+
use strict;
use vars qw(@ISA $VERSION);
@ISA = qw(Pod::Text);
# Use the CVS revision of this file as its version number.
-($VERSION = (split (' ', q$Revision: 0.1 $ ))[1]) =~ s/\.(\d)$/.0$1/;
+($VERSION = (split (' ', q$Revision: 0.4 $ ))[1]) =~ s/\.(\d)$/.0$1/;
############################################################################
@@ -125,10 +126,10 @@ Pod::Text::Color - Convert POD data to ASCII text with format escapes
=head1 DESCRIPTION
-Pod::Text::Termcap is a simple subclass of Pod::Text that highlights
-output text using the correct termcap escape sequences for the current
-terminal. Apart from the format codes, it in all ways functions like
-Pod::Text. See L<Pod::Text> for details and available options.
+Pod::Text::Termcap is a simple subclass of Pod::Text that highlights output
+text using the correct termcap escape sequences for the current terminal.
+Apart from the format codes, it in all ways functions like Pod::Text. See
+L<Pod::Text> for details and available options.
=head1 SEE ALSO
diff --git a/lib/Pod/Usage.pm b/lib/Pod/Usage.pm
index 9cb71e0afa..6e6fb7bb80 100644
--- a/lib/Pod/Usage.pm
+++ b/lib/Pod/Usage.pm
@@ -1,10 +1,7 @@
#############################################################################
# Pod/Usage.pm -- print usage messages for the running script.
#
-# Based on Tom Christiansen's Pod::Text::pod2text() function
-# (with modifications).
-#
-# Copyright (C) 1994-1999 Tom Christiansen. All rights reserved.
+# Copyright (C) 1996-1999 by Bradford Appleton. All rights reserved.
# This file is part of "PodParser". PodParser is free software;
# you can redistribute it and/or modify it under the same terms
# as Perl itself.
@@ -13,7 +10,7 @@
package Pod::Usage;
use vars qw($VERSION);
-$VERSION = 1.081; ## Current version of this package
+$VERSION = 1.090; ## Current version of this package
require 5.004; ## requires this Perl version or later
=head1 NAME
@@ -363,12 +360,21 @@ use strict;
#use diagnostics;
use Carp;
use Exporter;
-use Pod::PlainText;
use File::Spec;
use vars qw(@ISA @EXPORT);
-@ISA = qw(Pod::PlainText);
@EXPORT = qw(&pod2usage);
+BEGIN {
+ if ( $] >= 5.005_58 ) {
+ require Pod::Text;
+ @ISA = qw( Pod::Text );
+ }
+ else {
+ require Pod::PlainText;
+ @ISA = qw( Pod::PlainText );
+ }
+}
+
##---------------------------------------------------------------------------
diff --git a/lib/Sys/Hostname.pm b/lib/Sys/Hostname.pm
index e96822e414..4d93f91f9e 100644
--- a/lib/Sys/Hostname.pm
+++ b/lib/Sys/Hostname.pm
@@ -65,6 +65,10 @@ sub hostname {
chomp($host = `hostname 2> NUL`) unless defined $host;
return $host;
}
+ elsif ($^O eq 'epoc') {
+ $host = 'localhost';
+ return $host;
+ }
else { # Unix
# method 2 - syscall is preferred since it avoids tainting problems
diff --git a/lib/Text/Tabs.pm b/lib/Text/Tabs.pm
index acd7afb7d6..c431019908 100644
--- a/lib/Text/Tabs.pm
+++ b/lib/Text/Tabs.pm
@@ -7,7 +7,7 @@ require Exporter;
@EXPORT = qw(expand unexpand $tabstop);
use vars qw($VERSION $tabstop $debug);
-$VERSION = 96.121201;
+$VERSION = 98.112801;
use strict;
@@ -18,7 +18,7 @@ BEGIN {
sub expand
{
- my @l = @_;
+ my (@l) = @_;
for $_ (@l) {
1 while s/(^|\n)([^\t\n]*)(\t+)/
$1. $2 . (" " x
@@ -32,7 +32,7 @@ sub expand
sub unexpand
{
- my @l = @_;
+ my (@l) = @_;
my @e;
my $x;
my $line;
diff --git a/lib/Tie/Array.pm b/lib/Tie/Array.pm
index 3f34c3b81f..5ef83c4781 100644
--- a/lib/Tie/Array.pm
+++ b/lib/Tie/Array.pm
@@ -1,7 +1,8 @@
package Tie::Array;
use vars qw($VERSION);
use strict;
-$VERSION = '1.00';
+use Carp;
+$VERSION = '1.01';
# Pod documentation after __END__ below.
@@ -74,6 +75,16 @@ sub SPLICE
return @result;
}
+sub EXISTS {
+ my $pkg = ref $_[0];
+ croak "$pkg dosn't define an EXISTS method";
+}
+
+sub DELETE {
+ my $pkg = ref $_[0];
+ croak "$pkg dosn't define a DELETE method";
+}
+
package Tie::StdArray;
use vars qw(@ISA);
@ISA = 'Tie::Array';
@@ -88,6 +99,8 @@ sub POP { pop(@{$_[0]}) }
sub PUSH { my $o = shift; push(@$o,@_) }
sub SHIFT { shift(@{$_[0]}) }
sub UNSHIFT { my $o = shift; unshift(@$o,@_) }
+sub EXISTS { exists $_[0]->[$_[1]] }
+sub DELETE { delete $_[0]->[$_[1]] }
sub SPLICE
{
@@ -120,6 +133,8 @@ Tie::Array - base class for tied arrays
sub STORE { ... } # mandatory if elements writeable
sub STORESIZE { ... } # mandatory if elements can be added/deleted
+ sub EXISTS { ... } # mandatory if exists() expected to work
+ sub DELETE { ... } # mandatory if delete() expected to work
# optional methods - for efficiency
sub CLEAR { ... }
@@ -150,9 +165,11 @@ Tie::Array - base class for tied arrays
This module provides methods for array-tying classes. See
L<perltie> for a list of the functions required in order to tie an array
-to a package. The basic B<Tie::Array> package provides stub C<DELETE>
-and C<EXTEND> methods, and implementations of C<PUSH>, C<POP>, C<SHIFT>,
-C<UNSHIFT>, C<SPLICE> and C<CLEAR> in terms of basic C<FETCH>, C<STORE>,
+to a package. The basic B<Tie::Array> package provides stub C<DESTROY>,
+and C<EXTEND> methods that do nothing, stub C<DELETE> and C<EXISTS>
+methods that croak() if the delete() or exists() builtins are ever called
+on the tied array, and implementations of C<PUSH>, C<POP>, C<SHIFT>,
+C<UNSHIFT>, C<SPLICE> and C<CLEAR> in terms of basic C<FETCH>, C<STORE>,
C<FETCHSIZE>, C<STORESIZE>.
The B<Tie::StdArray> package provides efficient methods required for tied arrays
@@ -203,6 +220,18 @@ deleted.
Informative call that array is likely to grow to have I<count> entries.
Can be used to optimize allocation. This method need do nothing.
+=item EXISTS this, key
+
+Verify that the element at index I<key> exists in the tied array I<this>.
+
+The B<Tie::Array> implementation is a stub that simply croaks.
+
+=item DELETE this, key
+
+Delete the element at index I<key> from the tied array I<this>.
+
+The B<Tie::Array> implementation is a stub that simply croaks.
+
=item CLEAR this
Clear (remove, delete, ...) all values from the tied array associated with
diff --git a/lib/Tie/Hash.pm b/lib/Tie/Hash.pm
index 2902efb4d0..928b798e45 100644
--- a/lib/Tie/Hash.pm
+++ b/lib/Tie/Hash.pm
@@ -73,6 +73,8 @@ Return the next key for the hash.
Verify that I<key> exists with the tied hash I<this>.
+The B<Tie::Hash> implementation is a stub that simply croaks.
+
=item DELETE this, key
Delete the key I<key> from the tied hash I<this>.
diff --git a/lib/Time/Local.pm b/lib/Time/Local.pm
index 75bcc38eea..f3f6f542a6 100644
--- a/lib/Time/Local.pm
+++ b/lib/Time/Local.pm
@@ -3,8 +3,9 @@ require 5.000;
require Exporter;
use Carp;
-@ISA = qw(Exporter);
-@EXPORT = qw(timegm timelocal);
+@ISA = qw( Exporter );
+@EXPORT = qw( timegm timelocal );
+@EXPORT_OK = qw( timegm_nocheck timelocal_nocheck );
# Set up constants
$SEC = 1;
@@ -17,6 +18,8 @@ use Carp;
$breakpoint = ($thisYear + 50) % 100;
$nextCentury += 100 if $breakpoint < 50;
+my %options;
+
sub timegm {
my (@date) = @_;
if ($date[5] > 999) {
@@ -35,6 +38,11 @@ sub timegm {
+ ($date[3]-1) * $DAY;
}
+sub timegm_nocheck {
+ local $options{no_range_check} = 1;
+ &timegm;
+}
+
sub timelocal {
my $t = &timegm;
my $tt = $t;
@@ -44,14 +52,13 @@ sub timelocal {
if ($t < $DAY and ($lt[5] >= 70 or $gt[5] >= 70 )) {
# Wrap error, too early a date
# Try a safer date
- $tt = $DAY;
+ $tt += $DAY;
@lt = localtime($tt);
@gt = gmtime($tt);
}
my $tzsec = ($gt[1] - $lt[1]) * $MIN + ($gt[2] - $lt[2]) * $HR;
- my($lday,$gday) = ($lt[7],$gt[7]);
if($lt[5] > $gt[5]) {
$tzsec -= $DAY;
}
@@ -70,14 +77,21 @@ sub timelocal {
$time;
}
+sub timelocal_nocheck {
+ local $options{no_range_check} = 1;
+ &timelocal;
+}
+
sub cheat {
$year = $_[5];
$month = $_[4];
- croak "Month '$month' out of range 0..11" if $month > 11 || $month < 0;
- croak "Day '$_[3]' out of range 1..31" if $_[3] > 31 || $_[3] < 1;
- croak "Hour '$_[2]' out of range 0..23" if $_[2] > 23 || $_[2] < 0;
- croak "Minute '$_[1]' out of range 0..59" if $_[1] > 59 || $_[1] < 0;
- croak "Second '$_[0]' out of range 0..59" if $_[0] > 59 || $_[0] < 0;
+ unless ($options{no_range_check}) {
+ croak "Month '$month' out of range 0..11" if $month > 11 || $month < 0;
+ croak "Day '$_[3]' out of range 1..31" if $_[3] > 31 || $_[3] < 1;
+ croak "Hour '$_[2]' out of range 0..23" if $_[2] > 23 || $_[2] < 0;
+ croak "Minute '$_[1]' out of range 0..59" if $_[1] > 59 || $_[1] < 0;
+ croak "Second '$_[0]' out of range 0..59" if $_[0] > 59 || $_[0] < 0;
+ }
$guess = $^T;
@g = gmtime($guess);
$lastguess = "";
@@ -137,6 +151,27 @@ the values provided. While the day of the month is expected to be in
the range 1..31, the month should be in the range 0..11.
This is consistent with the values returned from localtime() and gmtime().
+The timelocal() and timegm() functions perform range checking on the
+input $sec, $min, $hours, $mday, and $mon values by default. If you'd
+rather they didn't, you can explicitly import the timelocal_nocheck()
+and timegm_nocheck() functions.
+
+ use Time::Local 'timelocal_nocheck';
+
+ {
+ # The 365th day of 1999
+ print scalar localtime timelocal_nocheck 0,0,0,365,0,99;
+
+ # The twenty thousandth day since 1970
+ print scalar localtime timelocal_nocheck 0,0,0,20000,0,70;
+
+ # And even the 10,000,000th second since 1999!
+ print scalar localtime timelocal_nocheck 10000000,0,0,1,0,99;
+ }
+
+Your mileage may vary when trying these with minutes and hours,
+and it doesn't work at all for months.
+
Strictly speaking, the year should also be specified in a form consistent
with localtime(), i.e. the offset from 1900.
In order to make the interpretation of the year easier for humans,
diff --git a/lib/attributes.pm b/lib/attributes.pm
index e49204fc76..09f355139f 100644
--- a/lib/attributes.pm
+++ b/lib/attributes.pm
@@ -1,9 +1,10 @@
package attributes;
-$VERSION = 0.01;
+$VERSION = 0.02;
-#@EXPORT_OK = qw(get reftype);
-#@EXPORT = ();
+@EXPORT_OK = qw(get reftype);
+@EXPORT = ();
+%EXPORT_TAGS = (ALL => [@EXPORT, @EXPORT_OK]);
use strict;
@@ -29,8 +30,10 @@ sub carp {
BEGIN { bootstrap }
sub import {
- @_ > 2 && ref $_[2] or
- croak 'Usage: use '.__PACKAGE__.' $home_stash, $ref, @attrlist';
+ @_ > 2 && ref $_[2] or do {
+ require Exporter;
+ goto &Exporter::import;
+ };
my (undef,$home_stash,$svref,@attrs) = @_;
my $svtype = uc reftype($svref);
@@ -82,12 +85,7 @@ sub get ($) {
;
}
-#sub export {
-# require Exporter;
-# goto &Exporter::import;
-#}
-#
-#sub require_version { goto &UNIVERSAL::VERSION }
+sub require_version { goto &UNIVERSAL::VERSION }
1;
__END__
@@ -106,13 +104,16 @@ attributes - get/set subroutine or variable attributes
use attributes (); # optional, to get subroutine declarations
my @attrlist = attributes::get(\&foo);
+ use attributes 'get'; # import the attributes::get subroutine
+ my @attrlist = get \&foo;
+
=head1 DESCRIPTION
Subroutine declarations and definitions may optionally have attribute lists
associated with them. (Variable C<my> declarations also may, but see the
warning below.) Perl handles these declarations by passing some information
about the call site and the thing being declared along with the attribute
-list to this module. In particular, first example above is equivalent to
+list to this module. In particular, the first example above is equivalent to
the following:
use attributes __PACKAGE__, \&foo, 'method';
@@ -187,7 +188,7 @@ empty. If passed invalid arguments, it uses die() (via L<Carp::croak|Carp>)
to raise a fatal exception. If it can find an appropriate package name
for a class method lookup, it will include the results from a
C<FETCH_I<type>_ATTRIBUTES> call in its return list, as described in
-L"Package-specific Attribute Handling"> below.
+L<"Package-specific Attribute Handling"> below.
Otherwise, only L<built-in attributes|"Built-in Attributes"> will be returned.
=item reftype
@@ -196,13 +197,11 @@ This routine expects a single parameter--a reference to a subroutine or
variable. It returns the built-in type of the referenced variable,
ignoring any package into which it might have been blessed.
This can be useful for determining the I<type> value which forms part of
-the method names described in L"Package-specific Attribute Handling"> below.
+the method names described in L<"Package-specific Attribute Handling"> below.
=back
-Note that these routines are I<not> exported. This is primarily because
-the C<use> mechanism which would normally import them is already in use
-by Perl itself to implement the C<sub : attributes> syntax.
+Note that these routines are I<not> exported by default.
=head2 Package-specific Attribute Handling
@@ -289,6 +288,20 @@ Some examples of syntactically invalid attribute lists (with annotation):
Y2::north # "Y2::north" not a simple identifier
foo + bar # "+" neither a comma nor whitespace
+=head1 EXPORTS
+
+=head2 Default exports
+
+None.
+
+=head2 Available exports
+
+The routines C<get> and C<reftype> are exportable.
+
+=head2 Export tags defined
+
+The C<:ALL> tag will get all of the above exports.
+
=head1 EXAMPLES
Here are some samples of syntactically valid declarations, with annotation
diff --git a/lib/bigfloat.pl b/lib/bigfloat.pl
index 6af5f17303..8c28abdcd1 100644
--- a/lib/bigfloat.pl
+++ b/lib/bigfloat.pl
@@ -79,7 +79,12 @@ sub norm { #(mantissa, exponent) return fnum_str
sub main'fneg { #(fnum_str) return fnum_str
local($_) = &'fnorm($_[$[]);
vec($_,0,8) ^= ord('+') ^ ord('-') unless $_ eq '+0E+0'; # flip sign
- s/^H/N/;
+ if ( ord("\t") == 9 ) { # ascii
+ s/^H/N/;
+ }
+ else { # ebcdic character set
+ s/\373/N/;
+ }
$_;
}
diff --git a/lib/byte.pm b/lib/byte.pm
new file mode 100644
index 0000000000..cc23b40f4f
--- /dev/null
+++ b/lib/byte.pm
@@ -0,0 +1,33 @@
+package byte;
+
+sub import {
+ $^H |= 0x00000010;
+}
+
+sub unimport {
+ $^H &= ~0x00000010;
+}
+
+sub AUTOLOAD {
+ require "byte_heavy.pl";
+ goto &$AUTOLOAD;
+}
+
+sub length ($);
+
+1;
+__END__
+
+=head1 NAME
+
+byte - Perl pragma to turn force treating strings as bytes not UNICODE
+
+=head1 SYNOPSIS
+
+ use byte;
+ no byte;
+
+=head1 DESCRIPTION
+
+
+=cut
diff --git a/lib/byte_heavy.pl b/lib/byte_heavy.pl
new file mode 100644
index 0000000000..07c908a689
--- /dev/null
+++ b/lib/byte_heavy.pl
@@ -0,0 +1,8 @@
+package byte;
+
+sub length ($)
+{
+ return CORE::length($_[0]);
+}
+
+1;
diff --git a/lib/constant.pm b/lib/constant.pm
index 5d3dd91b46..31f47fbf54 100644
--- a/lib/constant.pm
+++ b/lib/constant.pm
@@ -1,6 +1,112 @@
package constant;
-$VERSION = '1.00';
+use strict;
+use vars qw( $VERSION %declared );
+$VERSION = '1.01';
+
+#=======================================================================
+
+require 5.005_62;
+
+# Some names are evil choices.
+my %keywords = map +($_, 1), qw{ BEGIN INIT STOP END DESTROY AUTOLOAD };
+
+my %forced_into_main = map +($_, 1),
+ qw{ STDIN STDOUT STDERR ARGV ARGVOUT ENV INC SIG };
+
+my %forbidden = (%keywords, %forced_into_main);
+
+#=======================================================================
+# import() - import symbols into user's namespace
+#
+# What we actually do is define a function in the caller's namespace
+# which returns the value. The function we create will normally
+# be inlined as a constant, thereby avoiding further sub calling
+# overhead.
+#=======================================================================
+sub import {
+ my $class = shift;
+ return unless @_; # Ignore 'use constant;'
+ my $name = shift;
+ unless (defined $name) {
+ require Carp;
+ Carp::croak("Can't use undef as constant name");
+ }
+ my $pkg = caller;
+
+ # Normal constant name
+ if ($name =~ /^(?:[A-Z]\w|_[A-Z])\w*\z/ and !$forbidden{$name}) {
+ # Everything is okay
+
+ # Name forced into main, but we're not in main. Fatal.
+ } elsif ($forced_into_main{$name} and $pkg ne 'main') {
+ require Carp;
+ Carp::croak("Constant name '$name' is forced into main::");
+
+ # Starts with double underscore. Fatal.
+ } elsif ($name =~ /^__/) {
+ require Carp;
+ Carp::croak("Constant name '$name' begins with '__'");
+
+ # Maybe the name is tolerable
+ } elsif ($name =~ /^[A-Za-z_]\w*\z/) {
+ # Then we'll warn only if you've asked for warnings
+ if ($^W) {
+ require Carp;
+ if ($keywords{$name}) {
+ Carp::carp("Constant name '$name' is a Perl keyword");
+ } elsif ($forced_into_main{$name}) {
+ Carp::carp("Constant name '$name' is " .
+ "forced into package main::");
+ } elsif (1 == length $name) {
+ Carp::carp("Constant name '$name' is too short");
+ } elsif ($name =~ /^_?[a-z\d]/) {
+ Carp::carp("Constant name '$name' should " .
+ "have an initial capital letter");
+ } else {
+ # Catch-all - what did I miss? If you get this error,
+ # please let me know what your constant's name was.
+ # Write to <rootbeer@redcat.com>. Thanks!
+ Carp::carp("Constant name '$name' has unknown problems");
+ }
+ }
+
+ # Looks like a boolean
+ # use constant FRED == fred;
+ } elsif ($name =~ /^[01]?\z/) {
+ require Carp;
+ if (@_) {
+ Carp::croak("Constant name '$name' is invalid");
+ } else {
+ Carp::croak("Constant name looks like boolean value");
+ }
+
+ } else {
+ # Must have bad characters
+ require Carp;
+ Carp::croak("Constant name '$name' has invalid characters");
+ }
+
+ {
+ no strict 'refs';
+ my $full_name = "${pkg}::$name";
+ $declared{$full_name}++;
+ if (@_ == 1) {
+ my $scalar = $_[0];
+ *$full_name = sub () { $scalar };
+ } elsif (@_) {
+ my @list = @_;
+ *$full_name = sub () { @list };
+ } else {
+ *$full_name = sub () { };
+ }
+ }
+
+}
+
+1;
+
+__END__
=head1 NAME
@@ -20,7 +126,7 @@ constant - Perl pragma to declare constants
print "This line does nothing" unless DEBUGGING;
- # references can be declared constant
+ # references can be constants
use constant CHASH => { foo => 42 };
use constant CARRAY => [ 1,2,3,4 ];
use constant CPSEUDOHASH => [ { foo => 1}, 42 ];
@@ -30,7 +136,7 @@ constant - Perl pragma to declare constants
print CARRAY->[$i];
print CPSEUDOHASH->{foo};
print CCODE->("me");
- print CHASH->[10]; # compile-time error
+ print CHASH->[10]; # compile-time error
=head1 DESCRIPTION
@@ -63,7 +169,10 @@ List constants are returned as lists, not as arrays.
The use of all caps for constant names is merely a convention,
although it is recommended in order to make constants stand out
and to help avoid collisions with other barewords, keywords, and
-subroutine names. Constant names must begin with a letter.
+subroutine names. Constant names must begin with a letter or
+underscore. Names beginning with a double underscore are reserved. Some
+poor choices for names will generate warnings, if warnings are enabled at
+compile time.
Constant symbols are package scoped (rather than block scoped, as
C<use strict> is). That is, you can refer to a constant from package
@@ -98,7 +207,24 @@ constants at compile time, allowing for way cool stuff like this.
print E2BIG, "\n"; # something like "Arg list too long"
print 0+E2BIG, "\n"; # "7"
-Errors in dereferencing constant references are trapped at compile-time.
+Dereferencing constant references incorrectly (such as using an array
+subscript on a constant hash reference, or vice versa) will be trapped at
+compile time.
+
+In the rare case in which you need to discover at run time whether a
+particular constant has been declared via this module, you may use
+this function to examine the hash C<%constant::declared>. If the given
+constant name does not include a package name, the current package is
+used.
+
+ sub declared ($) {
+ use constant 1.01; # don't omit this!
+ my $name = shift;
+ $name =~ s/^::/main::/;
+ my $pkg = caller;
+ my $full_name = $name =~ /::/ ? $name : "${pkg}::$name";
+ $constant::declared{$full_name};
+ }
=head1 TECHNICAL NOTE
@@ -115,7 +241,19 @@ In the current version of Perl, list constants are not inlined
and some symbols may be redefined without generating a warning.
It is not possible to have a subroutine or keyword with the same
-name as a constant. This is probably a Good Thing.
+name as a constant in the same package. This is probably a Good Thing.
+
+A constant with a name in the list C<STDIN STDOUT STDERR ARGV ARGVOUT
+ENV INC SIG> is not allowed anywhere but in package C<main::>, for
+technical reasons.
+
+Even though a reference may be declared as a constant, the reference may
+point to data which may be changed, as this code shows.
+
+ use constant CARRAY => [ 1,2,3,4 ];
+ print CARRAY->[1];
+ CARRAY->[1] = " be changed";
+ print CARRAY->[1];
Unlike constants in some languages, these cannot be overridden
on the command line or via environment variables.
@@ -126,61 +264,20 @@ For example, you can't say C<$hash{CONSTANT}> because C<CONSTANT> will
be interpreted as a string. Use C<$hash{CONSTANT()}> or
C<$hash{+CONSTANT}> to prevent the bareword quoting mechanism from
kicking in. Similarly, since the C<=E<gt>> operator quotes a bareword
-immediately to its left you have to say C<CONSTANT() =E<gt> 'value'>
-instead of C<CONSTANT =E<gt> 'value'>.
+immediately to its left, you have to say C<CONSTANT() =E<gt> 'value'>
+(or simply use a comma in place of the big arrow) instead of
+C<CONSTANT =E<gt> 'value'>.
=head1 AUTHOR
-Tom Phoenix, E<lt>F<rootbeer@teleport.com>E<gt>, with help from
+Tom Phoenix, E<lt>F<rootbeer@redcat.com>E<gt>, with help from
many other folks.
=head1 COPYRIGHT
-Copyright (C) 1997, Tom Phoenix
+Copyright (C) 1997, 1999 Tom Phoenix
This module is free software; you can redistribute it or modify it
under the same terms as Perl itself.
=cut
-
-use strict;
-use Carp;
-use vars qw($VERSION);
-
-#=======================================================================
-
-# Some of this stuff didn't work in version 5.003, alas.
-require 5.003_96;
-
-#=======================================================================
-# import() - import symbols into user's namespace
-#
-# What we actually do is define a function in the caller's namespace
-# which returns the value. The function we create will normally
-# be inlined as a constant, thereby avoiding further sub calling
-# overhead.
-#=======================================================================
-sub import {
- my $class = shift;
- my $name = shift or return; # Ignore 'use constant;'
- croak qq{Can't define "$name" as constant} .
- qq{ (name contains invalid characters or is empty)}
- unless $name =~ /^[^\W_0-9]\w*$/;
-
- my $pkg = caller;
- {
- no strict 'refs';
- if (@_ == 1) {
- my $scalar = $_[0];
- *{"${pkg}::$name"} = sub () { $scalar };
- } elsif (@_) {
- my @list = @_;
- *{"${pkg}::$name"} = sub () { @list };
- } else {
- *{"${pkg}::$name"} = sub () { };
- }
- }
-
-}
-
-1;
diff --git a/lib/diagnostics.pm b/lib/diagnostics.pm
index f174ee5feb..e6a9127158 100755
--- a/lib/diagnostics.pm
+++ b/lib/diagnostics.pm
@@ -167,9 +167,11 @@ Tom Christiansen <F<tchrist@mox.perl.com>>, 25 June 1995.
=cut
-require 5.001;
+require 5.005_64;
use Carp;
+$VERSION = v1.0;
+
use Config;
($privlib, $archlib) = @Config{qw(privlibexp archlibexp)};
if ($^O eq 'VMS') {
@@ -177,9 +179,14 @@ if ($^O eq 'VMS') {
$privlib = VMS::Filespec::unixify($privlib);
$archlib = VMS::Filespec::unixify($archlib);
}
-@trypod = ("$archlib/pod/perldiag.pod",
- "$privlib/pod/perldiag-$].pod",
- "$privlib/pod/perldiag.pod");
+@trypod = (
+ "$archlib/pod/perldiag.pod",
+ "$privlib/pod/perldiag-$Config{version}.pod",
+ "$privlib/pod/perldiag.pod",
+ "$archlib/pods/perldiag.pod",
+ "$privlib/pods/perldiag-$Config{version}.pod",
+ "$privlib/pods/perldiag.pod",
+ );
# handy for development testing of new warnings etc
unshift @trypod, "./pod/perldiag.pod" if -e "pod/perldiag.pod";
($PODFILE) = ((grep { -e } @trypod), $trypod[$#trypod])[0];
@@ -328,7 +335,7 @@ EOFUNC
# strip formatting directives in =item line
($header = $1) =~ s/[A-Z]<(.*?)>/$1/g;
- if ($header =~ /%[sd]/) {
+ if ($header =~ /%[csd]/) {
$rhs = $lhs = $header;
#if ($lhs =~ s/(.*?)%d(?!%d)(.*)/\Q$1\E\\d+\Q$2\E\$/g) {
if ($lhs =~ s/(.*?)%d(?!%d)(.*)/\Q$1\E\\d+\Q$2\E/g) {
@@ -341,6 +348,7 @@ EOFUNC
$lhs =~ s/\377//g;
$lhs =~ s/\.\*\?$/.*/; # Allow %s at the end to eat it all
}
+ $lhs =~ s/\\%c/./g;
$transmo .= " s{^$lhs}\n {\Q$rhs\E}s\n\t&& return 1;\n";
} else {
$transmo .= " m{^\Q$header\E} && return 1;\n";
@@ -371,7 +379,8 @@ if ($standalone) {
}
exit;
} else {
- $old_w = 0; $oldwarn = ''; $olddie = '';
+ #$old_w = 0;
+ $oldwarn = ''; $olddie = '';
}
sub import {
diff --git a/lib/lib.pm b/lib/lib.pm
index 6e6e15e4ce..afc979bb45 100644
--- a/lib/lib.pm
+++ b/lib/lib.pm
@@ -4,19 +4,19 @@ use vars qw(@ORIG_INC);
use Config;
my $archname = $Config{'archname'};
+my $ver = $Config{'version'};
@ORIG_INC = @INC; # take a handy copy of 'original' value
sub import {
shift;
+
+ my %names;
foreach (reverse @_) {
- ## Ignore this if not defined.
- next unless defined($_);
if ($_ eq '') {
require Carp;
Carp::carp("Empty compile time value given to use lib");
- # at foo.pl line ...
}
if (-e && ! -d _) {
require Carp;
@@ -27,29 +27,28 @@ sub import {
# looks like $_ has an archlib directory below it.
if (-d "$_/$archname") {
unshift(@INC, "$_/$archname") if -d "$_/$archname/auto";
- unshift(@INC, "$_/$archname/$]") if -d "$_/$archname/$]/auto";
+ unshift(@INC, "$_/$archname/$ver") if -d "$_/$archname/$ver/auto";
}
}
+
+ # remove trailing duplicates
+ @INC = grep { ++$names{$_} == 1 } @INC;
+ return;
}
sub unimport {
shift;
- my $mode = shift if $_[0] =~ m/^:[A-Z]+/;
my %names;
- foreach(@_) {
+ foreach (@_) {
++$names{$_};
++$names{"$_/$archname"} if -d "$_/$archname/auto";
}
- if ($mode and $mode eq ':ALL') {
- # Remove ALL instances of each named directory.
- @INC = grep { !exists $names{$_} } @INC;
- } else {
- # Remove INITIAL instance(s) of each named directory.
- @INC = grep { --$names{$_} < 0 } @INC;
- }
+ # Remove ALL instances of each named directory.
+ @INC = grep { !exists $names{$_} } @INC;
+ return;
}
1;
@@ -74,7 +73,7 @@ It is typically used to add extra directories to perl's search path so
that later C<use> or C<require> statements will find modules which are
not located on perl's default search path.
-=head2 ADDING DIRECTORIES TO @INC
+=head2 Adding directories to @INC
The parameters to C<use lib> are added to the start of the perl search
path. Saying
@@ -90,10 +89,10 @@ checks to see if a directory called $dir/$archname/auto exists.
If so the $dir/$archname directory is assumed to be a corresponding
architecture specific directory and is added to @INC in front of $dir.
-If LIST includes both $dir and $dir/$archname then $dir/$archname will
-be added to @INC twice (if $dir/$archname/auto exists).
+To avoid memory leaks, all trailing duplicate entries in @INC are
+removed.
-=head2 DELETING DIRECTORIES FROM @INC
+=head2 Deleting directories from @INC
You should normally only add directories to @INC. If you need to
delete directories from @INC take care to only delete those which you
@@ -101,24 +100,15 @@ added yourself or which you are certain are not needed by other modules
in your script. Other modules may have added directories which they
need for correct operation.
-By default the C<no lib> statement deletes the I<first> instance of
-each named directory from @INC. To delete multiple instances of the
-same name from @INC you can specify the name multiple times.
-
-To delete I<all> instances of I<all> the specified names from @INC you can
-specify ':ALL' as the first parameter of C<no lib>. For example:
-
- no lib qw(:ALL .);
+The C<no lib> statement deletes all instances of each named directory
+from @INC.
For each directory in LIST (called $dir here) the lib module also
checks to see if a directory called $dir/$archname/auto exists.
If so the $dir/$archname directory is assumed to be a corresponding
architecture specific directory and is also deleted from @INC.
-If LIST includes both $dir and $dir/$archname then $dir/$archname will
-be deleted from @INC twice (if $dir/$archname/auto exists).
-
-=head2 RESTORING ORIGINAL @INC
+=head2 Restoring original @INC
When the lib module is first loaded it records the current value of @INC
in an array C<@lib::ORIG_INC>. To restore @INC to that value you
@@ -136,4 +126,3 @@ FindBin - optional module which deals with paths relative to the source file.
Tim Bunce, 2nd June 1995.
=cut
-
diff --git a/lib/perl5db.pl b/lib/perl5db.pl
index 7b0567c403..d2bd98e654 100644
--- a/lib/perl5db.pl
+++ b/lib/perl5db.pl
@@ -2,7 +2,7 @@ package DB;
# Debugger for Perl 5.00x; perl5db.pl patch level:
-$VERSION = 1.0403;
+$VERSION = 1.04041;
$header = "perl5db.pl version $VERSION";
# Enhanced by ilya@math.ohio-state.edu (Ilya Zakharevich)
@@ -76,6 +76,8 @@ $header = "perl5db.pl version $VERSION";
# LineInfo - file or pipe to print line number info to. If it is a
# pipe, a short "emacs like" message is used.
#
+# RemotePort - host:port to connect to on remote host for remote debugging.
+#
# Example $rcfile: (delete leading hashes!)
#
# &parse_options("NonStop=1 LineInfo=db.out");
@@ -179,7 +181,8 @@ $inhibit_exit = $option{PrintRet} = 1;
TTY noTTY ReadLine NonStop LineInfo maxTraceLen
recallCommand ShellBang pager tkRunning ornaments
signalLevel warnLevel dieLevel inhibit_exit
- ImmediateStop bareStringify);
+ ImmediateStop bareStringify
+ RemotePort);
%optionVars = (
hashDepth => \$dumpvar::hashDepth,
@@ -197,6 +200,7 @@ $inhibit_exit = $option{PrintRet} = 1;
inhibit_exit => \$inhibit_exit,
maxTraceLen => \$maxtrace,
ImmediateStop => \$ImmediateStop,
+ RemotePort => \$remoteport,
);
%optionAction = (
@@ -216,6 +220,7 @@ $inhibit_exit = $option{PrintRet} = 1;
dieLevel => \&dieLevel,
tkRunning => \&tkRunning,
ornaments => \&ornaments,
+ RemotePort => \&RemotePort,
);
%optionRequire = (
@@ -296,7 +301,7 @@ if ($notty) {
#require Term::ReadLine;
- if ($^O =~ /cygwin/) {
+ if ($^O eq 'cygwin') {
# /dev/tty is binary. use stdin for textmode
undef $console;
} elsif (-e "/dev/tty") {
@@ -322,19 +327,30 @@ if ($notty) {
$console = $tty if defined $tty;
- if (defined $console) {
- open(IN,"+<$console") || open(IN,"<$console") || open(IN,"<&STDIN");
- open(OUT,"+>$console") || open(OUT,">$console") || open(OUT,">&STDERR")
- || open(OUT,">&STDOUT"); # so we don't dongle stdout
- } else {
- open(IN,"<&STDIN");
- open(OUT,">&STDERR") || open(OUT,">&STDOUT"); # so we don't dongle stdout
- $console = 'STDIN/OUT';
+ if (defined $remoteport) {
+ require IO::Socket;
+ $OUT = new IO::Socket::INET( Timeout => '10',
+ PeerAddr => $remoteport,
+ Proto => 'tcp',
+ );
+ if (!$OUT) { die "Could not create socket to connect to remote host."; }
+ $IN = $OUT;
}
- # so open("|more") can read from STDOUT and so we don't dingle stdin
- $IN = \*IN;
+ else {
+ if (defined $console) {
+ open(IN,"+<$console") || open(IN,"<$console") || open(IN,"<&STDIN");
+ open(OUT,"+>$console") || open(OUT,">$console") || open(OUT,">&STDERR")
+ || open(OUT,">&STDOUT"); # so we don't dongle stdout
+ } else {
+ open(IN,"<&STDIN");
+ open(OUT,">&STDERR") || open(OUT,">&STDOUT"); # so we don't dongle stdout
+ $console = 'STDIN/OUT';
+ }
+ # so open("|more") can read from STDOUT and so we don't dingle stdin
+ $IN = \*IN;
- $OUT = \*OUT;
+ $OUT = \*OUT;
+ }
select($OUT);
$| = 1; # for DB::OUT
select(STDOUT);
@@ -434,7 +450,7 @@ Debugged program terminated. Use B<q> to quit or B<R> to restart,
B<h q>, B<h R> or B<h O> to get additional info.
EOP
$package = 'main';
- $usercontext = '($@, $!, $,, $/, $\, $^W) = @saved;' .
+ $usercontext = '($@, $!, $^E, $,, $/, $\, $^W) = @saved;' .
"package $package;"; # this won't let them modify, alas
} else {
$sub =~ s/\'/::/;
@@ -689,7 +705,7 @@ EOP
for ($i = 1; $i <= $max; $i++) {
if (defined $dbline{$i}) {
- print "$file:\n" unless $was++;
+ print $OUT "$file:\n" unless $was++;
print $OUT " $i:\t", $dbline[$i];
($stop,$action) = split(/\0/, $dbline{$i});
print $OUT " break if (", $stop, ")\n"
@@ -1525,7 +1541,15 @@ sub readline {
}
local $frame = 0;
local $doret = -2;
- $term->readline(@_);
+ if (ref $OUT and UNIVERSAL::isa($OUT, 'IO::Socket::INET')) {
+ print $OUT @_;
+ my $stuff;
+ $IN->recv( $stuff, 2048 );
+ $stuff;
+ }
+ else {
+ $term->readline(@_);
+ }
}
sub dump_option {
@@ -1673,6 +1697,14 @@ sub ReadLine {
$rl;
}
+sub RemotePort {
+ if ($term) {
+ &warn("Too late to set RemotePort, enabled on next 'R'!\n") if @_;
+ }
+ $remoteport = shift if @_;
+ $remoteport;
+}
+
sub tkRunning {
if ($ {$term->Features}{tkRunning}) {
return $term->tkRunning(@_);
@@ -1823,6 +1855,7 @@ B<O> [I<opt>[B<=>I<val>]] [I<opt>B<\">I<val>B<\">] [I<opt>B<?>]...
I<signalLevel> I<warnLevel> I<dieLevel>: level of verbosity;
I<inhibit_exit> Allows stepping off the end of the script.
I<ImmediateStop> Debugger should stop as early as possible.
+ I<RemotePort>: Remote hostname:port for remote debugging
The following options affect what happens with B<V>, B<X>, and B<x> commands:
I<arrayDepth>, I<hashDepth>: print only first N elements ('' for all);
I<compactDump>, I<veryCompact>: change style of array and hash dump;
@@ -1839,7 +1872,8 @@ B<O> [I<opt>[B<=>I<val>]] [I<opt>B<\">I<val>B<\">] [I<opt>B<?>]...
I<ornaments> affects screen appearance of the command line.
During startup options are initialized from \$ENV{PERLDB_OPTS}.
You can put additional initialization options I<TTY>, I<noTTY>,
- I<ReadLine>, and I<NonStop> there (or use `B<R>' after you set them).
+ I<ReadLine>, I<NonStop>, and I<RemotePort> there (or use
+ `B<R>' after you set them).
B<<> I<expr> Define Perl command to run before each prompt.
B<<<> I<expr> Add to the list of Perl commands to run before each prompt.
B<>> I<expr> Define Perl command to run after each prompt.
diff --git a/lib/strict.pm b/lib/strict.pm
index 940e8bf7ff..99ed01d583 100644
--- a/lib/strict.pm
+++ b/lib/strict.pm
@@ -56,6 +56,9 @@ L<perlfunc/local>.
The local() generated a compile-time error because you just touched a global
name without fully qualifying it.
+Because of their special use by sort(), the variables $a and $b are
+exempted from this check.
+
=item C<strict subs>
This disables the poetry optimization, generating a compile-time error if
diff --git a/lib/unicode/Eq/Latin1 b/lib/unicode/Eq/Latin1.pl
index 89ecd763ad..e033d2cb8b 100644
--- a/lib/unicode/Eq/Latin1
+++ b/lib/unicode/Eq/Latin1.pl
@@ -1,3 +1,7 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
0041 00C0 00C1 00C2 00C3 00C4 00C5
0043 00C7
0045 00C8 00C9 00CA 00CB
@@ -14,3 +18,4 @@
006F 00BA 00F2 00F3 00F4 00F5 00F6 00F8
0075 00F9 00FA 00FB 00FC
0079 00FD 00FF
+END
diff --git a/lib/unicode/Eq/Unicode b/lib/unicode/Eq/Unicode.pl
index 29b2a1c044..35edd61d2e 100644
--- a/lib/unicode/Eq/Unicode
+++ b/lib/unicode/Eq/Unicode.pl
@@ -1,3 +1,7 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
0041 00C0 00C1 00C2 00C3 00C4 00C5 0100 0102 0104 01CD 0200 0202 0226 1E00 1EA0 1EA2 FF21
0042 0181 0182 1E02 1E04 1E06 212C FF22
0043 00C7 0106 0108 010A 010C 0187 2102 212D FF23
@@ -659,3 +663,4 @@
3163 FFDC
3164 FFA0
FB49 FB2C FB2D
+END
diff --git a/lib/unicode/In/BopomofoExtended.pl b/lib/unicode/In/BopomofoExtended.pl
new file mode 100644
index 0000000000..d0ee43a437
--- /dev/null
+++ b/lib/unicode/In/BopomofoExtended.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+31A0 31BF
+END
diff --git a/lib/unicode/In/BraillePatterns.pl b/lib/unicode/In/BraillePatterns.pl
new file mode 100644
index 0000000000..e5c9e4ca70
--- /dev/null
+++ b/lib/unicode/In/BraillePatterns.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+2800 28FF
+END
diff --git a/lib/unicode/In/CJKRadicalsSupplement.pl b/lib/unicode/In/CJKRadicalsSupplement.pl
new file mode 100644
index 0000000000..d4c0c82bb6
--- /dev/null
+++ b/lib/unicode/In/CJKRadicalsSupplement.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+2E80 2EFF
+END
diff --git a/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl b/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl
new file mode 100644
index 0000000000..012f54c824
--- /dev/null
+++ b/lib/unicode/In/CJKUnifiedIdeographsExtensionA.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+3400 4DB5
+END
diff --git a/lib/unicode/In/Cherokee.pl b/lib/unicode/In/Cherokee.pl
new file mode 100644
index 0000000000..10cae1a652
--- /dev/null
+++ b/lib/unicode/In/Cherokee.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+13A0 13FF
+END
diff --git a/lib/unicode/In/IdeographicDescriptionCharacters.pl b/lib/unicode/In/IdeographicDescriptionCharacters.pl
new file mode 100644
index 0000000000..4baae881a1
--- /dev/null
+++ b/lib/unicode/In/IdeographicDescriptionCharacters.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+2FF0 2FFF
+END
diff --git a/lib/unicode/In/KangxiRadicals.pl b/lib/unicode/In/KangxiRadicals.pl
new file mode 100644
index 0000000000..d26fd6c774
--- /dev/null
+++ b/lib/unicode/In/KangxiRadicals.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+2F00 2FDF
+END
diff --git a/lib/unicode/In/Khmer.pl b/lib/unicode/In/Khmer.pl
new file mode 100644
index 0000000000..f3e86851b3
--- /dev/null
+++ b/lib/unicode/In/Khmer.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+1780 17FF
+END
diff --git a/lib/unicode/In/Mongolian.pl b/lib/unicode/In/Mongolian.pl
new file mode 100644
index 0000000000..394014d496
--- /dev/null
+++ b/lib/unicode/In/Mongolian.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+1800 18AF
+END
diff --git a/lib/unicode/In/Myanmar.pl b/lib/unicode/In/Myanmar.pl
new file mode 100644
index 0000000000..4b3f3181b0
--- /dev/null
+++ b/lib/unicode/In/Myanmar.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+1000 109F
+END
diff --git a/lib/unicode/In/Ogham.pl b/lib/unicode/In/Ogham.pl
new file mode 100644
index 0000000000..e097d90c77
--- /dev/null
+++ b/lib/unicode/In/Ogham.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+1680 169F
+END
diff --git a/lib/unicode/In/Runic.pl b/lib/unicode/In/Runic.pl
new file mode 100644
index 0000000000..0bd42df80c
--- /dev/null
+++ b/lib/unicode/In/Runic.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+16A0 16FF
+END
diff --git a/lib/unicode/In/Sinhala.pl b/lib/unicode/In/Sinhala.pl
new file mode 100644
index 0000000000..37e007c057
--- /dev/null
+++ b/lib/unicode/In/Sinhala.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+0D80 0DFF
+END
diff --git a/lib/unicode/In/Syriac.pl b/lib/unicode/In/Syriac.pl
new file mode 100644
index 0000000000..7c81fb6f32
--- /dev/null
+++ b/lib/unicode/In/Syriac.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+0700 074F
+END
diff --git a/lib/unicode/In/Thaana.pl b/lib/unicode/In/Thaana.pl
new file mode 100644
index 0000000000..361bd4d4b4
--- /dev/null
+++ b/lib/unicode/In/Thaana.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+0780 07BF
+END
diff --git a/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl b/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl
new file mode 100644
index 0000000000..ad4eb27866
--- /dev/null
+++ b/lib/unicode/In/UnifiedCanadianAboriginalSyllabics.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+1400 167F
+END
diff --git a/lib/unicode/In/YiRadicals.pl b/lib/unicode/In/YiRadicals.pl
new file mode 100644
index 0000000000..f25c6954ff
--- /dev/null
+++ b/lib/unicode/In/YiRadicals.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+A490 A4CF
+END
diff --git a/lib/unicode/In/YiSyllables.pl b/lib/unicode/In/YiSyllables.pl
new file mode 100644
index 0000000000..f4e3a8bcbc
--- /dev/null
+++ b/lib/unicode/In/YiSyllables.pl
@@ -0,0 +1,6 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
+return <<'END';
+A000 A48F
+END
diff --git a/lib/unicode/Is/SylA.pl b/lib/unicode/Is/SylA.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylA.pl
+++ b/lib/unicode/Is/SylA.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylC.pl b/lib/unicode/Is/SylC.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylC.pl
+++ b/lib/unicode/Is/SylC.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylE.pl b/lib/unicode/Is/SylE.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylE.pl
+++ b/lib/unicode/Is/SylE.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylI.pl b/lib/unicode/Is/SylI.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylI.pl
+++ b/lib/unicode/Is/SylI.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylO.pl b/lib/unicode/Is/SylO.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylO.pl
+++ b/lib/unicode/Is/SylO.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylU.pl b/lib/unicode/Is/SylU.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylU.pl
+++ b/lib/unicode/Is/SylU.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylV.pl b/lib/unicode/Is/SylV.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylV.pl
+++ b/lib/unicode/Is/SylV.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylWA.pl b/lib/unicode/Is/SylWA.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylWA.pl
+++ b/lib/unicode/Is/SylWA.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylWC.pl b/lib/unicode/Is/SylWC.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylWC.pl
+++ b/lib/unicode/Is/SylWC.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylWE.pl b/lib/unicode/Is/SylWE.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylWE.pl
+++ b/lib/unicode/Is/SylWE.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylWI.pl b/lib/unicode/Is/SylWI.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylWI.pl
+++ b/lib/unicode/Is/SylWI.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Is/SylWV.pl b/lib/unicode/Is/SylWV.pl
index 3054fd6216..ec287c456a 100644
--- a/lib/unicode/Is/SylWV.pl
+++ b/lib/unicode/Is/SylWV.pl
@@ -1,2 +1,5 @@
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by mktables.PL from e.g. Unicode.300.
+# Any changes made here will be lost!
return <<'END';
END
diff --git a/lib/unicode/Jamo-2.txt b/lib/unicode/Jamo.txt
index 6910ab924e..6910ab924e 100644
--- a/lib/unicode/Jamo-2.txt
+++ b/lib/unicode/Jamo.txt
diff --git a/lib/unicode/NamesList.html b/lib/unicode/NamesList.html
new file mode 100644
index 0000000000..0bfc5dbdcc
--- /dev/null
+++ b/lib/unicode/NamesList.html
@@ -0,0 +1,226 @@
+<html>
+
+<head>
+<meta name="GENERATOR" content="Microsoft FrontPage 3.0">
+<title>Unicode 3.0 NamesList File Structure</title>
+</head>
+
+<body>
+
+<h3>Unicode NamesList File Format</h3>
+
+<p>Last updated: 1999-07-06</p>
+
+<h3>1.0 Introduction</h3>
+
+<p>The Unicode name list file NamesList.txt (also NamesList.lst) is a plain text file used
+to drive the layout of the character code charts in the Unicode Standard. The information
+in this file is a combination of several fields from the UnicodeData.txt and Blocks.txt files,
+together with additional annotations for many characters. This document describes the
+syntax rules for the file format, but also gives brief information on how each construct
+is rendered when laid out for the book. Some of the syntax elements were used in
+preparation of the drafts of the book and may not be present in the final, released form
+of the NamesList.txt file.</p>
+
+<p>The same input file can be used to do the draft preparation for ISO/IEC 10646 (referred
+below as ISO-style). This necessitates the presence of some information in the name list
+file that is not needed (and in fact removed during parsing) for the Unicode book.</p>
+
+<p>With access to the layout program (unibook.exe) it is a simple matter of creating
+name lists for the purpose of formatting working drafts containing proposed characters.</p>
+
+<h3>1.1 NamesList File Overview</h3>
+
+<p>The *.lst files are plain text files which in their most simple form look like this</p>
+
+<p>@@&lt;tab&gt;0020&lt;tab&gt;BASIC LATIN&lt;tab&gt;007F<br>
+; this is a file comment (ignored)<br>
+0020&lt;tab&gt;SPACE<br>
+0021&lt;tab&gt;EXCLAMATION MARK<br>
+0022&lt;tab&gt;QUOTATION MARK<br>
+. . . <br>
+007F&lt;tab&gt;DELETE</p>
+
+<p>The semicolon (as first character), @ and &lt;tab&gt; characters are used by the file
+syntax and must be provided as shown. Hexadecimal digits must be in UPPER CASE). A double
+@@ introduces a block header, with the title, and start and ending code of the block
+provided as shown.</p>
+
+<p>For an ISO-style, minimal name list, only the NAME_LINE and BLOCKHEADER and their
+constituent syntax elements are needed.</p>
+
+<p>The full syntax with all the options is provided in the following sections.</p>
+
+<h3>1.2 NamesList File Structure</h3>
+
+<p>This section gives defines the overall file structure</p>
+
+<pre><strong>NAMELIST: TITLE_PAGE* BLOCK*
+</strong>
+<strong>TITLE_PAGE: TITLE
+ | TITLE_PAGE SUBTITLE
+ | TITLE_PAGE SUBHEADER
+ | TITLE_PAGE IGNORED_LINE
+ | TITLE_PAGE EMPTY_LINE
+ | TITLE_PAGE COMMENTLINE
+ | TITLE_PAGE NOTICE
+ | TITLE_PAGE PAGEBREAK
+</strong>
+<strong>BLOCK: BLOCKHEADER
+ | BLOCK CHAR_ENTRY
+ | BLOCK SUBHEADER
+ | BLOCK NOTICE
+ | BLOCK EMPTY_LINE
+ | BLOCK IGNORED_LINE
+ | BLOCK PAGEBREAK
+
+CHAR_ENTRY: NAME_LINE | RESERVED_LINE
+ | CHAR_ENTRY ALIAS_LINE
+ | CHAR_ENTRY COMMENT_LINE
+ | CHAR_ENTRY CROSS_REF
+ | CHAR_ENTRY DECOMPOSITION
+ | CHAR_ENTRY COMPAT_MAPPING
+ | CHAR_ENTRY IGNORED_LINE
+ | CHAR_ENTRY EMPTY_LINE
+ | CHAR_ENTRY NOTICE
+</strong></pre>
+
+<p>In other words:<br>
+<br>
+Neither TITLE nor&nbsp; SUBTITLE may occur after the first BLOCKHEADER. </p>
+
+<p>Only TITLE, SUBTITLE, SUBHEADER, PAGEBREAK, COMMENT_LINE,&nbsp; and IGNORED_LINE may
+occur before the first BLOCKHEADER.</p>
+
+<p>Directly following either a NAME_LINE or a RESERVED_LINE an uninterrupted sequence of
+the following lines may occur (in any order and repeated as often as needed): ALIAS_LINE,
+CROSS_REF, DECOMPOSITION, COMPAT_MAPPING, NOTICE, EMPTY_LINE and IGNORED_LINE.</p>
+
+<p>Except for EMPTY_LINE, NOTICE and IGNORED_LINE, none of these lines may occur in any other
+place. </p>
+
+<p>Note: A NOTICE displays differently depending on whether it follows a header or title
+or is part of a CHAR_ENTRY.</p>
+
+<h3>1.3 NamesList File Elements</h3>
+
+<p>This section provides the details of the syntax for the individual elements.</p>
+
+<pre><small><strong>ELEMENT SYNTAX</strong> // How rendered</small></pre>
+
+<pre><small><strong>NAME_LINE: CHAR &lt;tab&gt; LINE
+</strong> // the CHAR and the corresponding image are echoed,
+ // followed by the name as given in LINE
+
+<strong> CHAR TAB NAME COMMENT LF
+</strong> // Names may have a comment, which is stripped off
+ // unless the file is parsed for an ISO style list
+
+<strong>RESERVED_LINE: CHAR TAB &lt;reserved&gt;
+</strong> // the CHAR is echoed followed by an icon for the
+ // reserved character and a fixed string e.g. &lt;reserved&gt;
+
+<strong>COMMMENT_LINE: &lt;tab&gt; &quot;*&quot; SP EXPAND_LINE
+</strong> // * is replaced by BULLET, output line as comment
+ <strong>&lt;tab&gt; EXPAND_LINE</strong>
+ // output line as comment
+
+<strong>ALIAS_LINE: &lt;tab&gt; &quot;=&quot; SP LINE
+</strong> // replace = by itself, output line as alias
+
+<strong>CROSS_REF: &lt;tab&gt; &quot;X&quot; SP EXPAND_LINE
+</strong> // X is replaced by a right arrow
+<strong> &lt;tab&gt; &quot;X&quot; SP &quot;(&quot; STRING SP &quot;-&quot; SP CHAR &quot;)&quot;
+</strong> // X is replaced by a right arrow
+ // the &quot;(&quot;, &quot;-&quot;, &quot;)&quot; are removed, the
+ // order of CHAR and STRING is reversed
+ // i.e. both inputs result in the same output
+
+<strong>IGNORED_LINE: &lt;tab&gt; &quot;;&quot; EXPAND_LINE
+EMPTY_LINE: LF
+</strong> // empty lines and file comments are ignored
+
+<strong>DECOMPOSITION: &lt;tab&gt; &quot;:&quot; EXPAND_LINE
+</strong> // replace ':' by EQUIV, expand line into
+ // decomposition
+
+<strong>COMPAT_MAPPING: &lt;tab&gt; &quot;#&quot; SP EXPAND_LINE
+</strong> // replace '#' by APPROX, output line as mapping
+
+<strong>NOTICE: &quot;@+&quot; &lt;tab&gt; LINE
+</strong> // skip '@+', output text as notice
+<strong> &quot;@+&quot; TAB * SP LINE
+</strong> // skip '@', output text as notice
+ // &quot;*&quot; expands to a bullet character
+ // Notices following a character code apply to the
+ // character and are indented. Notices not following
+ // a character code apply to the page/block/column
+ // and are italicized, but not indented
+
+<strong>SUBTITLE: &quot;@@@+&quot; &lt;tab&gt; LINE
+</strong> // skip &quot;@@@+&quot;, output text as subtitle
+
+<strong>SUBHEADER: &quot;@&quot; &lt;tab&gt; LINE
+</strong> // skip '@', output line as text as column header
+
+<strong>BLOCKHEADER: &quot;@@&quot; &lt;tab&gt; BLOCKSTART &lt;tab&gt; BLOCKNAME &lt;tab&gt; BLOCKEND
+</strong> // skip &quot;@@&quot;, cause a page break and optional
+ // blank page, then output one or more charts
+ // followed by the list of character names.
+ // use BLOCKSTART and BLOCKEND to define the
+ // what characters belong to a block
+ // use blockname in page and table headers
+ <strong> &quot;@@&quot; &lt;tab&gt; BLOCKSTART &lt;tab&gt; BLOCKNAME COMMENT &lt;tab&gt; BLOCKEND
+ </strong>// if a comment is present it replaces the blockname
+ // when an ISO-style namelist is laid out
+
+<strong>BLOCKSTART: CHAR</strong> // first character position in block
+<strong>BLOCKEND: CHAR</strong> // last character position in block
+<strong>PAGE_BREAK: &quot;@@&quot;</strong> // insert a (column) break
+
+<strong>TITLE: &quot;@@@&quot; &lt;tab&gt; LINE</strong>
+ // skip &quot;@@@&quot;, output line as text
+ // Title is used in page headers
+
+<strong>EXPAND_LINE: {CHAR | STRING}+ LF </strong>
+ // all instances of CHAR *) are replaced by
+ // CHAR NBSP x NBSP where x is the single Unicode
+ // character corresponding to char
+ // If character is combining, it is replaced with
+ // CHAR NBSP &lt;circ&gt; x NBSP where &lt;circ&gt; is the
+ // dotted circle</small>
+</pre>
+
+<h3><strong>1.4 NamesList File Primitives</strong></h3>
+
+<p>The following are the primitives and terminals for the NamesList syntax.</p>
+
+<pre><small><strong>LINE: STRING LF
+COMMENT: &quot;(&quot; NAME &quot;)&quot;
+ &quot;(&quot; NAME &quot;)&quot; &quot;*&quot;
+</strong>
+<strong>NAME</strong>: &lt;sequence of ASCII characters, except &quot;(&quot; or &quot;)&quot; &gt;
+<strong>STRING</strong>: &lt;sequence of Latin-1 characters&gt;
+<strong>CHAR</strong>: <strong>X X X X</strong>
+ <strong>| X X X X X X X X X</strong></small>
+<small><strong>X: &quot;0&quot;|&quot;1&quot;|&quot;2&quot;|&quot;3&quot;|&quot;4&quot;|&quot;5&quot;|&quot;6&quot;|&quot;7&quot;|&quot;8&quot;|&quot;9&quot;|&quot;A&quot;|&quot;B&quot;|&quot;C&quot;|&quot;D&quot;|&quot;E&quot;|&quot;F&quot;
+&lt;tab&gt;:</strong> &lt;sequence of one or more ASCII tab characters 0x09&gt;
+<strong>SP</strong>: &lt;ASCII 0x20&gt;
+<strong>LF</strong>: &lt;any sequence of ASCII 0x0A and 0x0D&gt;
+</small></pre>
+
+<p><strong>Notes:</strong>
+
+<ul>
+ <li>Special lookahead logic prevents a mention of a 4 digit standard, such as ISO 9999 from
+ being misinterpreted as ISO CHAR.</li>
+ <li>Use of Latin-1 is supported in unibook.exe, but not portably, unless the file is encoded as
+ UTF-16LE.</li>
+ <li>The final LF in the file must be present</li>
+ <li>A CHAR inside ' or &quot; is expanded, but only its glyph image is printed,&nbsp; the
+ code value is not echoed</li>
+ <li>Straight quotes in an EXPAND_LINE are replaced by curly quotes using English rules.
+ Apostrophes are supported, but nested quotes are not.</li>
+</ul>
+</body>
+</html>
diff --git a/lib/unicode/ReadMe.txt b/lib/unicode/ReadMe.txt
index 889c32572c..c2c4aee6a5 100644
--- a/lib/unicode/ReadMe.txt
+++ b/lib/unicode/ReadMe.txt
@@ -14,15 +14,32 @@ UnicodeCharacterDatabase.html.
--------------------------------------------------------------------------
NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
-The files have been copied 1999-Sep-14 from
+The files have been copied from
ftp://ftp.unicode.org/Public/3.0-Update/
-and renamed to better fit 8.3 filename limitations.
-
-For example, the UnicodeCharacterDatabase.html referred above is
-now called Unicode.html.
-
+and most of them have been renamed to better fit 8.3 filename limitations.
+
+long name at unicode.org short name latest '#'
+------------------------ ---------- ----------
+ArabicShaping-#.txt ArabShap.txt 2
+Blocks-#.txt Blocks.txt 3
+CompositionExclusions-#.txt CompExcl.txt 1
+EastAsianWidth-#.txt EAWidth.txt 3
+Index-#.txt Index.txt 3.0.0
+Jamo-#.txt Jamo.txt 2
+LineBreak-#.txt LineBrk.txt 5
+NamesList-#.txt Names.txt 3.0.0
+NamesList-#.html NamesList.html 1
+PropList-#.txt Props.txt 3.0.0
+SpecialCasing-#.txt SpecCase.txt 2
+UnicodeData-#.txt Unicode.300 3.0.0
+UnicodeData-#.html Unicode3.html 3.0.0
+UnicodeCharacterDatabase-#.html UCD300.html 3.0.0
+
+The *.pl files are generated from these files by the 'mktables.PL' script.
+
+While the files have been renamed the links in the html files haven't.
+
+--
jhi@iki.fi
-
-
diff --git a/lib/unicode/Unicode.html b/lib/unicode/UCD300.html
index 113d311f01..113d311f01 100644
--- a/lib/unicode/Unicode.html
+++ b/lib/unicode/UCD300.html
diff --git a/lib/unicode/UnicodeData-Latest.txt b/lib/unicode/Unicode.300
index 6a54d3d74e..6a54d3d74e 100644
--- a/lib/unicode/UnicodeData-Latest.txt
+++ b/lib/unicode/Unicode.300
diff --git a/lib/unicode/Unicode3.html b/lib/unicode/Unicode3.html
new file mode 100644
index 0000000000..a08a25ec75
--- /dev/null
+++ b/lib/unicode/Unicode3.html
@@ -0,0 +1,1988 @@
+<html>
+
+
+
+<head>
+
+<meta NAME="GENERATOR" CONTENT="Microsoft FrontPage 4.0">
+
+<meta HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
+
+<link REL="stylesheet" HREF="http://www.unicode.org/unicode.css" TYPE="text/css">
+
+<title>UnicodeData File Format</title>
+
+</head>
+
+
+
+<body>
+
+
+
+<h1>UnicodeData File Format<br>
+Version 3.0.0</h1>
+
+
+
+<table BORDER="1" CELLSPACING="2" CELLPADDING="0" HEIGHT="87" WIDTH="100%">
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Revision</td>
+
+ <td VALIGN="TOP">3.0.0</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Authors</td>
+
+ <td VALIGN="TOP">Mark Davis and Ken Whistler</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Date</td>
+
+ <td VALIGN="TOP">1999-09-12</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">This Version</td>
+
+ <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Previous Version</td>
+
+ <td VALIGN="TOP">n/a</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP" width="144">Latest Version</td>
+
+ <td VALIGN="TOP"><a href="ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html">ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html</a></td>
+
+ </tr>
+
+</table>
+
+
+
+<p align="center">Copyright © 1995-1999 Unicode, Inc. All Rights reserved.<br>
+
+<i>For more information, including Disclamer and Limitations, see <a HREF="UnicodeCharacterDatabase-3.0.0.html">UnicodeCharacterDatabase-3.0.0.html</a> </i></p>
+
+
+
+<p>This document describes the format of the UnicodeData.txt file, which is one of the
+
+files in the Unicode Character Database. The document is divided into the following
+
+sections:
+
+
+
+<ul>
+
+ <li><a HREF="#Field Formats">Field Formats</a> <ul>
+
+ <li><a HREF="#General Category">General Category</a> </li>
+
+ <li><a HREF="#Bidirectional Category">Bidirectional Category</a> </li>
+
+ <li><a HREF="#Character Decomposition">Character Decomposition Mapping</a> </li>
+
+ <li><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </li>
+
+ <li><a HREF="#Decompositions and Normalization">Decompositions and Normalization</a> </li>
+
+ <li><a HREF="#Case Mappings">Case Mappings</a> </li>
+
+ </ul>
+
+ </li>
+
+ <li><a HREF="#Property Invariants">Property Invariants</a> </li>
+
+ <li><a HREF="#Modification History">Modification History</a> </li>
+
+</ul>
+
+
+
+<p><b>Warning: </b>the information in this file does not completely describe the use and
+
+interpretation of Unicode character properties and behavior. It must be used in
+
+conjunction with the data in the other files in the Unicode Character Database, and relies
+
+on the notation and definitions supplied in <i><a href="http://www.unicode.org/unicode/standard/versions/Unicode3.0.html"> The Unicode
+Standard</a></i>. All chapter references
+
+are to Version 3.0 of the standard.</p>
+
+
+
+<h2><a NAME="Field Formats"></a>Field Formats</h2>
+
+
+
+<p>The file consists of lines containing fields terminated by semicolons. Each line
+
+represents the data for one encoded character in the Unicode Standard. Every encoded
+
+character has a data entry, with the exception of certain special ranges, as detailed
+
+below.
+
+
+
+<ul>
+
+ <li>There are six special ranges of characters that are represented only by their start and
+
+ end characters, since the properties in the file are uniform, except for code values
+
+ (which are all sequential and assigned). </li>
+
+ <li>The names of CJK ideograph characters and the names and decompositions of Hangul
+
+ syllable characters are algorithmically derivable. (See the Unicode Standard and <a
+
+ HREF="http://www.unicode.org/unicode/reports/tr15/">Unicode Technical Report #15</a> for
+
+ more information). </li>
+
+ <li>Surrogate code values and private use characters have no names. </li>
+
+ <li>The Private Use character outside of the BMP (U+F0000..U+FFFFD, U+100000..U+10FFFD) are
+
+ not listed. These correspond to surrogate pairs where the first surrogate is in the High
+
+ Surrogate Private Use section. </li>
+
+</ul>
+
+
+
+<p>The exact ranges represented by start and end characters are:
+
+
+
+<ul>
+
+ <li>CJK Ideographs Extension A (U+3400 - U+4DB5) </li>
+
+ <li>CJK Ideographs (U+4E00 - U+9FA5) </li>
+
+ <li>Hangul Syllables (U+AC00 - U+D7A3) </li>
+
+ <li>Non-Private Use High Surrogates (U+D800 - U+DB7F) </li>
+
+ <li>Private Use High Surrogates (U+DB80 - U+DBFF) </li>
+
+ <li>Low Surrogates (U+DC00 - U+DFFF) </li>
+
+ <li>The Private Use Area (U+E000 - U+F8FF) </li>
+
+</ul>
+
+
+
+<p>The following table describes the format and meaning of each field in a data entry in
+
+the UnicodeData file. Fields which contain normative information are so indicated.</p>
+
+
+
+<table BORDER="1" CELLSPACING="2" CELLPADDING="2">
+
+ <tr>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Field</th>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Name</th>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Status</th>
+
+ <th VALIGN="top" ALIGN="LEFT"><p ALIGN="LEFT">Explanation</th>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">0</th>
+
+ <td VALIGN="top">Code value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">Code value in 4-digit hexadecimal format.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">1</th>
+
+ <td VALIGN="top">Character name</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">These names match exactly the names published in Chapter 14 of the
+
+ Unicode Standard, Version 3.0.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">2</th>
+
+ <td VALIGN="top"><a HREF="#General Category">General Category</a> </td>
+
+ <td VALIGN="top">normative / informative<br>
+
+ (see below)</td>
+
+ <td VALIGN="top">This is a useful breakdown into various &quot;character types&quot; which
+
+ can be used as a default categorization in implementations. See below for a brief
+
+ explanation.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">3</th>
+
+ <td VALIGN="top"><a HREF="#Canonical Combining Classes">Canonical Combining Classes</a> </td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">The classes used for the Canonical Ordering Algorithm in the Unicode
+
+ Standard. These classes are also printed in Chapter 4 of the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">4</th>
+
+ <td VALIGN="top"><a HREF="#Bidirectional Category">Bidirectional Category</a> </td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">See the list below for an explanation of the abbreviations used in this
+
+ field. These are the categories required by the Bidirectional Behavior Algorithm in the
+
+ Unicode Standard. These categories are summarized in Chapter 3 of the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">5</th>
+
+ <td VALIGN="top"><a HREF="#Character Decomposition">Character Decomposition
+ Mapping</a></td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">In the Unicode Standard, not all of the mappings are full (maximal)
+
+ decompositions. Recursive application of look-up for decompositions will, in all cases,
+
+ lead to a maximal decomposition. The decomposition mappings match exactly the
+
+ decomposition mappings published with the character names in the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">6</th>
+
+ <td VALIGN="top">Decimal digit value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">This is a numeric field. If the character has the decimal digit property,
+
+ as specified in Chapter 4 of the Unicode Standard, the value of that digit is represented
+
+ with an integer value in this field</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">7</th>
+
+ <td VALIGN="top">Digit value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">This is a numeric field. If the character represents a digit, not
+
+ necessarily a decimal digit, the value is here. This covers digits which do not form
+
+ decimal radix forms, such as the compatibility superscript digits</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">8</th>
+
+ <td VALIGN="top">Numeric value</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">This is a numeric field. If the character has the numeric property, as
+
+ specified in Chapter 4 of the Unicode Standard, the value of that character is represented
+
+ with an integer or rational number in this field. This includes fractions as, e.g.,
+
+ &quot;1/5&quot; for U+2155 VULGAR FRACTION ONE FIFTH Also included are numerical values
+
+ for compatibility characters such as circled numbers.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">8</th>
+
+ <td VALIGN="top">Mirrored</td>
+
+ <td VALIGN="top">normative</td>
+
+ <td VALIGN="top">If the character has been identified as a &quot;mirrored&quot; character
+
+ in bidirectional text, this field has the value &quot;Y&quot;; otherwise &quot;N&quot;.
+
+ The list of mirrored characters is also printed in Chapter 4 of the Unicode Standard.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">10</th>
+
+ <td VALIGN="top">Unicode 1.0 Name</td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">This is the old name as published in Unicode 1.0. This name is only
+
+ provided when it is significantly different from the Unicode 3.0 name for the character.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">11</th>
+
+ <td VALIGN="top">10646 comment field</td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">This is the ISO 10646 comment field. It is in parantheses in the 10646
+
+ names list.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">12</th>
+
+ <td VALIGN="top"><a HREF="#Case Mappings">Uppercase Mapping</a></td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">Upper case equivalent mapping. If a character is part of an alphabet with
+
+ case distinctions, and has an upper case equivalent, then the upper case equivalent is in
+
+ this field. See the explanation below on case distinctions. These mappings are always
+
+ one-to-one, not one-to-many or many-to-one. This field is informative.</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">13</th>
+
+ <td VALIGN="top"><a HREF="#Case Mappings">Lowercase Mapping</a></td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">Similar to Uppercase mapping</td>
+
+ </tr>
+
+ <tr>
+
+ <th VALIGN="top">14</th>
+
+ <td VALIGN="top"><a HREF="#Case Mappings">Titlecase Mapping</a></td>
+
+ <td VALIGN="top">informative</td>
+
+ <td VALIGN="top">Similar to Uppercase mapping</td>
+
+ </tr>
+
+</table>
+
+
+
+<h3><a NAME="General Category"></a>General Category</h3>
+
+
+
+<p>The values in this field are abbreviations for the following. Some of the values are
+
+normative, and some are informative. For more information, see the Unicode Standard.</p>
+
+
+
+<p><b>Note:</b> the standard does not assign information to control characters (except for
+
+certain cases in the Bidirectional Algorithm). Implementations will generally also assign
+
+categories to certain control characters, notably CR and LF, according to platform
+
+conventions.</p>
+
+
+
+<h4>Normative Categories</h4>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th><p ALIGN="LEFT">Abbr.</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lu</td>
+
+ <td>Letter, Uppercase</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Ll</td>
+
+ <td>Letter, Lowercase</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lt</td>
+
+ <td>Letter, Titlecase</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Mn</td>
+
+ <td>Mark, Non-Spacing</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Mc</td>
+
+ <td>Mark, Spacing Combining</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Me</td>
+
+ <td>Mark, Enclosing</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Nd</td>
+
+ <td>Number, Decimal Digit</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Nl</td>
+
+ <td>Number, Letter</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">No</td>
+
+ <td>Number, Other</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Zs</td>
+
+ <td>Separator, Space</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Zl</td>
+
+ <td>Separator, Line</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Zp</td>
+
+ <td>Separator, Paragraph</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cc</td>
+
+ <td>Other, Control</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cf</td>
+
+ <td>Other, Format</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cs</td>
+
+ <td>Other, Surrogate</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Co</td>
+
+ <td>Other, Private Use</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Cn</td>
+
+ <td>Other, Not Assigned (no characters in the file have this property)</td>
+
+ </tr>
+
+</table>
+
+
+
+<h4>Informative Categories</h4>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th><p ALIGN="LEFT">Abbr.</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lm</td>
+
+ <td>Letter, Modifier</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Lo</td>
+
+ <td>Letter, Other</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pc</td>
+
+ <td>Punctuation, Connector</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pd</td>
+
+ <td>Punctuation, Dash</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Ps</td>
+
+ <td>Punctuation, Open</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pe</td>
+
+ <td>Punctuation, Close</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pi</td>
+
+ <td>Punctuation, Initial quote (may behave like Ps or Pe depending on usage)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Pf</td>
+
+ <td>Punctuation, Final quote (may behave like Ps or Pe depending on usage)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Po</td>
+
+ <td>Punctuation, Other</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Sm</td>
+
+ <td>Symbol, Math</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Sc</td>
+
+ <td>Symbol, Currency</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">Sk</td>
+
+ <td>Symbol, Modifier</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">So</td>
+
+ <td>Symbol, Other</td>
+
+ </tr>
+
+</table>
+
+
+
+<h3><a NAME="Bidirectional Category"></a>Bidirectional Category</h3>
+
+
+
+<p>Please refer to Chapter 3 for an explanation of the algorithm for Bidirectional
+
+Behavior and an explanation of the significance of these categories. An up-to-date version
+
+can be found on <a HREF="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical
+
+Report #9: The Bidirectional Algorithm</a>. These values are normative.</p>
+
+
+
+<table BORDER="0" CELLPADDING="2">
+
+ <tr>
+
+ <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Type</th>
+
+ <th VALIGN="TOP" ALIGN="LEFT"><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>L</b></td>
+
+ <td VALIGN="TOP">Left-to-Right</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>LRE</b></td>
+
+ <td VALIGN="TOP">Left-to-Right Embedding</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>LRO</b></td>
+
+ <td VALIGN="TOP">Left-to-Right Override</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>R</b></td>
+
+ <td VALIGN="TOP">Right-to-Left</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>AL</b></td>
+
+ <td VALIGN="TOP">Right-to-Left Arabic</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>RLE</b></td>
+
+ <td VALIGN="TOP">Right-to-Left Embedding</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>RLO</b></td>
+
+ <td VALIGN="TOP">Right-to-Left Override</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>PDF</b></td>
+
+ <td VALIGN="TOP">Pop Directional Format</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>EN</b></td>
+
+ <td VALIGN="TOP">European Number</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>ES</b></td>
+
+ <td VALIGN="TOP">European Number Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>ET</b></td>
+
+ <td VALIGN="TOP">European Number Terminator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>AN</b></td>
+
+ <td VALIGN="TOP">Arabic Number</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>CS</b></td>
+
+ <td VALIGN="TOP">Common Number Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>NSM</b></td>
+
+ <td VALIGN="TOP">Non-Spacing Mark</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>BN</b></td>
+
+ <td VALIGN="TOP">Boundary Neutral</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>B</b></td>
+
+ <td VALIGN="TOP">Paragraph Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>S</b></td>
+
+ <td VALIGN="TOP">Segment Separator</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>WS</b></td>
+
+ <td VALIGN="TOP">Whitespace</td>
+
+ </tr>
+
+ <tr>
+
+ <td VALIGN="TOP"><b>ON</b></td>
+
+ <td VALIGN="TOP">Other Neutrals</td>
+
+ </tr>
+
+</table>
+
+
+
+<h3><a NAME="Character Decomposition"></a>Character Decomposition Mapping</h3>
+
+
+
+<p>The decomposition is a normative property of a character. The tags supplied with
+
+certain decomposition mappings generally indicate formatting information. Where no such
+
+tag is given, the mapping is designated as canonical. Conversely, the presence of a
+
+formatting tag also indicates that the mapping is a compatibility mapping and not a
+
+canonical mapping. In the absence of other formatting information in a compatibility
+
+mapping, the tag is used to distinguish it from canonical mappings.</p>
+
+
+
+<p>In some instances a canonical mapping or a compatibility mapping may consist of a
+
+single character. For a canonical mapping, this indicates that the character is a
+
+canonical equivalent of another single character. For a compatibility mapping, this
+
+indicates that the character is a compatibility equivalent of another single character.
+
+The compatibility formatting tags used are:</p>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th>Tag</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;font&gt;&nbsp;&nbsp;</td>
+
+ <td>A font variant (e.g. a blackletter form).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;noBreak&gt;&nbsp;&nbsp;</td>
+
+ <td>A no-break version of a space or hyphen.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;initial&gt;&nbsp;&nbsp;</td>
+
+ <td>An initial presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;medial&gt;&nbsp;&nbsp;</td>
+
+ <td>A medial presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;final&gt;&nbsp;&nbsp;</td>
+
+ <td>A final presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;isolated&gt;&nbsp;&nbsp;</td>
+
+ <td>An isolated presentation form (Arabic).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;circle&gt;&nbsp;&nbsp;</td>
+
+ <td>An encircled form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;super&gt;&nbsp;&nbsp;</td>
+
+ <td>A superscript form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;sub&gt;&nbsp;&nbsp;</td>
+
+ <td>A subscript form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;vertical&gt;&nbsp;&nbsp;</td>
+
+ <td>A vertical layout presentation form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;wide&gt;&nbsp;&nbsp;</td>
+
+ <td>A wide (or zenkaku) compatibility character.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;narrow&gt;&nbsp;&nbsp;</td>
+
+ <td>A narrow (or hankaku) compatibility character.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;small&gt;&nbsp;&nbsp;</td>
+
+ <td>A small variant form (CNS compatibility).</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;square&gt;&nbsp;&nbsp;</td>
+
+ <td>A CJK squared font variant.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;fraction&gt;&nbsp;&nbsp;</td>
+
+ <td>A vulgar fraction form.</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="CENTER">&lt;compat&gt;&nbsp;&nbsp;</td>
+
+ <td>Otherwise unspecified compatibility character.</td>
+
+ </tr>
+
+</table>
+
+
+
+<p><b>Reminder: </b>There is a difference between decomposition and decomposition mapping.
+
+The decomposition mappings are defined in the UnicodeData, while the decomposition (also
+
+termed &quot;full decomposition&quot;) is defined in Chapter 3 to use those mappings
+<i>
+
+recursively.</i>
+
+
+
+<ul>
+
+ <li>The canonical decomposition is formed by recursively applying the canonical mappings,
+
+ then applying the canonical reordering algorithm. </li>
+
+ <li>The compatibility decomposition is formed by recursively applying the canonical <em>and</em>
+
+ compatibility mappings, then applying the canonical reordering algorithm. </li>
+
+</ul>
+
+
+
+<h3><a NAME="Canonical Combining Classes"></a>Canonical Combining Classes</h3>
+
+
+
+<table BORDER="0" CELLSPACING="2" CELLPADDING="0">
+
+ <tr>
+
+ <th><p ALIGN="LEFT">Value</th>
+
+ <th><p ALIGN="LEFT">Description</th>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">0:</td>
+
+ <td>Spacing, split, enclosing, reordrant, and Tibetan subjoined</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">1:</td>
+
+ <td>Overlays and interior</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">7:</td>
+
+ <td>Nuktas</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">8:</td>
+
+ <td>Hiragana/Katakana voicing marks</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">9:</td>
+
+ <td>Viramas</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">10:</td>
+
+ <td>Start of fixed position classes</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">199:</td>
+
+ <td>End of fixed position classes</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">200:</td>
+
+ <td>Below left attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">202:</td>
+
+ <td>Below attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">204:</td>
+
+ <td>Below right attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">208:</td>
+
+ <td>Left attached (reordrant around single base character)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">210:</td>
+
+ <td>Right attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">212:</td>
+
+ <td>Above left attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">214:</td>
+
+ <td>Above attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">216:</td>
+
+ <td>Above right attached</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">218:</td>
+
+ <td>Below left</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">220:</td>
+
+ <td>Below</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">222:</td>
+
+ <td>Below right</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">224:</td>
+
+ <td>Left (reordrant around single base character)</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">226:</td>
+
+ <td>Right</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">228:</td>
+
+ <td>Above left</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">230:</td>
+
+ <td>Above</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">232:</td>
+
+ <td>Above right</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">233:</td>
+
+ <td>Double below</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">234:</td>
+
+ <td>Double above</td>
+
+ </tr>
+
+ <tr>
+
+ <td ALIGN="RIGHT">240:</td>
+
+ <td>Below (iota subscript)</td>
+
+ </tr>
+
+</table>
+
+
+
+<p><strong>Note: </strong>some of the combining classes in this list do not currently have
+
+members but are specified here for completeness.</p>
+
+
+
+<h3><a NAME="Decompositions and Normalization"></a>Decompositions and Normalization</h3>
+
+
+
+<p>Decomposition is specified in Chapter 3. <a href="http://www.unicode.org/unicode/reports/tr15/"><i>Unicode Technical Report #15:
+
+Normalization Forms</i></a> specifies the interaction between decomposition and normalization. The
+
+most up-to-date version is found on <a HREF="http://www.unicode.org/unicode/reports/tr15/">http://www.unicode.org/unicode/reports/tr15/</a>.
+
+That report specifies how the decompositions defined in UnicodeData.txt are used to derive
+
+normalized forms of Unicode text.</p>
+
+
+
+<p>Note that as of the 2.1.9 update of the Unicode Character Database, the decompositions
+
+in the UnicodeData.txt file can be used to recursively derive the full decomposition in
+
+canonical order, without the need to separately apply canonical reordering. However,
+
+canonical reordering of combining character sequences must still be applied in
+
+decomposition when normalizing source text which contains any combining marks.</p>
+
+
+
+<h3><a NAME="Case Mappings"></a>Case Mappings</h3>
+
+
+
+<p>The case mapping is an informative, default mapping. Case itself, on the other hand,
+
+has normative status. Thus, for example, 0041 LATIN CAPITAL LETTER A is normatively
+
+uppercase, but its lowercase mapping the 0061 LATIN SMALL LETTER A is informative. The
+
+reason for this is that case can be considered to be an inherent property of a particular
+
+character (and is usually, but not always, derivable from the presence of the terms
+
+&quot;CAPITAL&quot; or &quot;SMALL&quot; in the character name), but case mappings between
+
+characters are occasionally influenced by local conventions. For example, certain
+
+languages, such as Turkish, German, French, or Greek may have small deviations from the
+
+default mappings listed in UnicodeData.</p>
+
+
+
+<p>In addition to uppercase and lowercase, because of the inclusion of certain composite
+
+characters for compatibility, such as 01F1 LATIN CAPITAL LETTER DZ, there is a third case,
+
+called <i>titlecase</i>, which is used where the first letter of a word is to be
+
+capitalized (e.g. UPPERCASE, Titlecase, lowercase). An example of such a titlecase letter
+
+is 01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.</p>
+
+
+
+<p>The uppercase, titlecase and lowercase fields are only included for characters that
+
+have a single corresponding character of that type. Composite characters (such as
+
+&quot;339D SQUARE CM&quot;) that do not have a single corresponding character of that type
+
+can be cased by decomposition.</p>
+
+
+
+<p>For compatibility with existing parsers, UnicodeData only contains case mappings for
+
+characters where they are one-to-one mappings; it also omits information about
+
+context-sensitive case mappings. Information about these special cases can be found in a
+
+separate data file, SpecialCasing.txt,
+
+which has been added starting with the 2.1.8 update to the Unicode data files.
+
+SpecialCasing.txt contains additional informative case mappings that are either not
+
+one-to-one or which are context-sensitive.</p>
+
+
+
+<h2><a NAME="Property Invariants"></a>Property Invariants</h2>
+
+
+
+<p>Values in UnicodeData.txt are subject to correction as errors are found; however, some
+
+characteristics of the categories themselves can be considered invariants. Applications
+
+may wish to take these invariants into account when choosing how to implement character
+
+properties. The following is a partial list of known invariants for the Unicode Character
+
+Database.</p>
+
+
+
+<h4>Database Fields</h4>
+
+
+
+<ul>
+
+ <li>The number of fields in UnicodeData.txt is fixed. </li>
+
+ <li>The order of the fields is also fixed. <ul>
+
+ <li>Any additional information about character properties to be added in the future will
+
+ appear in separate data tables, rather than being added on to the existing table or by
+
+ subdivision or reinterpretation of existing fields. </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>General Category</h4>
+
+
+
+<ul>
+
+ <li>There will never be more than 32 General Category values. <ul>
+
+ <li>It is very unlikely that the Unicode Technical Committee will subdivide the General
+
+ Category partition any further, since that can cause implementations to misbehave. Because
+
+ the General Category is limited to 32 values, 5 bits can be used to represent the
+
+ information, and a 32-bit integer can be used as a bitmask to represent arbitrary sets of
+
+ categories. </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>Combining Classes</h4>
+
+
+
+<ul>
+
+ <li>Combining classes are limited to the values 0 to 255. <ul>
+
+ <li>In practice, there are far fewer than 256 values used. Implementations may take
+
+ advantage of this fact for compression, since only the ordering of the non-zero values
+
+ matters for the Canonical Reordering Algorithm. It is possible for up to 256 values to be
+
+ used in the future; however, UTC decisions in the future may restrict the number of values
+
+ to 128, since this has implementation advantages. [Signed bytes can be used without
+
+ widening to ints in Java, for example.] </li>
+
+ </ul>
+
+ </li>
+
+ <li>All characters other than those of General Category M* have the combining class 0. <ul>
+
+ <li>Currently, all characters other than those of General Category Mn have the value 0.
+
+ However, some characters of General Category Me or Mc may be given non-zero values in the
+
+ future. </li>
+
+ <li>The precise values above the value 0 are not invariant--only the relative ordering is
+
+ considered normative. For example, it is not guaranteed in future versions that the class
+
+ of U+05B4 will be precisely 14. </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>Case</h4>
+
+
+
+<ul>
+
+ <li>Characters of type Lu, Lt, or Ll are called <i>cased</i>. All characters with an Upper,
+
+ Lower, or Titlecase mapping are cased characters. <ul>
+
+ <li>However, characters with the General Categories of Lu, Ll, or Lt may not always have
+
+ case mappings, and case mappings may vary by locale. (See
+
+ ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt). </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h4>Canonical Decomposition</h4>
+
+
+
+<ul>
+
+ <li>Canonical mappings are always in canonical order. </li>
+
+ <li>Canonical mappings have only the first of a pair possibly further decomposing. </li>
+
+ <li>Canonical decompositions are &quot;transparent&quot; to other character data: <ul>
+
+ <li><tt>BIDI(a) = BIDI(principal(canonicalDecomposition(a))</tt> </li>
+
+ <li><tt>Category(a) = Category(principal(canonicalDecomposition(a))</tt> </li>
+
+ <li><tt>CombiningClass(a) = CombiningClass(principal(canonicalDecomposition(a))</tt><br>
+
+ where principal(a) is the first character not of type Mn, or the first character if all
+
+ characters are of type Mn. </li>
+
+ </ul>
+
+ </li>
+
+ <li>However, because there are sometimes missing case pairs, and because of some legacy
+
+ characters, it is only generally true that: <ul>
+
+ <li><tt>upper(canonicalDecomposition(a)) = canonicalDecomposition(upper(a))</tt> </li>
+
+ <li><tt>lower(canonicalDecomposition(a)) = canonicalDecomposition(lower(a))</tt> </li>
+
+ <li><tt>title(canonicalDecomposition(a)) = canonicalDecomposition(title(a))</tt> </li>
+
+ </ul>
+
+ </li>
+
+</ul>
+
+
+
+<h2><a NAME="Modification History"></a>Modification History</h2>
+
+
+
+<p>This section provides a summary of the changes between update versions of the Unicode
+
+Standard.</p>
+
+
+
+<h3><a href="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 3.0.0"> Unicode 3.0.0</a></h3>
+
+
+
+<p>Modifications made for Version 3.0.0 of UnicodeData.txt include many new characters and
+
+a number of property changes. These are summarized in Appendex D of <em>The Unicode
+
+Standard, Version 3.0.</em></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.9">Unicode 2.1.9</a> </h3>
+
+
+
+<p>Modifications made for Version 2.1.9 of UnicodeData.txt include:
+
+
+
+<ul>
+
+ <li>Corrected combining class for U+05AE HEBREW ACCENT ZINOR. </li>
+
+ <li>Corrected combining class for U+20E1 COMBINING LEFT RIGHT ARROW ABOVE </li>
+
+ <li>Corrected combining class for U+0F35 and U+0F37 to 220. </li>
+
+ <li>Corrected combining class for U+0F71 to 129. </li>
+
+ <li>Added a decomposition for U+0F0C TIBETAN MARK DELIMITER TSHEG BSTAR. </li>
+
+ <li>Added&nbsp; decompositions for several Greek symbol letters: U+03D0..U+03D2, U+03D5,
+
+ U+03D6, U+03F0..U+03F2. </li>
+
+ <li>Removed&nbsp; decompositions from the conjoining jamo block: U+1100..U+11F8. </li>
+
+ <li>Changes to decomposition mappings for some Tibetan vowels for consistency in
+
+ normalization. (U+0F71, U+0F73, U+0F77, U+0F79, U+0F81) </li>
+
+ <li>Updated the decomposition mappings for several Vietnamese characters with two diacritics
+
+ (U+1EAC, U+1EAD, U+1EB6, U+1EB7, U+1EC6, U+1EC7, U+1ED8, U+1ED9), so that the recursive
+
+ decomposition can be generated directly in canonically reordered form (not a normative
+
+ change). </li>
+
+ <li>Updated the decomposition mappings for several Arabic compatibility characters involving
+
+ shadda (U+FC5E..U+FC62, U+FCF2..U+FCF4), and two Latin characters (U+1E1C, U+1E1D), so
+
+ that the decompositions are generated directly in canonically reordered form (not a
+
+ normative change). </li>
+
+ <li>Changed BIDI category for: U+00A0 NO-BREAK SPACE, U+2007 FIGURE SPACE, U+2028 LINE
+
+ SEPARATOR. </li>
+
+ <li>Changed BIDI category for extenders of General Category Lm: U+3005, U+3021..U+3035,
+
+ U+FF9E, U+FF9F. </li>
+
+ <li>Changed General Category and BIDI category for the Greek numeral signs: U+0374, U+0375. </li>
+
+ <li>Corrected General Category for U+FFE8 HALFWIDTH FORMS LIGHT VERTICAL. </li>
+
+ <li>Added Unicode 1.0 names for many Tibetan characters (informative). </li>
+
+</ul>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.8">Unicode 2.1.8</a> </h3>
+
+
+
+<p>Modifications made for Version 2.1.8 of UnicodeData.txt include:
+
+
+
+<ul>
+
+ <li>Added combining class 240 for U+0345 COMBINING GREEK YPOGEGRAMMENI so that
+
+ decompositions involving iota subscript are derivable directly in canonically reordered
+
+ form; this also has a bearing on simplification of casing of polytonic Greek. </li>
+
+ <li>Changes in decompositions related to Greek tonos. These result from the clarification
+
+ that monotonic Greek &quot;tonos&quot; should be equated with U+0301 COMBINING ACUTE,
+
+ rather than with U+030D COMBINING VERTICAL LINE ABOVE. (All Greek characters in the Greek
+
+ block involving &quot;tonos&quot;; some Greek characters in the polytonic Greek in the
+
+ 1FXX block.) </li>
+
+ <li>Changed decompositions involving dialytika tonos. (U+0390, U+03B0) </li>
+
+ <li>Changed ternary decompositions to binary. (U+0CCB, U+FB2C, U+FB2D) These changes
+
+ simplify normalization. </li>
+
+ <li>Removed canonical decomposition for Latin Candrabindu. (U+0310) </li>
+
+ <li>Corrected error in canonical decomposition for U+1FF4. </li>
+
+ <li>Added compatibility decompositions to clarify collation tables. (U+2100, U+2101, U+2105,
+
+ U+2106, U+1E9A) </li>
+
+ <li>A series of general category changes to assist the convergence of of Unicode definition
+
+ of identifier with ISO TR 10176: <ul>
+
+ <li>So &gt; Lo: U+0950, U+0AD0, U+0F00, U+0F88..U+0F8B </li>
+
+ <li>Po &gt; Lo: U+0E2F, U+0EAF, U+3006 </li>
+
+ <li>Lm &gt; Sk: U+309B, U+309C </li>
+
+ <li>Po &gt; Pc: U+30FB, U+FF65 </li>
+
+ <li>Ps/Pe &gt; Mn: U+0F3E, U+0F3F </li>
+
+ </ul>
+
+ </li>
+
+ <li>A series of bidi property changes for consistency. <ul>
+
+ <li>L &gt; ET: U+09F2, U+09F3 </li>
+
+ <li>ON &gt; L: U+3007 </li>
+
+ <li>L &gt; ON: U+0F3A..U+0F3D, U+037E, U+0387 </li>
+
+ </ul>
+
+ </li>
+
+ <li>Add case mapping: U+01A6 &lt;-&gt; U+0280 </li>
+
+ <li>Updated symmetric swapping value for guillemets: U+00AB, U+00BB, U+2039, U+203A. </li>
+
+ <li>Changes to combining class values. Most Indic fixed position class non-spacing marks
+
+ were changed to combining class 0. This fixes some inconsistencies in how canonical
+
+ reordering would apply to Indic scripts, including Tibetan. Indic interacting top/bottom
+
+ fixed position classes were merged into single (non-zero) classes as part of this change.
+
+ Tibetan subjoined consonants are changed from combining class 6 to combining class 0. Thai
+
+ pinthu (U+0E3A) moved to combining class 9. Moved two Devanagari stress marks into generic
+
+ above and below combining classes (U+0951, U+0952). </li>
+
+ <li>Corrected placement of semicolon near symmetric swapping field. (U+FA0E, etc., scattered
+
+ positions to U+FA29) </li>
+
+</ul>
+
+
+
+<h3>Version 2.1.7</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3>Version 2.1.6</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.5">Unicode 2.1.5</a> </h3>
+
+
+
+<p>Modifications made for Version 2.1.5 of UnicodeData.txt include:
+
+
+
+<ul>
+
+ <li>Changed decomposition for U+FF9E and U+FF9F so that correct collation weighting will
+
+ automatically result from the canonical equivalences. </li>
+
+ <li>Removed canonical decompositions for U+04D4, U+04D5, U+04D8, U+04D9, U+04E0, U+04E1,
+
+ U+04E8, U+04E9 (the implication being that no canonical equivalence is claimed between
+
+ these 8 characters and similar Latin letters), and updated 4 canonical decompositions for
+
+ U+04DB, U+04DC, U+04EA, U+04EB to reflect the implied difference in the base character. </li>
+
+ <li>Added Pi, and Pf categories and assigned the relevant quotation marks to those
+
+ categories, based on the Unicode Technical Corrigendum on Quotation Characters. </li>
+
+ <li>Updating of many bidi properties, following the advice of the ad hoc committee on bidi,
+
+ and to make the bidi properties of compatibility characters more consistent. </li>
+
+ <li>Changed category of several Tibetan characters: U+0F3E, U+0F3F, U+0F88..U+0F8B to make
+
+ them non-combining, reflecting the combined opinion of Tibetan experts. </li>
+
+ <li>Added case mapping for U+03F2. </li>
+
+ <li>Corrected case mapping for U+0275. </li>
+
+ <li>Added titlecase mappings for U+03D0, U+03D1, U+03D5, U+03D6, U+03F0.. U+03F2. </li>
+
+ <li>Corrected compatibility label for U+2121. </li>
+
+ <li>Add specific entries for all the CJK compatibility ideographs, U+F900..U+FA2D, so the
+
+ canonical decomposition for each (the URO character it is equivalent to) can be carried in
+
+ the database. </li>
+
+</ul>
+
+
+
+<h3>Version 2.1.4</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3>Version 2.1.3</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.1.2">Unicode 2.1.2</a> </h3>
+
+
+
+<p>Modifications made in updating UnicodeData.txt to Version 2.1.2 for the Unicode
+
+Standard, Version 2.1 (from Version 2.0) include:
+
+
+
+<ul>
+
+ <li>Added two characters (U+20AC and U+FFFC). </li>
+
+ <li>Amended bidi properties for U+0026, U+002E, U+0040, U+2007. </li>
+
+ <li>Corrected case mappings for U+018E, U+019F, U+01DD, U+0258, U+0275, U+03C2, U+1E9B. </li>
+
+ <li>Changed combining order class for U+0F71. </li>
+
+ <li>Corrected canonical decompositions for U+0F73, U+1FBE. </li>
+
+ <li>Changed decomposition for U+FB1F from compatibility to canonical. </li>
+
+ <li>Added compatibility decompositions for U+FBE8, U+FBE9, U+FBF9..U+FBFB. </li>
+
+ <li>Corrected compatibility decompositions for U+2469, U+246A, U+3358. </li>
+
+</ul>
+
+
+
+<h3>Version 2.1.1</h3>
+
+
+
+<p><i>This version was for internal change tracking only, and never publicly released.</i></p>
+
+
+
+<h3><a HREF="http://www.unicode.org/unicode/standard/versions/enumeratedversions.html#Unicode 2.0.0">Unicode 2.0.0</a> </h3>
+
+
+
+<p>The modifications made in updating UnicodeData.txt for the Unicode
+
+Standard, Version 2.0 include:
+
+
+
+<ul>
+
+ <li>Fixed decompositions with TONOS to use correct NSM: 030D. </li>
+
+ <li>Removed old Hangul Syllables; mapping to new characters are in a separate table. </li>
+
+ <li>Marked compatibility decompositions with additional tags. </li>
+
+ <li>Changed old tag names for clarity. </li>
+
+ <li>Revision of decompositions to use first-level decomposition, instead of maximal
+
+ decomposition. </li>
+
+ <li>Correction of all known errors in decompositions from earlier versions. </li>
+
+ <li>Added control code names (as old Unicode names). </li>
+
+ <li>Added Hangul Jamo decompositions. </li>
+
+ <li>Added Number category to match properties list in book. </li>
+
+ <li>Fixed categories of Koranic Arabic marks. </li>
+
+ <li>Fixed categories of precomposed characters to match decomposition where possible. </li>
+
+ <li>Added Hebrew cantillation marks and the Tibetan script. </li>
+
+ <li>Added place holders for ranges such as CJK Ideographic Area and the Private Use Area. </li>
+
+ <li>Added categories Me, Sk, Pc, Nl, Cs, Cf, and rectified a number of mistakes in the
+
+ database. </li>
+
+</ul>
+
+</body>
+
+</html>
+
diff --git a/lib/unicode/mktables.PL b/lib/unicode/mktables.PL
index 7d70b18469..48d40f4541 100755
--- a/lib/unicode/mktables.PL
+++ b/lib/unicode/mktables.PL
@@ -1,6 +1,6 @@
#!../../miniperl
-$UnicodeData = "UnicodeData-Latest.txt";
+$UnicodeData = "Unicode.300";
# Note: we try to keep filenames unique within first 8 chars. Using
# subdirectories for the following helps.
@@ -181,6 +181,11 @@ foreach $file (@todo) {
else {
open(OUT, ">$table.pl") or die "Can't create $table.pl: $!\n";
}
+ print OUT <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+EOH
print OUT <<"END";
return <<'END';
END
@@ -195,6 +200,11 @@ exit if @ARGV and not grep { $_ eq Block } @ARGV;
print "Block\n";
open(UD, 'Blocks.txt') or die "Can't open blocks.txt: $!\n";
open(OUT, ">Block.pl") or die "Can't create $table.pl: $!\n";
+print OUT <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+EOH
print OUT <<"END";
return <<'END';
END
@@ -208,6 +218,11 @@ while (<UD>) {
print OUT "$code $last $name\n";
$name =~ s/\s+//g;
open(BLOCK, ">In/$name.pl");
+ print BLOCK <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+EOH
print BLOCK <<"END2";
return <<'END';
$code $last
@@ -234,7 +249,7 @@ sub proplist {
$split = '($code, $name, $link, $linkgroup) = split(/; */);';
}
elsif ($table =~ /^Jamo/) {
- open(UD, "Jamo-2.txt") or warn "Can't open $table: $!";
+ open(UD, "Jamo.txt") or warn "Can't open $table: $!";
$split = '($code, $short, $name) = split(/; */); $code =~ s/^U\+//;';
}
@@ -388,26 +403,40 @@ foreach my $b (@base) {
@unicode = sort keys %unicode;
print "EqUnicode\n";
-if (open(EQ_UNICODE, ">Eq/Unicode")) {
+if (open(OUT, ">Eq/Unicode.pl")) {
+ print OUT <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+return <<'END';
+EOH
foreach my $c (@unicode) {
- print EQ_UNICODE "$c @{$unicode{$c}}\n";
+ print OUT "$c @{$unicode{$c}}\n";
}
- close EQ_UNICODE;
+ print OUT "END\n";
+ close OUT;
} else {
- die "$0: failed to open Eq/Unicode for writing: $!\n";
+ die "$0: failed to open Eq/Unicode.pl for writing: $!\n";
}
print "EqLatin1\n";
-if (open(EQ_LATIN1, ">Eq/Latin1")) {
+if (open(OUT, ">Eq/Latin1.pl")) {
+ print OUT <<EOH;
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by $0 from e.g. $UnicodeData.
+# Any changes made here will be lost!
+return <<'END';
+EOH
foreach my $c (@unicode) {
last if hex($c) > 255;
my @c = grep { hex($_) < 256 } @{$unicode{$c}};
next unless @c;
- print EQ_LATIN1 "$c @c\n";
+ print OUT "$c @c\n";
}
- close EQ_LATIN1;
+ print OUT "END\n";
+ close OUT;
} else {
- die "$0: failed to open Eq/Latin1 for writing: $!\n";
+ die "$0: failed to open Eq/Latin1.pl for writing: $!\n";
}
# eof
diff --git a/lib/vars.pm b/lib/vars.pm
index ca2a08dcf6..6ae5373f89 100644
--- a/lib/vars.pm
+++ b/lib/vars.pm
@@ -45,7 +45,7 @@ __END__
=head1 NAME
-vars - Perl pragma to predeclare global variable names
+vars - Perl pragma to predeclare global variable names (obsolete)
=head1 SYNOPSIS
@@ -53,6 +53,10 @@ vars - Perl pragma to predeclare global variable names
=head1 DESCRIPTION
+NOTE: The functionality provided by this pragma has been superseded
+by C<our> declarations, available in Perl v5.6.0 or later. See
+L<perlfunc/our>.
+
This will predeclare all the variables whose names are
in the list, allowing you to use them under "use strict", and
disabling any typo warnings.
diff --git a/lib/warning.pm b/lib/warning.pm
deleted file mode 100644
index 1df83d946f..0000000000
--- a/lib/warning.pm
+++ /dev/null
@@ -1,163 +0,0 @@
-
-# This file was created by warning.pl
-# Any changes made here will be lost.
-#
-
-package warning;
-
-=head1 NAME
-
-warning - Perl pragma to control optional warnings
-
-=head1 SYNOPSIS
-
- use warning;
- no warning;
-
- use warning "all";
- no warning "all";
-
-=head1 DESCRIPTION
-
-If no import list is supplied, all possible warnings are either enabled
-or disabled.
-
-See L<perlmod/Pragmatic Modules> and L<perllexwarn>.
-
-
-=cut
-
-use Carp ;
-
-%Bits = (
- 'all' => "\x55\x55\x55\x55\x55\x55\x55\x55\x55", # [0..35]
- 'ambiguous' => "\x00\x00\x00\x00\x01\x00\x00\x00\x00", # [16]
- 'closed' => "\x04\x00\x00\x00\x00\x00\x00\x00\x00", # [1]
- 'closure' => "\x00\x00\x00\x00\x00\x00\x10\x00\x00", # [26]
- 'debugging' => "\x00\x00\x00\x01\x00\x00\x00\x00\x00", # [12]
- 'deprecated' => "\x00\x00\x00\x00\x04\x00\x00\x00\x00", # [17]
- 'exec' => "\x10\x00\x00\x00\x00\x00\x00\x00\x00", # [2]
- 'inplace' => "\x00\x00\x00\x04\x00\x00\x00\x00\x00", # [13]
- 'internal' => "\x00\x00\x00\x10\x00\x00\x00\x00\x00", # [14]
- 'io' => "\x55\x05\x00\x00\x00\x00\x00\x00\x00", # [0..5]
- 'misc' => "\x00\x10\x00\x00\x00\x00\x00\x00\x00", # [6]
- 'newline' => "\x40\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
- 'numeric' => "\x00\x40\x00\x00\x00\x00\x00\x00\x00", # [7]
- 'octal' => "\x00\x00\x00\x00\x10\x00\x00\x00\x00", # [18]
- 'once' => "\x00\x00\x01\x00\x00\x00\x00\x00\x00", # [8]
- 'parenthesis' => "\x00\x00\x00\x00\x40\x00\x00\x00\x00", # [19]
- 'pipe' => "\x00\x01\x00\x00\x00\x00\x00\x00\x00", # [4]
- 'precedence' => "\x00\x00\x00\x00\x00\x01\x00\x00\x00", # [20]
- 'printf' => "\x00\x00\x00\x00\x00\x04\x00\x00\x00", # [21]
- 'recursion' => "\x00\x00\x04\x00\x00\x00\x00\x00\x00", # [9]
- 'redefine' => "\x00\x00\x10\x00\x00\x00\x00\x00\x00", # [10]
- 'reserved' => "\x00\x00\x00\x00\x00\x10\x00\x00\x00", # [22]
- 'semicolon' => "\x00\x00\x00\x00\x00\x40\x00\x00\x00", # [23]
- 'severe' => "\x00\x00\x40\x15\x00\x00\x00\x00\x00", # [11..14]
- 'signal' => "\x00\x00\x00\x00\x00\x00\x40\x00\x00", # [27]
- 'substr' => "\x00\x00\x00\x00\x00\x00\x00\x01\x00", # [28]
- 'syntax' => "\x00\x00\x00\x40\x55\x55\x00\x00\x00", # [15..23]
- 'taint' => "\x00\x00\x00\x00\x00\x00\x00\x04\x00", # [29]
- 'uninitialized' => "\x00\x00\x00\x00\x00\x00\x01\x00\x00", # [24]
- 'unopened' => "\x00\x04\x00\x00\x00\x00\x00\x00\x00", # [5]
- 'unsafe' => "\x00\x00\x00\x00\x00\x00\x54\x55\x00", # [25..31]
- 'untie' => "\x00\x00\x00\x00\x00\x00\x00\x10\x00", # [30]
- 'utf8' => "\x00\x00\x00\x00\x00\x00\x00\x40\x00", # [31]
- 'void' => "\x00\x00\x00\x00\x00\x00\x00\x00\x01", # [32]
- );
-
-%DeadBits = (
- 'all' => "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa", # [0..35]
- 'ambiguous' => "\x00\x00\x00\x00\x02\x00\x00\x00\x00", # [16]
- 'closed' => "\x08\x00\x00\x00\x00\x00\x00\x00\x00", # [1]
- 'closure' => "\x00\x00\x00\x00\x00\x00\x20\x00\x00", # [26]
- 'debugging' => "\x00\x00\x00\x02\x00\x00\x00\x00\x00", # [12]
- 'deprecated' => "\x00\x00\x00\x00\x08\x00\x00\x00\x00", # [17]
- 'exec' => "\x20\x00\x00\x00\x00\x00\x00\x00\x00", # [2]
- 'inplace' => "\x00\x00\x00\x08\x00\x00\x00\x00\x00", # [13]
- 'internal' => "\x00\x00\x00\x20\x00\x00\x00\x00\x00", # [14]
- 'io' => "\xaa\x0a\x00\x00\x00\x00\x00\x00\x00", # [0..5]
- 'misc' => "\x00\x20\x00\x00\x00\x00\x00\x00\x00", # [6]
- 'newline' => "\x80\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
- 'numeric' => "\x00\x80\x00\x00\x00\x00\x00\x00\x00", # [7]
- 'octal' => "\x00\x00\x00\x00\x20\x00\x00\x00\x00", # [18]
- 'once' => "\x00\x00\x02\x00\x00\x00\x00\x00\x00", # [8]
- 'parenthesis' => "\x00\x00\x00\x00\x80\x00\x00\x00\x00", # [19]
- 'pipe' => "\x00\x02\x00\x00\x00\x00\x00\x00\x00", # [4]
- 'precedence' => "\x00\x00\x00\x00\x00\x02\x00\x00\x00", # [20]
- 'printf' => "\x00\x00\x00\x00\x00\x08\x00\x00\x00", # [21]
- 'recursion' => "\x00\x00\x08\x00\x00\x00\x00\x00\x00", # [9]
- 'redefine' => "\x00\x00\x20\x00\x00\x00\x00\x00\x00", # [10]
- 'reserved' => "\x00\x00\x00\x00\x00\x20\x00\x00\x00", # [22]
- 'semicolon' => "\x00\x00\x00\x00\x00\x80\x00\x00\x00", # [23]
- 'severe' => "\x00\x00\x80\x2a\x00\x00\x00\x00\x00", # [11..14]
- 'signal' => "\x00\x00\x00\x00\x00\x00\x80\x00\x00", # [27]
- 'substr' => "\x00\x00\x00\x00\x00\x00\x00\x02\x00", # [28]
- 'syntax' => "\x00\x00\x00\x80\xaa\xaa\x00\x00\x00", # [15..23]
- 'taint' => "\x00\x00\x00\x00\x00\x00\x00\x08\x00", # [29]
- 'uninitialized' => "\x00\x00\x00\x00\x00\x00\x02\x00\x00", # [24]
- 'unopened' => "\x00\x08\x00\x00\x00\x00\x00\x00\x00", # [5]
- 'unsafe' => "\x00\x00\x00\x00\x00\x00\xa8\xaa\x00", # [25..31]
- 'untie' => "\x00\x00\x00\x00\x00\x00\x00\x20\x00", # [30]
- 'utf8' => "\x00\x00\x00\x00\x00\x00\x00\x80\x00", # [31]
- 'void' => "\x00\x00\x00\x00\x00\x00\x00\x00\x02", # [32]
- );
-
-
-sub bits {
- my $mask ;
- my $catmask ;
- my $fatal = 0 ;
- foreach my $word (@_) {
- if ($word eq 'FATAL')
- { $fatal = 1 }
- elsif ($catmask = $Bits{$word}) {
- $mask |= $catmask ;
- $mask |= $DeadBits{$word} if $fatal ;
- }
- else
- { croak "unknown warning category '$word'" }
- }
-
- return $mask ;
-}
-
-sub import {
- shift;
- $^B |= bits(@_ ? @_ : 'all') ;
-}
-
-sub unimport {
- shift;
- $^B &= ~ bits(@_ ? @_ : 'all') ;
-}
-
-
-sub make_fatal
-{
- my $self = shift ;
- my $bitmask = $self->bits(@_) ;
- $SIG{__WARN__} =
- sub
- {
- die @_ if $^B & $bitmask ;
- warn @_
- } ;
-}
-
-sub bitmask
-{
- return $^B ;
-}
-
-sub enabled
-{
- my $string = shift ;
-
- return 1
- if $bits{$string} && $^B & $bits{$string} ;
-
- return 0 ;
-}
-
-1;