diff options
author | Marc Green <marcgreen@cpan.org> | 2011-06-28 07:55:00 -0400 |
---|---|---|
committer | Marc Green <marcgreen@cpan.org> | 2011-10-31 13:26:41 -0400 |
commit | 7b7d94fd07e15987d4d1afb12a16b635c370a159 (patch) | |
tree | 32ee21a60622b8cc7cfeb48b978948685eda70a1 /ext/Pod-Html | |
parent | baf7658bacfa659cdab08050470b20ebd5973384 (diff) | |
download | perl-7b7d94fd07e15987d4d1afb12a16b635c370a159.tar.gz |
Remove /usr/share/perl presence dependency
Diffstat (limited to 'ext/Pod-Html')
-rw-r--r-- | ext/Pod-Html/t/htmlcrossref.t | 10 | ||||
-rw-r--r-- | ext/Pod-Html/t/htmldir.t | 21 | ||||
-rw-r--r-- | ext/Pod-Html/t/htmldir3.t | 10 | ||||
-rw-r--r-- | ext/Pod-Html/test.lib/perlpodspec.pod | 1899 | ||||
-rw-r--r-- | ext/Pod-Html/test.lib/perlvar.pod | 1737 |
5 files changed, 3661 insertions, 16 deletions
diff --git a/ext/Pod-Html/t/htmlcrossref.t b/ext/Pod-Html/t/htmlcrossref.t index 96654cac88..2ef56b02ab 100644 --- a/ext/Pod-Html/t/htmlcrossref.t +++ b/ext/Pod-Html/t/htmlcrossref.t @@ -13,10 +13,10 @@ use Cwd; # XXX Is there a better way to do this? I need a relative url to cwd because of # --podpath and --podroot # Remove root dir from path -my $cwd = substr(Cwd::cwd(), length(File::Spec->rootdir())); +my $relcwd = substr(Cwd::cwd(), length(File::Spec->rootdir())); convert_n_test("htmlcrossref", "html cross references", - "--podpath=$cwd/t:usr/share/perl", + "--podpath=$relcwd/t:$relcwd/test.lib", "--podroot=/", "--quiet", ); @@ -59,15 +59,15 @@ __DATA__ <p><a href="#non-existant-section">"non existant section"</a></p> -<p><a href="/usr/share/perl/5.10.1/pod/perlvar.html">perlvar</a></p> +<p><a href="[CURRENTWORKINGDIRECTORY]/test.lib/perlvar.html">perlvar</a></p> -<p><a href="/usr/share/perl/5.10.1/pod/perlvar.html#pod-">"$"" in perlvar</a></p> +<p><a href="[CURRENTWORKINGDIRECTORY]/test.lib/perlvar.html#pod-">"$"" in perlvar</a></p> <p><code>perlvar</code></p> <p><code>perlvar/$"</code></p> -<p><a href="/usr/share/perl/5.10.1/pod/perlpodspec.html#First:">"First:" in perlpodspec</a></p> +<p><a href="[CURRENTWORKINGDIRECTORY]/test.lib/perlpodspec.html#First:">"First:" in perlpodspec</a></p> <p><code>perlpodspec/First:</code></p> diff --git a/ext/Pod-Html/t/htmldir.t b/ext/Pod-Html/t/htmldir.t index 2552c63dac..3d3a28d11a 100644 --- a/ext/Pod-Html/t/htmldir.t +++ b/ext/Pod-Html/t/htmldir.t @@ -5,7 +5,7 @@ BEGIN { } use strict; -use Test::More tests => 2; +use Test::More tests => 3; use File::Spec; use Cwd; @@ -13,12 +13,12 @@ use Cwd; # XXX Is there a better way to do this? 
I need a relative url to cwd because of # --podpath and --podroot # Remove root dir from path -my $cwd = substr(Cwd::cwd(), length(File::Spec->rootdir())); +my $relcwd = substr(Cwd::cwd(), length(File::Spec->rootdir())); my $data_pos = tell DATA; # to read <DATA> twice convert_n_test("htmldir", "test --htmldir and --htmlroot 1a", - "--podpath=$cwd/t:usr/share/perl", + "--podpath=$relcwd/t:$relcwd/test.lib", "--podroot=/", # "--podpath=t", # "--htmlroot=/test/dir", @@ -28,9 +28,18 @@ convert_n_test("htmldir", "test --htmldir and --htmlroot 1a", seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same) convert_n_test("htmldir", "test --htmldir and --htmlroot 1b", - "--podpath=$cwd:usr/share/perl", + "--podpath=$relcwd", "--podroot=/", - "--htmldir=$cwd/t", + "--htmldir=$relcwd/t", + "--htmlroot=/", +); + +seek DATA, $data_pos, 0; # to read <DATA> thrice (expected output is the same) + +convert_n_test("htmldir", "test --htmldir and --htmlroot 1c", + "--podpath=t:test.lib", + "--podroot=$relcwd", + "--htmldir=$relcwd", "--htmlroot=/", ); @@ -61,7 +70,7 @@ __DATA__ <p>Normal text, a <a>link</a> to nowhere,</p> -<p>a link to <a href="/usr/share/perl/5.10.1/pod/perlvar.html">perlvar</a>,</p> +<p>a link to <a href="[CURRENTWORKINGDIRECTORY]/test.lib/perlvar.html">perlvar</a>,</p> <p><a href="[CURRENTWORKINGDIRECTORY]/t/htmlescp.html">htmlescp</a>,</p> diff --git a/ext/Pod-Html/t/htmldir3.t b/ext/Pod-Html/t/htmldir3.t index 3ea936301a..0b5e6ff14e 100644 --- a/ext/Pod-Html/t/htmldir3.t +++ b/ext/Pod-Html/t/htmldir3.t @@ -13,20 +13,20 @@ use Cwd; # XXX Is there a better way to do this? 
I need a relative url to cwd because of # --podpath and --podroot # Remove root dir from path -my $cwd = substr(Cwd::cwd(), length(File::Spec->rootdir())); +my $relcwd = substr(Cwd::cwd(), length(File::Spec->rootdir())); my $data_pos = tell DATA; # to read <DATA> twice convert_n_test("htmldir3", "test --htmldir and --htmlroot 3a", - "--podpath=$cwd", + "--podpath=$relcwd", "--podroot=/", - "--htmldir=/$cwd/t/", # test removal trailing slash + "--htmldir=/$relcwd/t/", # test removal trailing slash ); seek DATA, $data_pos, 0; # to read <DATA> twice (expected output is the same) convert_n_test("htmldir3", "test --htmldir and --htmlroot 3b", - "--podpath=$cwd/t", + "--podpath=$relcwd/t", "--podroot=/", "--htmldir=t", "--outfile=t/htmldir4.html", @@ -59,7 +59,7 @@ __DATA__ <p>Normal text, a <a>link</a> to nowhere,</p> -<p>a link to <a>perlvar</a>,</p> +<p>a link to <a href="[RELCURRENTWORKINGDIRECTORY]/test.lib/perlvar.html">perlvar</a>,</p> <p><a href="[RELCURRENTWORKINGDIRECTORY]/t/htmlescp.html">htmlescp</a>,</p> diff --git a/ext/Pod-Html/test.lib/perlpodspec.pod b/ext/Pod-Html/test.lib/perlpodspec.pod new file mode 100644 index 0000000000..8973a7080c --- /dev/null +++ b/ext/Pod-Html/test.lib/perlpodspec.pod @@ -0,0 +1,1899 @@ + +=head1 NAME + +perlpodspec - Plain Old Documentation: format specification and notes + +=head1 DESCRIPTION + +This document is detailed notes on the Pod markup language. Most +people will only have to read L<perlpod|perlpod> to know how to write +in Pod, but this document may answer some incidental questions to do +with parsing and rendering Pod. + +In this document, "must" / "must not", "should" / +"should not", and "may" have their conventional (cf. RFC 2119) +meanings: "X must do Y" means that if X doesn't do Y, it's against +this specification, and should really be fixed. "X should do Y" +means that it's recommended, but X may fail to do Y, if there's a +good reason. 
"X may do Y" is merely a note that X can do Y at +will (although it is up to the reader to detect any connotation of +"and I think it would be I<nice> if X did Y" versus "it wouldn't +really I<bother> me if X did Y"). + +Notably, when I say "the parser should do Y", the +parser may fail to do Y, if the calling application explicitly +requests that the parser I<not> do Y. I often phrase this as +"the parser should, by default, do Y." This doesn't I<require> +the parser to provide an option for turning off whatever +feature Y is (like expanding tabs in verbatim paragraphs), although +it implicates that such an option I<may> be provided. + +=head1 Pod Definitions + +Pod is embedded in files, typically Perl source files -- although you +can write a file that's nothing but Pod. + +A B<line> in a file consists of zero or more non-newline characters, +terminated by either a newline or the end of the file. + +A B<newline sequence> is usually a platform-dependent concept, but +Pod parsers should understand it to mean any of CR (ASCII 13), LF +(ASCII 10), or a CRLF (ASCII 13 followed immediately by ASCII 10), in +addition to any other system-specific meaning. The first CR/CRLF/LF +sequence in the file may be used as the basis for identifying the +newline sequence for parsing the rest of the file. + +A B<blank line> is a line consisting entirely of zero or more spaces +(ASCII 32) or tabs (ASCII 9), and terminated by a newline or end-of-file. +A B<non-blank line> is a line containing one or more characters other +than space or tab (and terminated by a newline or end-of-file). + +(I<Note:> Many older Pod parsers did not accept a line consisting of +spaces/tabs and then a newline as a blank line -- the only lines they +considered blank were lines consisting of I<no characters at all>, +terminated by a newline.) + +B<Whitespace> is used in this document as a blanket term for spaces, +tabs, and newline sequences. (By itself, this term usually refers +to literal whitespace. 
That is, sequences of whitespace characters +in Pod source, as opposed to "EE<lt>32>", which is a formatting +code that I<denotes> a whitespace character.) + +A B<Pod parser> is a module meant for parsing Pod (regardless of +whether this involves calling callbacks or building a parse tree or +directly formatting it). A B<Pod formatter> (or B<Pod translator>) +is a module or program that converts Pod to some other format (HTML, +plaintext, TeX, PostScript, RTF). A B<Pod processor> might be a +formatter or translator, or might be a program that does something +else with the Pod (like counting words, scanning for index points, +etc.). + +Pod content is contained in B<Pod blocks>. A Pod block starts with a +line that matches C<m/\A=[a-zA-Z]/>, and continues up to the next line +that matches C<m/\A=cut/> -- or up to the end of the file, if there is +no C<m/\A=cut/> line. + +=for comment + The current perlsyn says: + [beginquote] + Note that pod translators should look at only paragraphs beginning + with a pod directive (it makes parsing easier), whereas the compiler + actually knows to look for pod escapes even in the middle of a + paragraph. This means that the following secret stuff will be ignored + by both the compiler and the translators. + $a=3; + =secret stuff + warn "Neither POD nor CODE!?" + =cut back + print "got $a\n"; + You probably shouldn't rely upon the warn() being podded out forever. + Not all pod translators are well-behaved in this regard, and perhaps + the compiler will become pickier. + [endquote] + I think that those paragraphs should just be removed; paragraph-based + parsing seems to have been largely abandoned, because of the hassle + with non-empty blank lines messing up what people meant by "paragraph". + Even if the "it makes parsing easier" bit were especially true, + it wouldn't be worth the confusion of having perl and pod2whatever + actually disagree on what can constitute a Pod block. + +Within a Pod block, there are B<Pod paragraphs>.
A Pod paragraph +consists of non-blank lines of text, separated by one or more blank +lines. + +For purposes of Pod processing, there are four types of paragraphs in +a Pod block: + +=over + +=item * + +A command paragraph (also called a "directive"). The first line of +this paragraph must match C<m/\A=[a-zA-Z]/>. Command paragraphs are +typically one line, as in: + + =head1 NOTES + + =item * + +But they may span several (non-blank) lines: + + =for comment + Hm, I wonder what it would look like if + you tried to write a BNF for Pod from this. + + =head3 Dr. Strangelove, or: How I Learned to + Stop Worrying and Love the Bomb + +I<Some> command paragraphs allow formatting codes in their content +(i.e., after the part that matches C<m/\A=[a-zA-Z]\S*\s*/>), as in: + + =head1 Did You Remember to C<use strict;>? + +In other words, the Pod processing handler for "head1" will apply the +same processing to "Did You Remember to CE<lt>use strict;>?" that it +would to an ordinary paragraph -- i.e., formatting codes (like +"CE<lt>...>") are parsed and presumably formatted appropriately, and +whitespace in the form of literal spaces and/or tabs is not +significant. + +=item * + +A B<verbatim paragraph>. The first line of this paragraph must be a +literal space or tab, and this paragraph must not be inside a "=begin +I<identifier>", ... "=end I<identifier>" sequence unless +"I<identifier>" begins with a colon (":"). That is, if a paragraph +starts with a literal space or tab, but I<is> inside a +"=begin I<identifier>", ... "=end I<identifier>" region, then it's +a data paragraph, unless "I<identifier>" begins with a colon. + +Whitespace I<is> significant in verbatim paragraphs (although, in +processing, tabs are probably expanded). + +=item * + +An B<ordinary paragraph>. A paragraph is an ordinary paragraph +if its first line matches neither C<m/\A=[a-zA-Z]/> nor +C<m/\A[ \t]/>, I<and> if it's not inside a "=begin I<identifier>", +... 
"=end I<identifier>" sequence unless "I<identifier>" begins with +a colon (":"). + +=item * + +A B<data paragraph>. This is a paragraph that I<is> inside a "=begin +I<identifier>" ... "=end I<identifier>" sequence where +"I<identifier>" does I<not> begin with a literal colon (":"). In +some sense, a data paragraph is not part of Pod at all (i.e., +effectively it's "out-of-band"), since it's not subject to most kinds +of Pod parsing; but it is specified here, since Pod +parsers need to be able to call an event for it, or store it in some +form in a parse tree, or at least just parse I<around> it. + +=back + +For example: consider the following paragraphs: + + # <- that's the 0th column + + =head1 Foo + + Stuff + + $foo->bar + + =cut + +Here, "=head1 Foo" and "=cut" are command paragraphs because the first +line of each matches C<m/\A=[a-zA-Z]/>. "I<[space][space]>$foo->bar" +is a verbatim paragraph, because its first line starts with a literal +whitespace character (and there's no "=begin"..."=end" region around). + +The "=begin I<identifier>" ... "=end I<identifier>" commands stop +paragraphs that they surround from being parsed as ordinary or verbatim +paragraphs, if I<identifier> doesn't begin with a colon. This +is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +=head1 Pod Commands + +This section is intended to supplement and clarify the discussion in +L<perlpod/"Command Paragraph">. These are the currently recognized +Pod commands: + +=over + +=item "=head1", "=head2", "=head3", "=head4" + +This command indicates that the text in the remainder of the paragraph +is a heading. That text may contain formatting codes. Examples: + + =head1 Object Attributes + + =head3 What B<Not> to Do! + +=item "=pod" + +This command indicates that this paragraph begins a Pod block. (If we +are already in the middle of a Pod block, this command has no effect at +all.) 
If there is any text in this command paragraph after "=pod", +it must be ignored. Examples: + + =pod + + This is a plain Pod paragraph. + + =pod This text is ignored. + +=item "=cut" + +This command indicates that this line is the end of this previously +started Pod block. If there is any text after "=cut" on the line, it must be +ignored. Examples: + + =cut + + =cut The documentation ends here. + + =cut + # This is the first line of program text. + sub foo { # This is the second. + +It is an error to try to I<start> a Pod block with a "=cut" command. In +that case, the Pod processor must halt parsing of the input file, and +must by default emit a warning. + +=item "=over" + +This command indicates that this is the start of a list/indent +region. If there is any text following the "=over", it must consist +of only a nonzero positive numeral. The semantics of this numeral is +explained in the L</"About =over...=back Regions"> section, further +below. Formatting codes are not expanded. Examples: + + =over 3 + + =over 3.5 + + =over + +=item "=item" + +This command indicates that an item in a list begins here. Formatting +codes are processed. The semantics of the (optional) text in the +remainder of this paragraph are +explained in the L</"About =over...=back Regions"> section, further +below. Examples: + + =item + + =item * + + =item * + + =item 14 + + =item 3. + + =item C<< $thing->stuff(I<dodad>) >> + + =item For transporting us beyond seas to be tried for pretended + offenses + + =item He is at this time transporting large armies of foreign + mercenaries to complete the works of death, desolation and + tyranny, already begun with circumstances of cruelty and perfidy + scarcely paralleled in the most barbarous ages, and totally + unworthy the head of a civilized nation. + +=item "=back" + +This command indicates that this is the end of the region begun +by the most recent "=over" command. It permits no text after the +"=back" command. 
+ +=item "=begin formatname" + +This marks the following paragraphs (until the matching "=end +formatname") as being for some special kind of processing. Unless +"formatname" begins with a colon, the contained non-command +paragraphs are data paragraphs. But if "formatname" I<does> begin +with a colon, then non-command paragraphs are ordinary paragraphs +or data paragraphs. This is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +It is advised that formatnames match the regexp +C<m/\A:?[-a-zA-Z0-9_]+\z/>. Implementors should anticipate future +expansion in the semantics and syntax of the first parameter +to "=begin"/"=end"/"=for". + +=item "=end formatname" + +This marks the end of the region opened by the matching +"=begin formatname" region. If "formatname" is not the formatname +of the most recent open "=begin formatname" region, then this +is an error, and must generate an error message. This +is discussed in detail in the section +L</About Data Paragraphs and "=beginE<sol>=end" Regions>. + +=item "=for formatname text..." + +This is synonymous with: + + =begin formatname + + text... + + =end formatname + +That is, it creates a region consisting of a single paragraph; that +paragraph is to be treated as a normal paragraph if "formatname" +begins with a ":"; if "formatname" I<doesn't> begin with a colon, +then "text..." will constitute a data paragraph. There is no way +to use "=for formatname text..." to express "text..." as a verbatim +paragraph. + +=item "=encoding encodingname" + +This command, which should occur early in the document (at least +before any non-US-ASCII data!), declares that this document is +encoded in the encoding I<encodingname>, which must be +an encoding name that L<Encode> recognizes. (Encode's list +of supported encodings, in L<Encode::Supported>, is useful here.) 
+If the Pod parser cannot decode the declared encoding, it +should emit a warning and may abort parsing the document +altogether. + +A document having more than one "=encoding" line should be +considered an error. Pod processors may silently tolerate this if +the not-first "=encoding" lines are just duplicates of the +first one (e.g., if there's a "=encoding utf8" line, and later on +another "=encoding utf8" line). But Pod processors should complain if +there are contradictory "=encoding" lines in the same document +(e.g., if there is a "=encoding utf8" early in the document and +"=encoding big5" later). Pod processors that recognize BOMs +may also complain if they see an "=encoding" line +that contradicts the BOM (e.g., if a document with a UTF-16LE +BOM has an "=encoding shiftjis" line). + +=back + +If a Pod processor sees any command other than the ones listed +above (like "=head", or "=haed1", or "=stuff", or "=cuttlefish", +or "=w123"), that processor must by default treat this as an +error. It must not process the paragraph beginning with that +command, must by default warn of this as an error, and may +abort the parse. A Pod parser may allow a way for particular +applications to add to the above list of known commands, and to +stipulate, for each additional command, whether formatting +codes should be processed. + +Future versions of this specification may add additional +commands. + + + +=head1 Pod Formatting Codes + +(Note that in previous drafts of this document and of perlpod, +formatting codes were referred to as "interior sequences", and +this term may still be found in the documentation for Pod parsers, +and in error messages from Pod processors.) + +There are two syntaxes for formatting codes: + +=over + +=item * + +A formatting code starts with a capital letter (just US-ASCII [A-Z]) +followed by a "<", any number of characters, and ending with the first +matching ">". Examples: + + That's what I<you> think! + + What's C<dump()> for? 
+ + X<C<chmod> and C<unlink()> Under Different Operating Systems> + +=item * + +A formatting code starts with a capital letter (just US-ASCII [A-Z]) +followed by two or more "<"'s, one or more whitespace characters, +any number of characters, one or more whitespace characters, +and ending with the first matching sequence of two or more ">"'s, where +the number of ">"'s equals the number of "<"'s in the opening of this +formatting code. Examples: + + That's what I<< you >> think! + + C<<< open(X, ">>thing.dat") || die $! >>> + + B<< $foo->bar(); >> + +With this syntax, the whitespace character(s) after the "CE<lt><<" +and before the ">>" (or whatever letter) are I<not> renderable -- they +do not signify whitespace, are merely part of the formatting codes +themselves. That is, these are all synonymous: + + C<thing> + C<< thing >> + C<< thing >> + C<<< thing >>> + C<<<< + thing + >>>> + +and so on. + +=back + +In parsing Pod, a notably tricky part is the correct parsing of +(potentially nested!) formatting codes. Implementors should +consult the code in the C<parse_text> routine in Pod::Parser as an +example of a correct implementation. + +=over + +=item C<IE<lt>textE<gt>> -- italic text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<BE<lt>textE<gt>> -- bold text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<CE<lt>codeE<gt>> -- code text + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<FE<lt>filenameE<gt>> -- style for filenames + +See the brief discussion in L<perlpod/"Formatting Codes">. + +=item C<XE<lt>topic nameE<gt>> -- an index entry + +See the brief discussion in L<perlpod/"Formatting Codes">. + +This code is unusual in that most formatters completely discard +this code and its content. Other formatters will render it with +invisible codes that can be used in building an index of +the current document. 
+ +=item C<ZE<lt>E<gt>> -- a null (zero-effect) formatting code + +Discussed briefly in L<perlpod/"Formatting Codes">. + +This code is unusual in that it should have no content. That is, +a processor may complain if it sees C<ZE<lt>potatoesE<gt>>. Whether +or not it complains, the I<potatoes> text should be ignored. + +=item C<LE<lt>nameE<gt>> -- a hyperlink + +The complicated syntaxes of this code are discussed at length in +L<perlpod/"Formatting Codes">, and implementation details are +discussed below, in L</"About LE<lt>...E<gt> Codes">. Parsing the +contents of LE<lt>content> is tricky. Notably, the content has to be +checked for whether it looks like a URL, or whether it has to be split +on literal "|" and/or "/" (in the right order!), and so on, +I<before> EE<lt>...> codes are resolved. + +=item C<EE<lt>escapeE<gt>> -- a character escape + +See L<perlpod/"Formatting Codes">, and several points in +L</Notes on Implementing Pod Processors>. + +=item C<SE<lt>textE<gt>> -- text contains non-breaking spaces + +This formatting code is syntactically simple, but semantically +complex. What it means is that each space in the printable +content of this code signifies a non-breaking space. + +Consider: + + C<$x ? $y : $z> + + S<C<$x ? $y : $z>> + +Both signify the monospace (c[ode] style) text consisting of +"$x", one space, "?", one space, "$y", one space, ":", one space, "$z". The +difference is that in the latter, with the S code, those spaces +are not "normal" spaces, but instead are non-breaking spaces. + +=back + + +If a Pod processor sees any formatting code other than the ones +listed above (as in "NE<lt>...>", or "QE<lt>...>", etc.), that +processor must by default treat this as an error. +A Pod parser may allow a way for particular +applications to add to the above list of known formatting codes; +a Pod parser might even allow a way to stipulate, for each additional +command, whether it requires some form of special processing, as +LE<lt>...> does.
+ +Future versions of this specification may add additional +formatting codes. + +Historical note: A few older Pod processors would not see a ">" as +closing a "CE<lt>" code, if the ">" was immediately preceded by +a "-". This was so that this: + + C<$foo->bar> + +would parse as equivalent to this: + + C<$foo-E<gt>bar> + +instead of as equivalent to a "C" formatting code containing +only "$foo-", and then a "bar>" outside the "C" formatting code. This +problem has since been solved by the addition of syntaxes like this: + + C<< $foo->bar >> + +Compliant parsers must not treat "->" as special. + +Formatting codes absolutely cannot span paragraphs. If a code is +opened in one paragraph, and no closing code is found by the end of +that paragraph, the Pod parser must close that formatting code, +and should complain (as in "Unterminated I code in the paragraph +starting at line 123: 'Time objects are not...'"). So these +two paragraphs: + + I<I told you not to do this! + + Don't make me say it again!> + +...must I<not> be parsed as two paragraphs in italics (with the I +code starting in one paragraph and ending in another.) Instead, +the first paragraph should generate a warning, but that aside, the +above code must parse as if it were: + + I<I told you not to do this!> + + Don't make me say it again!E<gt> + +(In SGMLish jargon, all Pod commands are like block-level +elements, whereas all Pod formatting codes are like inline-level +elements.) + + + +=head1 Notes on Implementing Pod Processors + +The following is a long section of miscellaneous requirements +and suggestions to do with Pod processing. + +=over + +=item * + +Pod formatters should tolerate lines in verbatim blocks that are of +any length, even if that means having to break them (possibly several +times, for very long lines) to avoid text running off the side of the +page. Pod formatters may warn of such line-breaking.
Such warnings +are particularly appropriate for lines that are over 100 characters long, which +are usually not intentional. + +=item * + +Pod parsers must recognize I<all> of the three well-known newline +formats: CR, LF, and CRLF. See L<perlport|perlport>. + +=item * + +Pod parsers should accept input lines that are of any length. + +=item * + +Since Perl recognizes a Unicode Byte Order Mark at the start of files +as signaling that the file is Unicode encoded as in UTF-16 (whether +big-endian or little-endian) or UTF-8, Pod parsers should do the +same. Otherwise, the character encoding should be understood as +being UTF-8 if the first highbit byte sequence in the file seems +valid as a UTF-8 sequence, or otherwise as Latin-1. + +Future versions of this specification may specify +how Pod can accept other encodings. Presumably treatment of other +encodings in Pod parsing would be as in XML parsing: whatever the +encoding declared by a particular Pod file, content is to be +stored in memory as Unicode characters. + +=item * + +The well known Unicode Byte Order Marks are as follows: if the +file begins with the two literal byte values 0xFE 0xFF, this is +the BOM for big-endian UTF-16. If the file begins with the two +literal byte values 0xFF 0xFE, this is the BOM for little-endian +UTF-16. If the file begins with the three literal byte values +0xEF 0xBB 0xBF, this is the BOM for UTF-8. + +=for comment + use bytes; print map sprintf(" 0x%02X", ord $_), split '', "\x{feff}"; + 0xEF 0xBB 0xBF + +=for comment + If toke.c is modified to support UTF-32, add mention of those here. + +=item * + +A naive but sufficient heuristic for testing the first highbit +byte-sequence in a BOM-less file (whether in code or in Pod!), to see +whether that sequence is valid as UTF-8 (RFC 2279) is to check whether +the first byte in the sequence is in the range 0xC0 - 0xFD +I<and> whether the next byte is in the range +0x80 - 0xBF.
If so, the parser may conclude that this file is in +UTF-8, and all highbit sequences in the file should be assumed to +be UTF-8. Otherwise the parser should treat the file as being +in Latin-1. In the unlikely circumstance that the first highbit +sequence in a truly non-UTF-8 file happens to appear to be UTF-8, one +can cater to our heuristic (as well as any more intelligent heuristic) +by prefacing that line with a comment line containing a highbit +sequence that is clearly I<not> valid as UTF-8. A line consisting +of simply "#", an e-acute, and any non-highbit byte, +is sufficient to establish this file's encoding. + +=for comment + If/WHEN some brave soul makes these heuristics into a generic + text-file class (or PerlIO layer?), we can presumably delete + mention of these icky details from this file, and can instead + tell people to just use appropriate class/layer. + Auto-recognition of newline sequences would be another desirable + feature of such a class/layer. + HINT HINT HINT. + +=for comment + "The probability that a string of characters + in any other encoding appears as valid UTF-8 is low" - RFC2279 + +=item * + +This document's requirements and suggestions about encodings +do not apply to Pod processors running on non-ASCII platforms, +notably EBCDIC platforms. + +=item * + +Pod processors must treat a "=for [label] [content...]" paragraph as +meaning the same thing as a "=begin [label]" paragraph, content, and +an "=end [label]" paragraph. (The parser may conflate these two +constructs, or may leave them distinct, in the expectation that the +formatter will nevertheless treat them the same.) + +=item * + +When rendering Pod to a format that allows comments (i.e., to nearly +any format other than plaintext), a Pod formatter must insert comment +text identifying its name and version number, and the name and +version numbers of any modules it might be using to process the Pod. 
+Minimal examples: + + %% POD::Pod2PS v3.14159, using POD::Parser v1.92 + + <!-- Pod::HTML v3.14159, using POD::Parser v1.92 --> + + {\doccomm generated by Pod::Tree::RTF 3.14159 using Pod::Tree 1.08} + + .\" Pod::Man version 3.14159, using POD::Parser version 1.92 + +Formatters may also insert additional comments, including: the +release date of the Pod formatter program, the contact address for +the author(s) of the formatter, the current time, the name of input +file, the formatting options in effect, version of Perl used, etc. + +Formatters may also choose to note errors/warnings as comments, +besides or instead of emitting them otherwise (as in messages to +STDERR, or C<die>ing). + +=item * + +Pod parsers I<may> emit warnings or error messages ("Unknown E code +EE<lt>zslig>!") to STDERR (whether through printing to STDERR, or +C<warn>ing/C<carp>ing, or C<die>ing/C<croak>ing), but I<must> allow +suppressing all such STDERR output, and instead allow an option for +reporting errors/warnings +in some other way, whether by triggering a callback, or noting errors +in some attribute of the document object, or some similarly unobtrusive +mechanism -- or even by appending a "Pod Errors" section to the end of +the parsed form of the document. + +=item * + +In cases of exceptionally aberrant documents, Pod parsers may abort the +parse. Even then, using C<die>ing/C<croak>ing is to be avoided; where +possible, the parser library may simply close the input file +and add text like "*** Formatting Aborted ***" to the end of the +(partial) in-memory document. 
+ +=item * + +In paragraphs where formatting codes (like EE<lt>...>, BE<lt>...>) +are understood (i.e., I<not> verbatim paragraphs, but I<including> +ordinary paragraphs, and command paragraphs that produce renderable +text, like "=head1"), literal whitespace should generally be considered +"insignificant", in that one literal space has the same meaning as any +(nonzero) number of literal spaces, literal newlines, and literal tabs +(as long as this produces no blank lines, since those would terminate +the paragraph). Pod parsers should compact literal whitespace in each +processed paragraph, but may provide an option for overriding this +(since some processing tasks do not require it), or may follow +additional special rules (for example, specially treating +period-space-space or period-newline sequences). + +=item * + +Pod parsers should not, by default, try to coerce apostrophe (') and +quote (") into smart quotes (little 9's, 66's, 99's, etc), nor try to +turn backtick (`) into anything else but a single backtick character +(distinct from an open quote character!), nor "--" into anything but +two minus signs. They I<must never> do any of those things to text +in CE<lt>...> formatting codes, and never I<ever> to text in verbatim +paragraphs. + +=item * + +When rendering Pod to a format that has two kinds of hyphens (-), one +that's a non-breaking hyphen, and another that's a breakable hyphen +(as in "object-oriented", which can be split across lines as +"object-", newline, "oriented"), formatters are encouraged to +generally translate "-" to non-breaking hyphen, but may apply +heuristics to convert some of these to breaking hyphens. + +=item * + +Pod formatters should make reasonable efforts to keep words of Perl +code from being broken across lines. For example, "Foo::Bar" in some +formatting systems is seen as eligible for being broken across lines +as "Foo::" newline "Bar" or even "Foo::-" newline "Bar". 
This should +be avoided where possible, either by disabling all line-breaking in +mid-word, or by wrapping particular words with internal punctuation +in "don't break this across lines" codes (which in some formats may +not be a single code, but might be a matter of inserting non-breaking +zero-width spaces between every pair of characters in a word.) + +=item * + +Pod parsers should, by default, expand tabs in verbatim paragraphs as +they are processed, before passing them to the formatter or other +processor. Parsers may also allow an option for overriding this. + +=item * + +Pod parsers should, by default, remove newlines from the end of +ordinary and verbatim paragraphs before passing them to the +formatter. For example, while the paragraph you're reading now +could be considered, in Pod source, to end with (and contain) +the newline(s) that end it, it should be processed as ending with +(and containing) the period character that ends this sentence. + +=item * + +Pod parsers, when reporting errors, should make some effort to report +an approximate line number ("Nested EE<lt>>'s in Paragraph #52, near +line 633 of Thing/Foo.pm!"), instead of merely noting the paragraph +number ("Nested EE<lt>>'s in Paragraph #52 of Thing/Foo.pm!"). Where +this is problematic, the paragraph number should at least be +accompanied by an excerpt from the paragraph ("Nested EE<lt>>'s in +Paragraph #52 of Thing/Foo.pm, which begins 'Read/write accessor for +the CE<lt>interest rate> attribute...'"). + +=item * + +Pod parsers, when processing a series of verbatim paragraphs one +after another, should consider them to be one large verbatim +paragraph that happens to contain blank lines. I.e., these two +lines, which have a blank line between them: + + use Foo; + + print Foo->VERSION + +should be unified into one paragraph ("\tuse Foo;\n\n\tprint +Foo->VERSION") before being passed to the formatter or other +processor. Parsers may also allow an option for overriding this. 
+ +While this might be too cumbersome to implement in event-based Pod +parsers, it is straightforward for parsers that return parse trees. + +=item * + +Pod formatters, where feasible, are advised to avoid splitting short +verbatim paragraphs (under twelve lines, say) across pages. + +=item * + +Pod parsers must treat a line with only spaces and/or tabs on it as a +"blank line" such as separates paragraphs. (Some older parsers +recognized only two adjacent newlines as a "blank line" but would not +recognize a newline, a space, and a newline, as a blank line. This +is noncompliant behavior.) + +=item * + +Authors of Pod formatters/processors should make every effort to +avoid writing their own Pod parser. There are already several in +CPAN, with a wide range of interface styles -- and one of them, +Pod::Parser, comes with modern versions of Perl. + +=item * + +Characters in Pod documents may be conveyed either as literals, or by +number in EE<lt>n> codes, or by an equivalent mnemonic, as in +EE<lt>eacute> which is exactly equivalent to EE<lt>233>. + +Characters in the range 32-126 refer to those well known US-ASCII +characters (also defined there by Unicode, with the same meaning), +which all Pod formatters must render faithfully. Characters +in the ranges 0-31 and 127-159 should not be used (neither as +literals, nor as EE<lt>number> codes), except for the +literal byte-sequences for newline (13, 13 10, or 10), and tab (9). + +Characters in the range 160-255 refer to Latin-1 characters (also +defined there by Unicode, with the same meaning). Characters above +255 should be understood to refer to Unicode characters. + +=item * + +Be warned +that some formatters cannot reliably render characters outside 32-126; +and many are able to handle 32-126 and 160-255, but nothing above +255. 
+ +=item * + +Besides the well-known "EE<lt>lt>" and "EE<lt>gt>" codes for +less-than and greater-than, Pod parsers must understand "EE<lt>sol>" +for "/" (solidus, slash), and "EE<lt>verbar>" for "|" (vertical bar, +pipe). Pod parsers should also understand "EE<lt>lchevron>" and +"EE<lt>rchevron>" as legacy codes for characters 171 and 187, i.e., +"left-pointing double angle quotation mark" = "left pointing +guillemet" and "right-pointing double angle quotation mark" = "right +pointing guillemet". (These look like little "<<" and ">>", and they +are now preferably expressed with the HTML/XHTML codes "EE<lt>laquo>" +and "EE<lt>raquo>".) + +=item * + +Pod parsers should understand all "EE<lt>html>" codes as defined +in the entity declarations in the most recent XHTML specification at +C<www.W3.org>. Pod parsers must understand at least the entities +that define characters in the range 160-255 (Latin-1). Pod parsers, +when faced with some unknown "EE<lt>I<identifier>>" code, +shouldn't simply replace it with nullstring (by default, at least), +but may pass it through as a string consisting of the literal characters +E, less-than, I<identifier>, greater-than. Or Pod parsers may offer the +alternative option of processing such unknown +"EE<lt>I<identifier>>" codes by firing an event especially +for such codes, or by adding a special node-type to the in-memory +document tree. Such "EE<lt>I<identifier>>" may have special meaning +to some processors, or some processors may choose to add them to +a special error report. + +=item * + +Pod parsers must also support the XHTML codes "EE<lt>quot>" for +character 34 (doublequote, "), "EE<lt>amp>" for character 38 +(ampersand, &), and "EE<lt>apos>" for character 39 (apostrophe, '). + +=item * + +Note that in all cases of "EE<lt>whatever>", I<whatever> (whether +an htmlname, or a number in any base) must consist only of +alphanumeric characters -- that is, I<whatever> must match +C<m/\A\w+\z/>.
So "EE<lt> 0 1 2 3 >" is invalid, because +it contains spaces, which aren't alphanumeric characters. This +presumably does not I<need> special treatment by a Pod processor; +" 0 1 2 3 " doesn't look like a number in any base, so it would +presumably be looked up in the table of HTML-like names. Since +there isn't (and cannot be) an HTML-like entity called " 0 1 2 3 ", +this will be treated as an error. However, Pod processors may +treat "EE<lt> 0 1 2 3 >" or "EE<lt>e-acute>" as I<syntactically> +invalid, potentially earning a different error message than the +error message (or warning, or event) generated by a merely unknown +(but theoretically valid) htmlname, as in "EE<lt>qacute>" +[sic]. However, Pod parsers are not required to make this +distinction. + +=item * + +Note that EE<lt>number> I<must not> be interpreted as simply +"codepoint I<number> in the current/native character set". It always +means only "the character represented by codepoint I<number> in +Unicode." (This is identical to the semantics of &#I<number>; in XML.) + +This will likely require many formatters to have tables mapping from +treatable Unicode codepoints (such as the "\xE9" for the e-acute +character) to the escape sequences or codes necessary for conveying +such sequences in the target output format. A converter to *roff +would, for example know that "\xE9" (whether conveyed literally, or via +a EE<lt>...> sequence) is to be conveyed as "e\\*'". +Similarly, a program rendering Pod in a Mac OS application window, would +presumably need to know that "\xE9" maps to codepoint 142 in MacRoman +encoding that (at time of writing) is native for Mac OS. Such +Unicode2whatever mappings are presumably already widely available for +common output formats. (Such mappings may be incomplete! Implementers +are not expected to bend over backwards in an attempt to render +Cherokee syllabics, Etruscan runes, Byzantine musical symbols, or any +of the other weird things that Unicode can encode.) 
And +if a Pod document uses a character not found in such a mapping, the +formatter should consider it an unrenderable character. + +=item * + +If, surprisingly, the implementor of a Pod formatter can't find a +satisfactory pre-existing table mapping from Unicode characters to +escapes in the target format (e.g., a decent table of Unicode +characters to *roff escapes), it will be necessary to build such a +table. If you are in this circumstance, you should begin with the +characters in the range 0x00A0 - 0x00FF, which is mostly the heavily +used accented characters. Then proceed (as patience permits and +fastidiousness compels) through the characters that the (X)HTML +standards groups judged important enough to merit mnemonics +for. These are declared in the (X)HTML specifications at the +www.W3.org site. At time of writing (September 2001), the most recent +entity declaration files are: + + http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent + http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent + http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent + +Then you can progress through any remaining notable Unicode characters +in the range 0x2000-0x204D (consult the character tables at +www.unicode.org), and whatever else strikes your fancy. For example, +in F<xhtml-symbol.ent>, there is the entry: + + <!ENTITY infin "&#8734;"> <!-- infinity, U+221E ISOtech --> + +While the mapping "infin" to the character "\x{221E}" will (hopefully) +have been already handled by the Pod parser, the presence of the +character in this file means that it's reasonably important enough to +include in a formatter's table that maps from notable Unicode characters +to the codes necessary for rendering them.
So for a Unicode-to-*roff +mapping, for example, this would merit the entry: + + "\x{221E}" => '\(in', + +It is eagerly hoped that in the future, increasing numbers of formats +(and formatters) will support Unicode characters directly (as (X)HTML +does with C<&infin;>, C<&#8734;>, or C<&#x221E;>), reducing the need +for idiosyncratic mappings of Unicode-to-I<my_escapes>. + +=item * + +It is up to individual Pod formatters to display good judgement when +confronted with an unrenderable character (which is distinct from an +unknown EE<lt>thing> sequence that the parser couldn't resolve to +anything, renderable or not). It is good practice to map Latin letters +with diacritics (like "EE<lt>eacute>"/"EE<lt>233>") to the corresponding +unaccented US-ASCII letters (like a simple character 101, "e"), but +clearly this is often not feasible, and an unrenderable character may +be represented as "?", or the like. In attempting a sane fallback +(as from EE<lt>233> to "e"), Pod formatters may use the +%Latin1Code_to_fallback table in L<Pod::Escapes|Pod::Escapes>, or +L<Text::Unidecode|Text::Unidecode>, if available. + +For example, this Pod text: + + magic is enabled if you set C<$Currency> to 'E<euro>'. + +may be rendered as: +"magic is enabled if you set C<$Currency> to 'I<?>'" or as +"magic is enabled if you set C<$Currency> to 'B<[euro]>'", or as +"magic is enabled if you set C<$Currency> to '[x20AC]'", etc. + +A Pod formatter may also note, in a comment or warning, a list of what +unrenderable characters were encountered. + +=item * + +EE<lt>...> may freely appear in any formatting code (other than +in another EE<lt>...> or in an ZE<lt>>). That is, "XE<lt>The +EE<lt>euro>1,000,000 Solution>" is valid, as is "LE<lt>The +EE<lt>euro>1,000,000 Solution|Million::Euros>".
+ +=item * + +Some Pod formatters output to formats that implement non-breaking +spaces as an individual character (which I'll call "NBSP"), and +others output to formats that implement non-breaking spaces just as +spaces wrapped in a "don't break this across lines" code. Note that +at the level of Pod, both sorts of codes can occur: Pod can contain a +NBSP character (whether as a literal, or as a "EE<lt>160>" or +"EE<lt>nbsp>" code); and Pod can contain "SE<lt>foo +IE<lt>barE<gt> baz>" codes, where "mere spaces" (character 32) in +such codes are taken to represent non-breaking spaces. Pod +parsers should consider supporting the optional parsing of "SE<lt>foo +IE<lt>barE<gt> baz>" as if it were +"fooI<NBSP>IE<lt>barE<gt>I<NBSP>baz", and, going the other way, the +optional parsing of groups of words joined by NBSP's as if each group +were in a SE<lt>...> code, so that formatters may use the +representation that maps best to what the output format demands. + +=item * + +Some processors may find that the C<SE<lt>...E<gt>> code is easiest to +implement by replacing each space in the parse tree under the content +of the S, with an NBSP. But note: the replacement should apply I<not> to +spaces in I<all> text, but I<only> to spaces in I<printable> text. (This +distinction may or may not be evident in the particular tree/event +model implemented by the Pod parser.) For example, consider this +unusual case: + + S<L</Autoloaded Functions>> + +This means that the space in the middle of the visible link text must +not be broken across lines. In other words, it's the same as this: + + L<"AutoloadedE<160>Functions"/Autoloaded Functions> + +However, a misapplied space-to-NBSP replacement could (wrongly) +produce something equivalent to this: + + L<"AutoloadedE<160>Functions"/AutoloadedE<160>Functions> + +...which is almost definitely not going to work as a hyperlink (assuming +this formatter outputs a format supporting hypertext). 
+ +Formatters may choose to just not support the S format code, +especially in cases where the output format simply has no NBSP +character/code and no code for "don't break this stuff across lines". + +=item * + +Besides the NBSP character discussed above, implementors are reminded +of the existence of the other "special" character in Latin-1, the +"soft hyphen" character, also known as "discretionary hyphen" +(i.e. C<EE<lt>173E<gt>> = C<EE<lt>0xADE<gt>> = +C<EE<lt>shyE<gt>>). This character expresses an optional hyphenation +point. That is, it normally renders as nothing, but may render as a +"-" if a formatter breaks the word at that point. Pod formatters +should, as appropriate, do one of the following: 1) render this with +a code with the same meaning (e.g., "\-" in RTF), 2) pass it through +in the expectation that the formatter understands this character as +such, or 3) delete it. + +For example: + + sigE<shy>action + manuE<shy>script + JarkE<shy>ko HieE<shy>taE<shy>nieE<shy>mi + +These signal to a formatter that if it is to hyphenate "sigaction" +or "manuscript", then it should be done as +"sig-I<[linebreak]>action" or "manu-I<[linebreak]>script" +(and if it doesn't hyphenate it, then the C<EE<lt>shyE<gt>> doesn't +show up at all). And if it is +to hyphenate "Jarkko" and/or "Hietaniemi", it can do +so only at the points where there is a C<EE<lt>shyE<gt>> code. + +In practice, it is anticipated that this character will not be used +often, but formatters should either support it, or delete it. + +=item * + +If you think that you want to add a new command to Pod (like, say, a +"=biblio" command), consider whether you could get the same +effect with a for or begin/end sequence: "=for biblio ..." or "=begin +biblio" ... "=end biblio". Pod processors that don't understand +"=for biblio", etc, will simply ignore it, whereas they may complain +loudly if they see "=biblio".
+ +=item * + +Throughout this document, "Pod" has been the preferred spelling for +the name of the documentation format. One may also use "POD" or +"pod". For the documentation that is (typically) in the Pod +format, you may use "pod", or "Pod", or "POD". Understanding these +distinctions is useful; but obsessing over how to spell them, usually +is not. + +=back + + + + + +=head1 About LE<lt>...E<gt> Codes + +As you can tell from a glance at L<perlpod|perlpod>, the LE<lt>...> +code is the most complex of the Pod formatting codes. The points below +will hopefully clarify what it means and how processors should deal +with it. + +=over + +=item * + +In parsing an LE<lt>...> code, Pod parsers must distinguish at least +four attributes: + +=over + +=item First: + +The link-text. If there is none, this must be undef. (E.g., in +"LE<lt>Perl Functions|perlfunc>", the link-text is "Perl Functions". +In "LE<lt>Time::HiRes>" and even "LE<lt>|Time::HiRes>", there is no +link text. Note that link text may contain formatting.) + +=item Second: + +The possibly inferred link-text -- i.e., if there was no real link +text, then this is the text that we'll infer in its place. (E.g., for +"LE<lt>Getopt::Std>", the inferred link text is "Getopt::Std".) + +=item Third: + +The name or URL, or undef if none. (E.g., in "LE<lt>Perl +Functions|perlfunc>", the name -- also sometimes called the page -- +is "perlfunc". In "LE<lt>/CAVEATS>", the name is undef.) + +=item Fourth: + +The section (AKA "item" in older perlpods), or undef if none. E.g., +in "LE<lt>Getopt::Std/DESCRIPTIONE<gt>", "DESCRIPTION" is the section. (Note +that this is not the same as a manpage section like the "5" in "man 5 +crontab". "Section Foo" in the Pod sense means the part of the text +that's introduced by the heading or item whose text is "Foo".) 
+ +=back + +Pod parsers may also note additional attributes including: + +=over + +=item Fifth: + +A flag for whether item 3 (if present) is a URL (like +"http://lists.perl.org" is), in which case there should be no section +attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or +possibly a man page name (like "crontab(5)" is). + +=item Sixth: + +The raw original LE<lt>...> content, before text is split on +"|", "/", etc, and before EE<lt>...> codes are expanded. + +=back + +(The above were numbered only for concise reference below. It is not +a requirement that these be passed as an actual list or array.) + +For example: + + L<Foo::Bar> + => undef, # link text + "Foo::Bar", # possibly inferred link text + "Foo::Bar", # name + undef, # section + 'pod', # what sort of link + "Foo::Bar" # original content + + L<Perlport's section on NL's|perlport/Newlines> + => "Perlport's section on NL's", # link text + "Perlport's section on NL's", # possibly inferred link text + "perlport", # name + "Newlines", # section + 'pod', # what sort of link + "Perlport's section on NL's|perlport/Newlines" # orig. 
content + + L<perlport/Newlines> + => undef, # link text + '"Newlines" in perlport', # possibly inferred link text + "perlport", # name + "Newlines", # section + 'pod', # what sort of link + "perlport/Newlines" # original content + + L<crontab(5)/"DESCRIPTION"> + => undef, # link text + '"DESCRIPTION" in crontab(5)', # possibly inferred link text + "crontab(5)", # name + "DESCRIPTION", # section + 'man', # what sort of link + 'crontab(5)/"DESCRIPTION"' # original content + + L</Object Attributes> + => undef, # link text + '"Object Attributes"', # possibly inferred link text + undef, # name + "Object Attributes", # section + 'pod', # what sort of link + "/Object Attributes" # original content + + L<http://www.perl.org/> + => undef, # link text + "http://www.perl.org/", # possibly inferred link text + "http://www.perl.org/", # name + undef, # section + 'url', # what sort of link + "http://www.perl.org/" # original content + +Note that you can distinguish URL-links from anything else by the +fact that they match C<m/\A\w+:[^:\s]\S*\z/>. So +C<LE<lt>http://www.perl.comE<gt>> is a URL, but +C<LE<lt>HTTP::ResponseE<gt>> isn't. + +=item * + +In case of LE<lt>...> codes with no "text|" part in them, +older formatters have exhibited great variation in actually displaying +the link or cross reference. For example, LE<lt>crontab(5)> would render +as "the C<crontab(5)> manpage", or "in the C<crontab(5)> manpage" +or just "C<crontab(5)>". + +Pod processors must now treat "text|"-less links as follows: + + L<name> => L<name|name> + L</section> => L<"section"|/section> + L<name/section> => L<"section" in name|name/section> + +=item * + +Note that section names might contain markup. 
I.e., if a section +starts with: + + =head2 About the C<-M> Operator + +or with: + + =item About the C<-M> Operator + +then a link to it would look like this: + + L<somedoc/About the C<-M> Operator> + +Formatters may choose to ignore the markup for purposes of resolving +the link and use only the renderable characters in the section name, +as in: + + <h1><a name="About_the_-M_Operator">About the <code>-M</code> + Operator</h1> + + ... + + <a href="somedoc#About_the_-M_Operator">About the <code>-M</code> + Operator" in somedoc</a> + +=item * + +Previous versions of perlpod distinguished C<LE<lt>name/"section"E<gt>> +links from C<LE<lt>name/itemE<gt>> links (and their targets). These +have been merged syntactically and semantically in the current +specification, and I<section> can refer either to a "=headI<n> Heading +Content" command or to a "=item Item Content" command. This +specification does not specify what behavior should be in the case +of a given document having several things all seeming to produce the +same I<section> identifier (e.g., in HTML, several things all producing +the same I<anchorname> in <a name="I<anchorname>">...</a> +elements). Where Pod processors can control this behavior, they should +use the first such anchor. That is, C<LE<lt>Foo/BarE<gt>> refers to the +I<first> "Bar" section in Foo. + +But for some processors/formats this cannot be easily controlled; as +with the HTML example, the behavior of multiple ambiguous +<a name="I<anchorname>">...</a> is most easily just left up to +browsers to decide. + +=item * + +Authors wanting to link to a particular (absolute) URL, must do so +only with "LE<lt>scheme:...>" codes (like +LE<lt>http://www.perl.org>), and must not attempt "LE<lt>Some Site +Name|scheme:...>" codes. This restriction avoids many problems +in parsing and rendering LE<lt>...> codes. 
+ +=item * + +In a C<LE<lt>text|...E<gt>> code, text may contain formatting codes +for formatting or for EE<lt>...> escapes, as in: + + L<B<ummE<234>stuff>|...> + +For C<LE<lt>...E<gt>> codes without a "name|" part, only +C<EE<lt>...E<gt>> and C<ZE<lt>E<gt>> codes may occur -- no +other formatting codes. That is, authors should not use +"C<LE<lt>BE<lt>Foo::BarE<gt>E<gt>>". + +Note, however, that formatting codes and ZE<lt>>'s can occur in any +and all parts of an LE<lt>...> (i.e., in I<name>, I<section>, I<text>, +and I<url>). + +Authors must not nest LE<lt>...> codes. For example, "LE<lt>The +LE<lt>Foo::Bar> man page>" should be treated as an error. + +=item * + +Note that Pod authors may use formatting codes inside the "text" +part of "LE<lt>text|name>" (and so on for LE<lt>text|/"sec">). + +In other words, this is valid: + + Go read L<the docs on C<$.>|perlvar/"$."> + +Some output formats that do allow rendering "LE<lt>...>" codes as +hypertext, might not allow the link-text to be formatted; in +that case, formatters will have to just ignore that formatting. + +=item * + +At time of writing, C<LE<lt>nameE<gt>> values are of two types: +either the name of a Pod page like C<LE<lt>Foo::BarE<gt>> (which +might be a real Perl module or program in an @INC / PATH +directory, or a .pod file in those places); or the name of a UNIX +man page, like C<LE<lt>crontab(5)E<gt>>. In theory, C<LE<lt>chmodE<gt>> +is ambiguous between a Pod page called "chmod", or the Unix man page +"chmod" (in whatever man-section). However, the presence of a string +in parens, as in "crontab(5)", is sufficient to signal that what +is being discussed is not a Pod page, and so is presumably a +UNIX man page. The distinction is of no importance to many +Pod processors, but some processors that render to hypertext formats +may need to distinguish them in order to know how to render a +given C<LE<lt>fooE<gt>> code.
+ +=item * + +Previous versions of perlpod allowed for a C<LE<lt>sectionE<gt>> syntax +(as in C<LE<lt>Object AttributesE<gt>>), which was not easily distinguishable +from C<LE<lt>nameE<gt>> syntax. This syntax is no longer in the +specification, and has been replaced by the C<LE<lt>"section"E<gt>> syntax +(where the quotes were formerly optional). Pod parsers should tolerate +the C<LE<lt>sectionE<gt>> syntax, for a while at least. The suggested +heuristic for distinguishing C<LE<lt>sectionE<gt>> from C<LE<lt>nameE<gt>> +is that if it contains any whitespace, it's a I<section>. Pod processors +may warn about this being deprecated syntax. + +=back + +=head1 About =over...=back Regions + +"=over"..."=back" regions are used for various kinds of list-like +structures. (I use the term "region" here simply as a collective +term for everything from the "=over" to the matching "=back".) + +=over + +=item * + +The non-zero numeric I<indentlevel> in "=over I<indentlevel>" ... +"=back" is used for giving the formatter a clue as to how many +"spaces" (ems, or roughly equivalent units) it should tab over, +although many formatters will have to convert this to an absolute +measurement that may not exactly match with the size of spaces (or M's) +in the document's base font. Other formatters may have to completely +ignore the number. The lack of any explicit I<indentlevel> parameter is +equivalent to an I<indentlevel> value of 4. Pod processors may +complain if I<indentlevel> is present but is not a positive number +matching C<m/\A(\d*\.)?\d+\z/>. + +=item * + +Authors of Pod formatters are reminded that "=over" ... "=back" may +map to several different constructs in your output format. For +example, in converting Pod to (X)HTML, it can map to any of +<ul>...</ul>, <ol>...</ol>, <dl>...</dl>, or +<blockquote>...</blockquote>. Similarly, "=item" can map to <li> or +<dt>. + +=item * + +Each "=over" ... 
"=back" region should be one of the following: + +=over + +=item * + +An "=over" ... "=back" region containing only "=item *" commands, +each followed by some number of ordinary/verbatim paragraphs, other +nested "=over" ... "=back" regions, "=for..." paragraphs, and +"=begin"..."=end" regions. + +(Pod processors must tolerate a bare "=item" as if it were "=item +*".) Whether "*" is rendered as a literal asterisk, an "o", or as +some kind of real bullet character, is left up to the Pod formatter, +and may depend on the level of nesting. + +=item * + +An "=over" ... "=back" region containing only +C<m/\A=item\s+\d+\.?\s*\z/> paragraphs, each one (or each group of them) +followed by some number of ordinary/verbatim paragraphs, other nested +"=over" ... "=back" regions, "=for..." paragraphs, and/or +"=begin"..."=end" codes. Note that the numbers must start at 1 +in each section, and must proceed in order and without skipping +numbers. + +(Pod processors must tolerate lines like "=item 1" as if they were +"=item 1.", with the period.) + +=item * + +An "=over" ... "=back" region containing only "=item [text]" +commands, each one (or each group of them) followed by some number of +ordinary/verbatim paragraphs, other nested "=over" ... "=back" +regions, or "=for..." paragraphs, and "=begin"..."=end" regions. + +The "=item [text]" paragraph should not match +C<m/\A=item\s+\d+\.?\s*\z/> or C<m/\A=item\s+\*\s*\z/>, nor should it +match just C<m/\A=item\s*\z/>. + +=item * + +An "=over" ... "=back" region containing no "=item" paragraphs at +all, and containing only some number of +ordinary/verbatim paragraphs, and possibly also some nested "=over" +... "=back" regions, "=for..." paragraphs, and "=begin"..."=end" +regions. Such an itemless "=over" ... "=back" region in Pod is +equivalent in meaning to a "<blockquote>...</blockquote>" element in +HTML. + +=back + +Note that with all the above cases, you can determine which type of +"=over" ... 
"=back" you have, by examining the first (non-"=cut", +non-"=pod") Pod paragraph after the "=over" command. + +=item * + +Pod formatters I<must> tolerate arbitrarily large amounts of text +in the "=item I<text...>" paragraph. In practice, most such +paragraphs are short, as in: + + =item For cutting off our trade with all parts of the world + +But they may be arbitrarily long: + + =item For transporting us beyond seas to be tried for pretended + offenses + + =item He is at this time transporting large armies of foreign + mercenaries to complete the works of death, desolation and + tyranny, already begun with circumstances of cruelty and perfidy + scarcely paralleled in the most barbarous ages, and totally + unworthy the head of a civilized nation. + +=item * + +Pod processors should tolerate "=item *" / "=item I<number>" commands +with no accompanying paragraph. The middle item is an example: + + =over + + =item 1 + + Pick up dry cleaning. + + =item 2 + + =item 3 + + Stop by the store. Get Abba Zabas, Stoli, and cheap lawn chairs. + + =back + +=item * + +No "=over" ... "=back" region can contain headings. Processors may +treat such a heading as an error. + +=item * + +Note that an "=over" ... "=back" region should have some +content. That is, authors should not have an empty region like this: + + =over + + =back + +Pod processors seeing such a contentless "=over" ... "=back" region, +may ignore it, or may report it as an error. + +=item * + +Processors must tolerate an "=over" list that goes off the end of the +document (i.e., which has no matching "=back"), but they may warn +about such a list. + +=item * + +Authors of Pod formatters should note that this construct: + + =item Neque + + =item Porro + + =item Quisquam Est + + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. 
+ + =item Ut Enim + +is semantically ambiguous, in a way that makes formatting decisions +a bit difficult. On the one hand, it could be mention of an item +"Neque", mention of another item "Porro", and mention of another +item "Quisquam Est", with just the last one requiring the explanatory +paragraph "Qui dolorem ipsum quia dolor..."; and then an item +"Ut Enim". In that case, you'd want to format it like so: + + Neque + + Porro + + Quisquam Est + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +But it could equally well be a discussion of three (related or equivalent) +items, "Neque", "Porro", and "Quisquam Est", followed by a paragraph +explaining them all, and then a new item "Ut Enim". In that case, you'd +probably want to format it like so: + + Neque + Porro + Quisquam Est + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +But (for the foreseeable future), Pod does not provide any way for Pod +authors to distinguish which grouping is meant by the above +"=item"-cluster structure. So formatters should format it like so: + + Neque + + Porro + + Quisquam Est + + Qui dolorem ipsum quia dolor sit amet, consectetur, adipisci + velit, sed quia non numquam eius modi tempora incidunt ut + labore et dolore magnam aliquam quaerat voluptatem. + + Ut Enim + +That is, there should be (at least roughly) equal spacing between +items as between paragraphs (although that spacing may well be less +than the full height of a line of text). This leaves it to the reader +to use (con)textual cues to figure out whether the "Qui dolorem +ipsum..." paragraph applies to the "Quisquam Est" item or to all three +items "Neque", "Porro", and "Quisquam Est". 
While not an ideal +situation, this is preferable to providing formatting cues that may +be actually contrary to the author's intent. + +=back + + + +=head1 About Data Paragraphs and "=begin/=end" Regions + +Data paragraphs are typically used for inlining non-Pod data that is +to be used (typically passed through) when rendering the document to +a specific format: + + =begin rtf + + \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par} + + =end rtf + +The exact same effect could, incidentally, be achieved with a single +"=for" paragraph: + + =for rtf \par{\pard\qr\sa4500{\i Printed\~\chdate\~\chtime}\par} + +(Although that is not formally a data paragraph, it has the same +meaning as one, and Pod parsers may parse it as one.) + +Another example of a data paragraph: + + =begin html + + I like <em>PIE</em>! + + <hr>Especially pecan pie! + + =end html + +If these were ordinary paragraphs, the Pod parser would try to +expand the "EE<lt>/em>" (in the first paragraph) as a formatting +code, just like "EE<lt>lt>" or "EE<lt>eacute>". But since this +is in a "=begin I<identifier>"..."=end I<identifier>" region I<and> +the identifier "html" doesn't begin with a ":" prefix, the contents +of this region are stored as data paragraphs, instead of being +processed as ordinary paragraphs (or if they began with spaces +and/or tabs, as verbatim paragraphs). + +As a further example: At time of writing, no "biblio" identifier is +supported, but suppose some processor were written to recognize it as +a way of (say) denoting a bibliographic reference (necessarily +containing formatting codes in ordinary paragraphs). The fact that +"biblio" paragraphs were meant for ordinary processing would be +indicated by prefacing each "biblio" identifier with a colon: + + =begin :biblio + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ.
+ + =end :biblio + +This would signal to the parser that paragraphs in this begin...end +region are subject to normal handling as ordinary/verbatim paragraphs +(while still tagged as meant only for processors that understand the +"biblio" identifier). The same effect could be had with: + + =for :biblio + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + +The ":" on these identifiers means simply "process this stuff +normally, even though the result will be for some special target". +I suggest that parser APIs report "biblio" as the target identifier, +but also report that it had a ":" prefix. (And similarly, with the +above "html", report "html" as the target identifier, and note the +I<lack> of a ":" prefix.) + +Note that a "=begin I<identifier>"..."=end I<identifier>" region where +I<identifier> begins with a colon, I<can> contain commands. For example: + + =begin :biblio + + Wirth's classic is available in several editions, including: + + =for comment + hm, check abebooks.com for how much used copies cost. + + =over + + =item + + Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> + Teubner, Stuttgart. [Yes, it's in German.] + + =item + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + + =back + + =end :biblio + +Note, however, a "=begin I<identifier>"..."=end I<identifier>" +region where I<identifier> does I<not> begin with a colon, should not +directly contain "=head1" ... "=head4" commands, nor "=over", nor "=back", +nor "=item". For example, this may be considered invalid: + + =begin somedata + + This is a data paragraph. + + =head1 Don't do this! + + This is a data paragraph too. + + =end somedata + +A Pod processor may signal that the above (specifically the "=head1" +paragraph) is an error. Note, however, that the following should +I<not> be treated as an error: + + =begin somedata + + This is a data paragraph. 
+ + =cut + + # Yup, this isn't Pod anymore. + sub excl { (rand() > .5) ? "hoo!" : "hah!" } + + =pod + + This is a data paragraph too. + + =end somedata + +And this too is valid: + + =begin someformat + + This is a data paragraph. + + And this is a data paragraph. + + =begin someotherformat + + This is a data paragraph too. + + And this is a data paragraph too. + + =begin :yetanotherformat + + =head2 This is a command paragraph! + + This is an ordinary paragraph! + + And this is a verbatim paragraph! + + =end :yetanotherformat + + =end someotherformat + + Another data paragraph! + + =end someformat + +The contents of the above "=begin :yetanotherformat" ... +"=end :yetanotherformat" region I<aren't> data paragraphs, because +the immediately containing region's identifier (":yetanotherformat") +begins with a colon. In practice, most regions that contain +data paragraphs will contain I<only> data paragraphs; however, +the above nesting is syntactically valid as Pod, even if it is +rare. However, the handlers for some formats, like "html", +will accept only data paragraphs, not nested regions; and they may +complain if they see (targeted for them) nested regions, or commands, +other than "=end", "=pod", and "=cut". + +Also consider this valid structure: + + =begin :biblio + + Wirth's classic is available in several editions, including: + + =over + + =item + + Wirth, Niklaus. 1975. I<Algorithmen und Datenstrukturen.> + Teubner, Stuttgart. [Yes, it's in German.] + + =item + + Wirth, Niklaus. 1976. I<Algorithms + Data Structures = + Programs.> Prentice-Hall, Englewood Cliffs, NJ. + + =back + + Buy buy buy! + + =begin html + + <img src='wirth_spokesmodeling_book.png'> + + <hr> + + =end html + + Now now now! + + =end :biblio + +There, the "=begin html"..."=end html" region is nested inside +the larger "=begin :biblio"..."=end :biblio" region. 
Note that the +content of the "=begin html"..."=end html" region is data +paragraph(s), because the immediately containing region's identifier +("html") I<doesn't> begin with a colon. + +Pod parsers, when processing a series of data paragraphs one +after another (within a single region), should consider them to +be one large data paragraph that happens to contain blank lines. So +the content of the above "=begin html"..."=end html" I<may> be stored +as two data paragraphs (one consisting of +"<img src='wirth_spokesmodeling_book.png'>\n" +and another consisting of "<hr>\n"), but I<should> be stored as +a single data paragraph (consisting of +"<img src='wirth_spokesmodeling_book.png'>\n\n<hr>\n"). + +Pod processors should tolerate empty +"=begin I<something>"..."=end I<something>" regions, +empty "=begin :I<something>"..."=end :I<something>" regions, and +contentless "=for I<something>" and "=for :I<something>" +paragraphs. I.e., these should be tolerated: + + =for html + + =begin html + + =end html + + =begin :biblio + + =end :biblio + +Incidentally, note that there's no easy way to express a data +paragraph starting with something that looks like a command. Consider: + + =begin stuff + + =shazbot + + =end stuff + +There, "=shazbot" will be parsed as a Pod command "shazbot", not as a data +paragraph "=shazbot\n". However, you can express a data paragraph consisting +of "=shazbot\n" using this code: + + =for stuff =shazbot + +The situation where this is necessary, is presumably quite rare. + +Note that =end commands must match the currently open =begin command. That +is, they must properly nest. For example, this is valid: + + =begin outer + + X + + =begin inner + + Y + + =end inner + + Z + + =end outer + +while this is invalid: + + =begin outer + + X + + =begin inner + + Y + + =end outer + + Z + + =end inner + +This latter is improper because when the "=end outer" command is seen, the +currently open region has the formatname "inner", not "outer". 
(It just +happens that "outer" is the format name of a higher-up region.) This is +an error. Processors must by default report this as an error, and may halt +processing the document containing that error. A corollary of this is that +regions cannot "overlap" -- i.e., the latter block above does not represent +a region called "outer" which contains X and Y, overlapping a region called +"inner" which contains Y and Z. But because it is invalid (as all +apparently overlapping regions would be), it doesn't represent that, or +anything at all. + +Similarly, this is invalid: + + =begin thing + + =end hting + +This is an error because the region is opened by "thing", and the "=end" +tries to close "hting" [sic]. + +This is also invalid: + + =begin thing + + =end + +This is invalid because every "=end" command must have a formatname +parameter. + +=head1 SEE ALSO + +L<perlpod>, L<perlsyn/"PODs: Embedded Documentation">, +L<podchecker> + +=head1 AUTHOR + +Sean M. Burke + +=cut + + diff --git a/ext/Pod-Html/test.lib/perlvar.pod b/ext/Pod-Html/test.lib/perlvar.pod new file mode 100644 index 0000000000..914eebe2b3 --- /dev/null +++ b/ext/Pod-Html/test.lib/perlvar.pod @@ -0,0 +1,1737 @@ +=head1 NAME + +perlvar - Perl predefined variables + +=head1 DESCRIPTION + +=head2 Predefined Names + +The following names have special meaning to Perl. Most +punctuation names have reasonable mnemonics, or analogs in the +shells. Nevertheless, if you wish to use long variable names, +you need only say + + use English; + +at the top of your program. This aliases all the short names to the long +names in the current package. Some even have medium names, generally +borrowed from B<awk>. In general, it's best to use the + + use English '-no_match_vars'; + +invocation if you don't need $PREMATCH, $MATCH, or $POSTMATCH, as it avoids +a certain performance hit with the use of regular expressions. See +L<English>. 
+ +Variables that depend on the currently selected filehandle may be set by +calling an appropriate object method on the IO::Handle object, although +this is less efficient than using the regular built-in variables. (Summary +lines below for this contain the word HANDLE.) First you must say + + use IO::Handle; + +after which you may use either + + method HANDLE EXPR + +or more safely, + + HANDLE->method(EXPR) + +Each method returns the old value of the IO::Handle attribute. +The methods each take an optional EXPR, which, if supplied, specifies the +new value for the IO::Handle attribute in question. If not supplied, +most methods do nothing to the current value--except for +autoflush(), which will assume a 1 for you, just to be different. + +Because loading in the IO::Handle class is an expensive operation, you should +learn how to use the regular built-in variables. + +A few of these variables are considered "read-only". This means that if +you try to assign to this variable, either directly or indirectly through +a reference, you'll raise a run-time exception. + +You should be very careful when modifying the default values of most +special variables described in this document. In most cases you want +to localize these variables before changing them, since if you don't, +the change may affect other modules which rely on the default values +of the special variables that you have changed. This is one of the +correct ways to read the whole file at once: + + open my $fh, "<", "foo" or die $!; + local $/; # enable localized slurp mode + my $content = <$fh>; + close $fh; + +But the following code is quite bad: + + open my $fh, "<", "foo" or die $!; + undef $/; # enable slurp mode + my $content = <$fh>; + close $fh; + +since some other module may want to read data from some file in the +default "line mode", so if the code we have just presented has been +executed, the global value of C<$/> is now changed for any other code +running inside the same Perl interpreter.
+ +Usually when a variable is localized you want to make sure that this +change affects the shortest scope possible. So unless you are already +inside some short C<{}> block, you should create one yourself. For +example: + + my $content = ''; + open my $fh, "<", "foo" or die $!; + { + local $/; + $content = <$fh>; + } + close $fh; + +Here is an example of how your own code can go broken: + + for (1..5){ + nasty_break(); + print "$_ "; + } + sub nasty_break { + $_ = 5; + # do something with $_ + } + +You probably expect this code to print: + + 1 2 3 4 5 + +but instead you get: + + 5 5 5 5 5 + +Why? Because nasty_break() modifies C<$_> without localizing it +first. The fix is to add local(): + + local $_ = 5; + +It's easy to notice the problem in such a short example, but in more +complicated code you are looking for trouble if you don't localize +changes to the special variables. + +The following list is ordered by scalar variables first, then the +arrays, then the hashes. + +=over 8 + +=item $ARG + +=item $_ +X<$_> X<$ARG> + +The default input and pattern-searching space. The following pairs are +equivalent: + + while (<>) {...} # equivalent only in while! + while (defined($_ = <>)) {...} + + /^Subject:/ + $_ =~ /^Subject:/ + + tr/a-z/A-Z/ + $_ =~ tr/a-z/A-Z/ + + chomp + chomp($_) + +Here are the places where Perl will assume $_ even if you +don't use it: + +=over 3 + +=item * + +The following functions: + +abs, alarm, chomp, chop, chr, chroot, cos, defined, eval, exp, glob, +hex, int, lc, lcfirst, length, log, lstat, mkdir, oct, ord, pos, print, +quotemeta, readlink, readpipe, ref, require, reverse (in scalar context only), +rmdir, sin, split (on its second argument), sqrt, stat, study, uc, ucfirst, +unlink, unpack. + +=item * + +All file tests (C<-f>, C<-d>) except for C<-t>, which defaults to STDIN. +See L<perlfunc/-X> + + +=item * + +The pattern matching operations C<m//>, C<s///> and C<tr///> (aka C<y///>) +when used without an C<=~> operator. 
+ +=item * + +The default iterator variable in a C<foreach> loop if no other +variable is supplied. + +=item * + +The implicit iterator variable in the grep() and map() functions. + +=item * + +The implicit variable of given(). + +=item * + +The default place to put an input record when a C<< <FH> >> +operation's result is tested by itself as the sole criterion of a C<while> +test. Outside a C<while> test, this will not happen. + +=back + +As C<$_> is a global variable, this may lead in some cases to unwanted +side-effects. As of perl 5.9.1, you can now use a lexical version of +C<$_> by declaring it in a file or in a block with C<my>. Moreover, +declaring C<our $_> restores the global C<$_> in the current scope. + +(Mnemonic: underline is understood in certain operations.) + +=back + +=over 8 + +=item $a + +=item $b +X<$a> X<$b> + +Special package variables when using sort(), see L<perlfunc/sort>. +Because of this specialness $a and $b don't need to be declared +(using use vars, or our()) even when using the C<strict 'vars'> pragma. +Don't lexicalize them with C<my $a> or C<my $b> if you want to be +able to use them in the sort() comparison block or function. + +=back + +=over 8 + +=item $<I<digits>> +X<$1> X<$2> X<$3> + +Contains the subpattern from the corresponding set of capturing +parentheses from the last pattern match, not counting patterns +matched in nested blocks that have been exited already. (Mnemonic: +like \digits.) These variables are all read-only and dynamically +scoped to the current BLOCK. + +=item $MATCH + +=item $& +X<$&> X<$MATCH> + +The string matched by the last successful pattern match (not counting +any matches hidden within a BLOCK or eval() enclosed by the current +BLOCK). (Mnemonic: like & in some editors.) This variable is read-only +and dynamically scoped to the current BLOCK. + +The use of this variable anywhere in a program imposes a considerable +performance penalty on all regular expression matches. See L</BUGS>. 
+ +See L</@-> for a replacement. + +=item ${^MATCH} +X<${^MATCH}> + +This is similar to C<$&> (C<$MATCH>) except that it does not incur the +performance penalty associated with that variable, and is only guaranteed +to return a defined value when the pattern was compiled or executed with +the C</p> modifier. + +=item $PREMATCH + +=item $` +X<$`> X<$PREMATCH> + +The string preceding whatever was matched by the last successful +pattern match (not counting any matches hidden within a BLOCK or eval +enclosed by the current BLOCK). (Mnemonic: C<`> often precedes a quoted +string.) This variable is read-only. + +The use of this variable anywhere in a program imposes a considerable +performance penalty on all regular expression matches. See L</BUGS>. + +See L</@-> for a replacement. + +=item ${^PREMATCH} +X<${^PREMATCH}> + +This is similar to C<$`> ($PREMATCH) except that it does not incur the +performance penalty associated with that variable, and is only guaranteed +to return a defined value when the pattern was compiled or executed with +the C</p> modifier. + +=item $POSTMATCH + +=item $' +X<$'> X<$POSTMATCH> + +The string following whatever was matched by the last successful +pattern match (not counting any matches hidden within a BLOCK or eval() +enclosed by the current BLOCK). (Mnemonic: C<'> often follows a quoted +string.) Example: + + local $_ = 'abcdefghi'; + /def/; + print "$`:$&:$'\n"; # prints abc:def:ghi + +This variable is read-only and dynamically scoped to the current BLOCK. + +The use of this variable anywhere in a program imposes a considerable +performance penalty on all regular expression matches. See L</BUGS>. + +See L</@-> for a replacement. + +=item ${^POSTMATCH} +X<${^POSTMATCH}> + +This is similar to C<$'> (C<$POSTMATCH>) except that it does not incur the +performance penalty associated with that variable, and is only guaranteed +to return a defined value when the pattern was compiled or executed with +the C</p> modifier. 
+ +=item $LAST_PAREN_MATCH + +=item $+ +X<$+> X<$LAST_PAREN_MATCH> + +The text matched by the last bracket of the last successful search pattern. +This is useful if you don't know which one of a set of alternative patterns +matched. For example: + + /Version: (.*)|Revision: (.*)/ && ($rev = $+); + +(Mnemonic: be positive and forward looking.) +This variable is read-only and dynamically scoped to the current BLOCK. + +=item $LAST_SUBMATCH_RESULT + +=item $^N +X<$^N> + +The text matched by the used group most-recently closed (i.e. the group +with the rightmost closing parenthesis) of the last successful search +pattern. (Mnemonic: the (possibly) Nested parenthesis that most +recently closed.) + +This is primarily used inside C<(?{...})> blocks for examining text +recently matched. For example, to effectively capture text to a variable +(in addition to C<$1>, C<$2>, etc.), replace C<(...)> with + + (?:(...)(?{ $var = $^N })) + +Setting and then using C<$var> in this way relieves you from having to +worry about exactly which numbered set of parentheses they are. + +This variable is dynamically scoped to the current BLOCK. + +=item @LAST_MATCH_END + +=item @+ +X<@+> X<@LAST_MATCH_END> + +This array holds the offsets of the ends of the last successful +submatches in the currently active dynamic scope. C<$+[0]> is +the offset into the string of the end of the entire match. This +is the same value as what the C<pos> function returns when called +on the variable that was matched against. The I<n>th element +of this array holds the offset of the I<n>th submatch, so +C<$+[1]> is the offset past where $1 ends, C<$+[2]> the offset +past where $2 ends, and so on. You can use C<$#+> to determine +how many subgroups were in the last successful match. See the +examples given for the C<@-> variable.
+ +=item %LAST_PAREN_MATCH + +=item %+ +X<%+> + +Similar to C<@+>, the C<%+> hash allows access to the named capture +buffers, should they exist, in the last successful match in the +currently active dynamic scope. + +For example, C<$+{foo}> is equivalent to C<$1> after the following match: + + 'foo' =~ /(?<foo>foo)/; + +The keys of the C<%+> hash list only the names of buffers that have +captured (and that are thus associated to defined values). + +The underlying behaviour of C<%+> is provided by the +L<Tie::Hash::NamedCapture> module. + +B<Note:> C<%-> and C<%+> are tied views into a common internal hash +associated with the last successful regular expression. Therefore mixing +iterative access to them via C<each> may have unpredictable results. +Likewise, if the last successful match changes, then the results may be +surprising. + +=item HANDLE->input_line_number(EXPR) + +=item $INPUT_LINE_NUMBER + +=item $NR + +=item $. +X<$.> X<$NR> X<$INPUT_LINE_NUMBER> X<line number> + +Current line number for the last filehandle accessed. + +Each filehandle in Perl counts the number of lines that have been read +from it. (Depending on the value of C<$/>, Perl's idea of what +constitutes a line may not match yours.) When a line is read from a +filehandle (via readline() or C<< <> >>), or when tell() or seek() is +called on it, C<$.> becomes an alias to the line counter for that +filehandle. + +You can adjust the counter by assigning to C<$.>, but this will not +actually move the seek pointer. I<Localizing C<$.> will not localize +the filehandle's line count>. Instead, it will localize perl's notion +of which filehandle C<$.> is currently aliased to. + +C<$.> is reset when the filehandle is closed, but B<not> when an open +filehandle is reopened without an intervening close(). For more +details, see L<perlop/"IE<sol>O Operators">. Because C<< <> >> never does +an explicit close, line numbers increase across ARGV files (but see +examples in L<perlfunc/eof>). 
+ +You can also use C<< HANDLE->input_line_number(EXPR) >> to access the +line counter for a given filehandle without having to worry about +which handle you last accessed. + +(Mnemonic: many programs use "." to mean the current line number.) + +=item IO::Handle->input_record_separator(EXPR) + +=item $INPUT_RECORD_SEPARATOR + +=item $RS + +=item $/ +X<$/> X<$RS> X<$INPUT_RECORD_SEPARATOR> + +The input record separator, newline by default. This +influences Perl's idea of what a "line" is. Works like B<awk>'s RS +variable, including treating empty lines as a terminator if set to +the null string. (An empty line cannot contain any spaces +or tabs.) You may set it to a multi-character string to match a +multi-character terminator, or to C<undef> to read through the end +of file. Setting it to C<"\n\n"> means something slightly +different than setting to C<"">, if the file contains consecutive +empty lines. Setting to C<""> will treat two or more consecutive +empty lines as a single empty line. Setting to C<"\n\n"> will +blindly assume that the next input character belongs to the next +paragraph, even if it's a newline. (Mnemonic: / delimits +line boundaries when quoting poetry.) + + local $/; # enable "slurp" mode + local $_ = <FH>; # whole file now here + s/\n[ \t]+/ /g; + +Remember: the value of C<$/> is a string, not a regex. B<awk> has to be +better for something. :-) + +Setting C<$/> to a reference to an integer, scalar containing an integer, or +scalar that's convertible to an integer will attempt to read records +instead of lines, with the maximum record size being the referenced +integer. So this: + + local $/ = \32768; # or \"32768", or \$var_containing_32768 + open my $fh, "<", $myfile or die $!; + local $_ = <$fh>; + +will read a record of no more than 32768 bytes from C<$fh>. If you're +not reading from a record-oriented file (or your OS doesn't have +record-oriented files), then you'll likely get a full chunk of data +with every read.
If a record is larger than the record size you've +set, you'll get the record back in pieces. Trying to set the record +size to zero or less will cause reading in the (rest of the) whole file. + +On VMS, record reads are done with the equivalent of C<sysread>, +so it's best not to mix record and non-record reads on the same +file. (This is unlikely to be a problem, because any file you'd +want to read in record mode is probably unusable in line mode.) +Non-VMS systems do normal I/O, so it's safe to mix record and +non-record reads of a file. + +See also L<perlport/"Newlines">. Also see C<$.>. + +=item HANDLE->autoflush(EXPR) + +=item $OUTPUT_AUTOFLUSH + +=item $| +X<$|> X<autoflush> X<flush> X<$OUTPUT_AUTOFLUSH> + +If set to nonzero, forces a flush right away and after every write +or print on the currently selected output channel. Default is 0 +(regardless of whether the channel is really buffered by the +system or not; C<$|> tells you only whether you've asked Perl +explicitly to flush after each write). STDOUT will +typically be line buffered if output is to the terminal and block +buffered otherwise. Setting this variable is useful primarily when +you are outputting to a pipe or socket, such as when you are running +a Perl program under B<rsh> and want to see the output as it's +happening. This has no effect on input buffering. See L<perlfunc/getc> +for that. See L<perlfunc/select> on how to select the output channel. +See also L<IO::Handle>. (Mnemonic: when you want your pipes to be piping hot.) + +=item IO::Handle->output_field_separator EXPR + +=item $OUTPUT_FIELD_SEPARATOR + +=item $OFS + +=item $, +X<$,> X<$OFS> X<$OUTPUT_FIELD_SEPARATOR> + +The output field separator for the print operator. If defined, this +value is printed between each of print's arguments. Default is C<undef>. +(Mnemonic: what is printed when there is a "," in your print statement.)
+ +=item IO::Handle->output_record_separator EXPR + +=item $OUTPUT_RECORD_SEPARATOR + +=item $ORS + +=item $\ +X<$\> X<$ORS> X<$OUTPUT_RECORD_SEPARATOR> + +The output record separator for the print operator. If defined, this +value is printed after the last of print's arguments. Default is C<undef>. +(Mnemonic: you set C<$\> instead of adding "\n" at the end of the print. +Also, it's just like C<$/>, but it's what you get "back" from Perl.) + +=item $LIST_SEPARATOR + +=item $" +X<$"> X<$LIST_SEPARATOR> + +This is like C<$,> except that it applies to array and slice values +interpolated into a double-quoted string (or similar interpreted +string). Default is a space. (Mnemonic: obvious, I think.) + +=item $SUBSCRIPT_SEPARATOR + +=item $SUBSEP + +=item $; +X<$;> X<$SUBSEP> X<SUBSCRIPT_SEPARATOR> + +The subscript separator for multidimensional array emulation. If you +refer to a hash element as + + $foo{$a,$b,$c} + +it really means + + $foo{join($;, $a, $b, $c)} + +But don't put + + @foo{$a,$b,$c} # a slice--note the @ + +which means + + ($foo{$a},$foo{$b},$foo{$c}) + +Default is "\034", the same as SUBSEP in B<awk>. If your +keys contain binary data there might not be any safe value for C<$;>. +(Mnemonic: comma (the syntactic subscript separator) is a +semi-semicolon. Yeah, I know, it's pretty lame, but C<$,> is already +taken for something more important.) + +Consider using "real" multidimensional arrays as described +in L<perllol>. + +=item HANDLE->format_page_number(EXPR) + +=item $FORMAT_PAGE_NUMBER + +=item $% +X<$%> X<$FORMAT_PAGE_NUMBER> + +The current page number of the currently selected output channel. +Used with formats. +(Mnemonic: % is page number in B<nroff>.) + +=item HANDLE->format_lines_per_page(EXPR) + +=item $FORMAT_LINES_PER_PAGE + +=item $= +X<$=> X<$FORMAT_LINES_PER_PAGE> + +The current page length (printable lines) of the currently selected +output channel. Default is 60. +Used with formats. +(Mnemonic: = has horizontal lines.) 
+ +=item HANDLE->format_lines_left(EXPR) + +=item $FORMAT_LINES_LEFT + +=item $- +X<$-> X<$FORMAT_LINES_LEFT> + +The number of lines left on the page of the currently selected output +channel. +Used with formats. +(Mnemonic: lines_on_page - lines_printed.) + +=item @LAST_MATCH_START + +=item @- +X<@-> X<@LAST_MATCH_START> + +$-[0] is the offset of the start of the last successful match. +C<$-[>I<n>C<]> is the offset of the start of the substring matched by +I<n>-th subpattern, or undef if the subpattern did not match. + +Thus after a match against $_, $& coincides with C<substr $_, $-[0], +$+[0] - $-[0]>. Similarly, $I<n> coincides with C<substr $_, $-[n], +$+[n] - $-[n]> if C<$-[n]> is defined, and $+ coincides with +C<substr $_, $-[$#-], $+[$#-] - $-[$#-]>. One can use C<$#-> to find the last +matched subgroup in the last successful match. Contrast with +C<$#+>, the number of subgroups in the regular expression. Compare +with C<@+>. + +This array holds the offsets of the beginnings of the last +successful submatches in the currently active dynamic scope. +C<$-[0]> is the offset into the string of the beginning of the +entire match. The I<n>th element of this array holds the offset +of the I<n>th submatch, so C<$-[1]> is the offset where $1 +begins, C<$-[2]> the offset where $2 begins, and so on. + +After a match against some variable $var: + +=over 5 + +=item C<$`> is the same as C<substr($var, 0, $-[0])> + +=item C<$&> is the same as C<substr($var, $-[0], $+[0] - $-[0])> + +=item C<$'> is the same as C<substr($var, $+[0])> + +=item C<$1> is the same as C<substr($var, $-[1], $+[1] - $-[1])> + +=item C<$2> is the same as C<substr($var, $-[2], $+[2] - $-[2])> + +=item C<$3> is the same as C<substr($var, $-[3], $+[3] - $-[3])> + +=back + +=item %- +X<%-> + +Similar to C<%+>, this variable allows access to the named capture buffers +in the last successful match in the currently active dynamic scope. 
To +each capture buffer name found in the regular expression, it associates a +reference to an array containing the list of values captured by all +buffers with that name (should there be several of them), in the order +where they appear. + +Here's an example: + + if ('1234' =~ /(?<A>1)(?<B>2)(?<A>3)(?<B>4)/) { + foreach my $bufname (sort keys %-) { + my $ary = $-{$bufname}; + foreach my $idx (0..$#$ary) { + print "\$-{$bufname}[$idx] : ", + (defined($ary->[$idx]) ? "'$ary->[$idx]'" : "undef"), + "\n"; + } + } + } + +would print out: + + $-{A}[0] : '1' + $-{A}[1] : '3' + $-{B}[0] : '2' + $-{B}[1] : '4' + +The keys of the C<%-> hash correspond to all buffer names found in +the regular expression. + +The behaviour of C<%-> is implemented via the +L<Tie::Hash::NamedCapture> module. + +B<Note:> C<%-> and C<%+> are tied views into a common internal hash +associated with the last successful regular expression. Therefore mixing +iterative access to them via C<each> may have unpredictable results. +Likewise, if the last successful match changes, then the results may be +surprising. + +=item HANDLE->format_name(EXPR) + +=item $FORMAT_NAME + +=item $~ +X<$~> X<$FORMAT_NAME> + +The name of the current report format for the currently selected output +channel. Default is the name of the filehandle. (Mnemonic: brother to +C<$^>.) + +=item HANDLE->format_top_name(EXPR) + +=item $FORMAT_TOP_NAME + +=item $^ +X<$^> X<$FORMAT_TOP_NAME> + +The name of the current top-of-page format for the currently selected +output channel. Default is the name of the filehandle with _TOP +appended. (Mnemonic: points to top of page.) + +=item IO::Handle->format_line_break_characters EXPR + +=item $FORMAT_LINE_BREAK_CHARACTERS + +=item $: +X<$:> X<FORMAT_LINE_BREAK_CHARACTERS> + +The current set of characters after which a string may be broken to +fill continuation fields (starting with ^) in a format. Default is +S<" \n-">, to break on whitespace or hyphens. 
(Mnemonic: a "colon" in +poetry is a part of a line.) + +=item IO::Handle->format_formfeed EXPR + +=item $FORMAT_FORMFEED + +=item $^L +X<$^L> X<$FORMAT_FORMFEED> + +What formats output as a form feed. Default is \f. + +=item $ACCUMULATOR + +=item $^A +X<$^A> X<$ACCUMULATOR> + +The current value of the write() accumulator for format() lines. A format +contains formline() calls that put their result into C<$^A>. After +calling its format, write() prints out the contents of C<$^A> and empties it. +So you never really see the contents of C<$^A> unless you call +formline() yourself and then look at it. See L<perlform> and +L<perlfunc/formline()>. + +=item $CHILD_ERROR + +=item $? +X<$?> X<$CHILD_ERROR> + +The status returned by the last pipe close, backtick (C<``>) command, +successful call to wait() or waitpid(), or from the system() +operator. This is just the 16-bit status word returned by the +traditional Unix wait() system call (or else is made up to look like it). Thus, the +exit value of the subprocess is really (C<<< $? >> 8 >>>), and +C<$? & 127> gives which signal, if any, the process died from, and +C<$? & 128> reports whether there was a core dump. (Mnemonic: +similar to B<sh> and B<ksh>.) + +Additionally, if the C<h_errno> variable is supported in C, its value +is returned via $? if any C<gethost*()> function fails. + +If you have installed a signal handler for C<SIGCHLD>, the +value of C<$?> will usually be wrong outside that handler. + +Inside an C<END> subroutine C<$?> contains the value that is going to be +given to C<exit()>. You can modify C<$?> in an C<END> subroutine to +change the exit status of your program. For example: + + END { + $? = 1 if $? == 255; # die would make it 255 + } + +Under VMS, the pragma C<use vmsish 'status'> makes C<$?> reflect the +actual VMS exit status, instead of the default emulation of POSIX +status; see L<perlvms/$?> for details. + +Also see L<Error Indicators>.
+ +=item ${^CHILD_ERROR_NATIVE} +X<$^CHILD_ERROR_NATIVE> + +The native status returned by the last pipe close, backtick (C<``>) +command, successful call to wait() or waitpid(), or from the system() +operator. On POSIX-like systems this value can be decoded with the +WIFEXITED, WEXITSTATUS, WIFSIGNALED, WTERMSIG, WIFSTOPPED, WSTOPSIG +and WIFCONTINUED functions provided by the L<POSIX> module. + +Under VMS this reflects the actual VMS exit status; i.e. it is the same +as $? when the pragma C<use vmsish 'status'> is in effect. + +=item ${^ENCODING} +X<$^ENCODING> + +The I<object reference> to the Encode object that is used to convert +the source code to Unicode. Thanks to this variable your perl script +does not have to be written in UTF-8. Default is I<undef>. The direct +manipulation of this variable is highly discouraged. + +=item $OS_ERROR + +=item $ERRNO + +=item $! +X<$!> X<$ERRNO> X<$OS_ERROR> + +If used numerically, yields the current value of the C C<errno> +variable, or in other words, if a system or library call fails, it +sets this variable. This means that the value of C<$!> is meaningful +only I<immediately> after a B<failure>: + + if (open my $fh, "<", $filename) { + # Here $! is meaningless. + ... + } else { + # ONLY here is $! meaningful. + ... + # Already here $! might be meaningless. + } + # Since here we might have either success or failure, + # here $! is meaningless. + +In the above I<meaningless> stands for anything: zero, non-zero, +C<undef>. A successful system or library call does B<not> set +the variable to zero. + +If used as a string, yields the corresponding system error string. +You can assign a number to C<$!> to set I<errno> if, for instance, +you want C<"$!"> to return the string for error I<n>, or you want +to set the exit value for the die() operator. (Mnemonic: What just +went bang?) + +Also see L<Error Indicators>. + +=item %OS_ERROR + +=item %ERRNO + +=item %! 
+X<%!> + +Each element of C<%!> has a true value only if C<$!> is set to that +value. For example, C<$!{ENOENT}> is true if and only if the current +value of C<$!> is C<ENOENT>; that is, if the most recent error was +"No such file or directory" (or its moral equivalent: not all operating +systems give that exact error, and certainly not all languages). +To check if a particular key is meaningful on your system, use +C<exists $!{the_key}>; for a list of legal keys, use C<keys %!>. +See L<Errno> for more information, and also see above for the +validity of C<$!>. + +=item $EXTENDED_OS_ERROR + +=item $^E +X<$^E> X<$EXTENDED_OS_ERROR> + +Error information specific to the current operating system. At +the moment, this differs from C<$!> under only VMS, OS/2, and Win32 +(and for MacPerl). On all other platforms, C<$^E> is always just +the same as C<$!>. + +Under VMS, C<$^E> provides the VMS status value from the last +system error. This is more specific information about the last +system error than that provided by C<$!>. This is particularly +important when C<$!> is set to B<EVMSERR>. + +Under OS/2, C<$^E> is set to the error code of the last call to +OS/2 API either via CRT, or directly from perl. + +Under Win32, C<$^E> always returns the last error information +reported by the Win32 call C<GetLastError()> which describes +the last error from within the Win32 API. Most Win32-specific +code will report errors via C<$^E>. ANSI C and Unix-like calls +set C<errno> and so most portable Perl code will report errors +via C<$!>. + +Caveats mentioned in the description of C<$!> generally apply to +C<$^E>, also. (Mnemonic: Extra error explanation.) + +Also see L<Error Indicators>. + +=item $EVAL_ERROR + +=item $@ +X<$@> X<$EVAL_ERROR> + +The Perl syntax error message from the last eval() operator. +If $@ is the null string, the last eval() parsed and executed +correctly (although the operations you invoked may have failed in the +normal fashion). 
(Mnemonic: Where was the syntax error "at"?) + +Warning messages are not collected in this variable. You can, +however, set up a routine to process warnings by setting C<$SIG{__WARN__}> +as described below. + +Also see L<Error Indicators>. + +=item $PROCESS_ID + +=item $PID + +=item $$ +X<$$> X<$PID> X<$PROCESS_ID> + +The process number of the Perl running this script. You should +consider this variable read-only, although it will be altered +across fork() calls. (Mnemonic: same as shells.) + +Note for Linux users: on Linux, the C functions C<getpid()> and +C<getppid()> return different values from different threads. In order to +be portable, this behavior is not reflected by C<$$>, whose value remains +consistent across threads. If you want to call the underlying C<getpid()>, +you may use the CPAN module C<Linux::Pid>. + +=item $REAL_USER_ID + +=item $UID + +=item $< +X<< $< >> X<$UID> X<$REAL_USER_ID> + +The real uid of this process. (Mnemonic: it's the uid you came I<from>, +if you're running setuid.) You can change both the real uid and +the effective uid at the same time by using POSIX::setuid(). Since +changes to $< require a system call, check $! after a change attempt to +detect any possible errors. + +=item $EFFECTIVE_USER_ID + +=item $EUID + +=item $> +X<< $> >> X<$EUID> X<$EFFECTIVE_USER_ID> + +The effective uid of this process. Example: + + $< = $>; # set real to effective uid + ($<,$>) = ($>,$<); # swap real and effective uid + +You can change both the effective uid and the real uid at the same +time by using POSIX::setuid(). Changes to $> require a check to $! +to detect any possible errors after an attempted change. + +(Mnemonic: it's the uid you went I<to>, if you're running setuid.) +C<< $< >> and C<< $> >> can be swapped only on machines +supporting setreuid(). + +=item $REAL_GROUP_ID + +=item $GID + +=item $( +X<$(> X<$GID> X<$REAL_GROUP_ID> + +The real gid of this process. 
If you are on a machine that supports +membership in multiple groups simultaneously, gives a space separated +list of groups you are in. The first number is the one returned by +getgid(), and the subsequent ones by getgroups(), one of which may be +the same as the first number. + +However, a value assigned to C<$(> must be a single number used to +set the real gid. So the value given by C<$(> should I<not> be assigned +back to C<$(> without being forced numeric, such as by adding zero. Note +that this is different to the effective gid (C<$)>) which does take a +list. + +You can change both the real gid and the effective gid at the same +time by using POSIX::setgid(). Changes to $( require a check to $! +to detect any possible errors after an attempted change. + +(Mnemonic: parentheses are used to I<group> things. The real gid is the +group you I<left>, if you're running setgid.) + +=item $EFFECTIVE_GROUP_ID + +=item $EGID + +=item $) +X<$)> X<$EGID> X<$EFFECTIVE_GROUP_ID> + +The effective gid of this process. If you are on a machine that +supports membership in multiple groups simultaneously, gives a space +separated list of groups you are in. The first number is the one +returned by getegid(), and the subsequent ones by getgroups(), one of +which may be the same as the first number. + +Similarly, a value assigned to C<$)> must also be a space-separated +list of numbers. The first number sets the effective gid, and +the rest (if any) are passed to setgroups(). To get the effect of an +empty list for setgroups(), just repeat the new effective gid; that is, +to force an effective gid of 5 and an effectively empty setgroups() +list, say C< $) = "5 5" >. + +You can change both the effective gid and the real gid at the same +time by using POSIX::setgid() (use only a single numeric argument). +Changes to $) require a check to $! to detect any possible errors +after an attempted change. + +(Mnemonic: parentheses are used to I<group> things. 
The effective gid +is the group that's I<right> for you, if you're running setgid.) + +C<< $< >>, C<< $> >>, C<$(> and C<$)> can be set only on +machines that support the corresponding I<set[re][ug]id()> routine. C<$(> +and C<$)> can be swapped only on machines supporting setregid(). + +=item $PROGRAM_NAME + +=item $0 +X<$0> X<$PROGRAM_NAME> + +Contains the name of the program being executed. + +On some (read: not all) operating systems assigning to C<$0> modifies +the argument area that the C<ps> program sees. On some platforms you +may have to use special C<ps> options or a different C<ps> to see the +changes. Modifying $0 is more useful as a way of indicating the +current program state than it is for hiding the program you're +running. (Mnemonic: same as B<sh> and B<ksh>.) + +Note that there are platform-specific limitations on the maximum +length of C<$0>. In the most extreme case it may be limited to the +space occupied by the original C<$0>. + +On some platforms there may be an arbitrary amount of padding, for +example space characters, after the modified name as shown by C<ps>. +On some platforms this padding may extend all the way to the original +length of the argument area, no matter what you do (this is the case +for example with Linux 2.2). + +Note for BSD users: setting C<$0> does not completely remove "perl" +from the ps(1) output. For example, setting C<$0> to C<"foobar"> may +result in C<"perl: foobar (perl)"> (whether both the C<"perl: "> prefix +and the " (perl)" suffix are shown depends on your exact BSD variant +and version). This is an operating system feature; Perl cannot help it. + +In multithreaded scripts Perl coordinates the threads so that any +thread may modify its copy of C<$0> and the change becomes visible +to ps(1) (assuming the operating system plays along). Note that +the view of C<$0> the other threads have will not change since they +have their own copies of it.
+ +If the program has been given to perl via the switches C<-e> or C<-E>, +C<$0> will contain the string C<"-e">. + +=item $[ +X<$[> + +The index of the first element in an array, and of the first character +in a substring. Default is 0, but you could theoretically set it +to 1 to make Perl behave more like B<awk> (or Fortran) when +subscripting and when evaluating the index() and substr() functions. +(Mnemonic: [ begins subscripts.) + +As of release 5 of Perl, assignment to C<$[> is treated as a compiler +directive, and cannot influence the behavior of any other file. +(That's why you can only assign compile-time constants to it.) +Its use is highly discouraged. + +Note that, unlike other compile-time directives (such as L<strict>), +assignment to C<$[> can be seen from outer lexical scopes in the same file. +However, you can use local() on it to strictly bind its value to a +lexical block. + +=item $] +X<$]> + +The version + patchlevel / 1000 of the Perl interpreter. This variable +can be used to determine whether the Perl interpreter executing a +script is in the right range of versions. (Mnemonic: Is this version +of perl in the right bracket?) Example: + + warn "No checksumming!\n" if $] < 3.019; + +See also the documentation of C<use VERSION> and C<require VERSION> +for a convenient way to fail if the running Perl interpreter is too old. + +The floating point representation can sometimes lead to inaccurate +numeric comparisons. See C<$^V> for a more modern representation of +the Perl version that allows accurate string comparisons. + +=item $COMPILING + +=item $^C +X<$^C> X<$COMPILING> + +The current value of the flag associated with the B<-c> switch. +Mainly of use with B<-MO=...> to allow code to alter its behavior +when being compiled, such as for example to AUTOLOAD at compile +time rather than normal, deferred loading. Setting +C<$^C = 1> is similar to calling C<B::minus_c>. 
+ +=item $DEBUGGING + +=item $^D +X<$^D> X<$DEBUGGING> + +The current value of the debugging flags. (Mnemonic: value of B<-D> +switch.) May be read or set. Like its command-line equivalent, you can use +numeric or symbolic values, eg C<$^D = 10> or C<$^D = "st">. + +=item ${^RE_DEBUG_FLAGS} + +The current value of the regex debugging flags. Set to 0 for no debug output +even when the re 'debug' module is loaded. See L<re> for details. + +=item ${^RE_TRIE_MAXBUF} + +Controls how certain regex optimisations are applied and how much memory they +utilize. This value by default is 65536 which corresponds to a 512kB temporary +cache. Set this to a higher value to trade memory for speed when matching +large alternations. Set it to a lower value if you want the optimisations to +be as conservative of memory as possible but still occur, and set it to a +negative value to prevent the optimisation and conserve the most memory. +Under normal situations this variable should be of no interest to you. + +=item $SYSTEM_FD_MAX + +=item $^F +X<$^F> X<$SYSTEM_FD_MAX> + +The maximum system file descriptor, ordinarily 2. System file +descriptors are passed to exec()ed processes, while higher file +descriptors are not. Also, during an open(), system file descriptors are +preserved even if the open() fails. (Ordinary file descriptors are +closed before the open() is attempted.) The close-on-exec +status of a file descriptor will be decided according to the value of +C<$^F> when the corresponding file, pipe, or socket was opened, not the +time of the exec(). + +=item $^H + +WARNING: This variable is strictly for internal use only. Its availability, +behavior, and contents are subject to change without notice. + +This variable contains compile-time hints for the Perl interpreter. At the +end of compilation of a BLOCK the value of this variable is restored to the +value when the interpreter started to compile the BLOCK. 
+ +When perl begins to parse any block construct that provides a lexical scope +(e.g., eval body, required file, subroutine body, loop body, or conditional +block), the existing value of $^H is saved, but its value is left unchanged. +When the compilation of the block is completed, it regains the saved value. +Between the points where its value is saved and restored, code that +executes within BEGIN blocks is free to change the value of $^H. + +This behavior provides the semantic of lexical scoping, and is used in, +for instance, the C<use strict> pragma. + +The contents should be an integer; different bits of it are used for +different pragmatic flags. Here's an example: + + sub add_100 { $^H |= 0x100 } + + sub foo { + BEGIN { add_100() } + bar->baz($boon); + } + +Consider what happens during execution of the BEGIN block. At this point +the BEGIN block has already been compiled, but the body of foo() is still +being compiled. The new value of $^H will therefore be visible only while +the body of foo() is being compiled. + +Substitution of the above BEGIN block with: + + BEGIN { require strict; strict->import('vars') } + +demonstrates how C<use strict 'vars'> is implemented. Here's a conditional +version of the same lexical pragma: + + BEGIN { require strict; strict->import('vars') if $condition } + +=item %^H + +The %^H hash provides the same scoping semantic as $^H. This makes it +useful for implementation of lexically scoped pragmas. See L<perlpragma>. + +=item $INPLACE_EDIT + +=item $^I +X<$^I> X<$INPLACE_EDIT> + +The current value of the inplace-edit extension. Use C<undef> to disable +inplace editing. (Mnemonic: value of B<-i> switch.) + +=item $^M +X<$^M> + +By default, running out of memory is an untrappable, fatal error. +However, if suitably built, Perl can use the contents of C<$^M> +as an emergency memory pool after die()ing. Suppose that your Perl +were compiled with C<-DPERL_EMERGENCY_SBRK> and used Perl's malloc. 
+Then + + $^M = 'a' x (1 << 16); + +would allocate a 64K buffer for use in an emergency. See the +F<INSTALL> file in the Perl distribution for information on how to +add custom C compilation flags when compiling perl. To discourage casual +use of this advanced feature, there is no L<English|English> long name for +this variable. + +=item $OSNAME + +=item $^O +X<$^O> X<$OSNAME> + +The name of the operating system under which this copy of Perl was +built, as determined during the configuration process. The value +is identical to C<$Config{'osname'}>. See also L<Config> and the +B<-V> command-line switch documented in L<perlrun>. + +In Windows platforms, $^O is not very helpful: since it is always +C<MSWin32>, it doesn't tell the difference between +95/98/ME/NT/2000/XP/CE/.NET. Use Win32::GetOSName() or +Win32::GetOSVersion() (see L<Win32> and L<perlport>) to distinguish +between the variants. + +=item ${^OPEN} + +An internal variable used by PerlIO. A string in two parts, separated +by a C<\0> byte, the first part describes the input layers, the second +part describes the output layers. + +=item $PERLDB + +=item $^P +X<$^P> X<$PERLDB> + +The internal variable for debugging support. The meanings of the +various bits are subject to change, but currently indicate: + +=over 6 + +=item 0x01 + +Debug subroutine enter/exit. + +=item 0x02 + +Line-by-line debugging. Causes DB::DB() subroutine to be called for each +statement executed. Also causes saving source code lines (like 0x400). + +=item 0x04 + +Switch off optimizations. + +=item 0x08 + +Preserve more data for future interactive inspections. + +=item 0x10 + +Keep info about source lines on which a subroutine is defined. + +=item 0x20 + +Start with single-step on. + +=item 0x40 + +Use subroutine address instead of name when reporting. + +=item 0x80 + +Report C<goto &subroutine> as well. + +=item 0x100 + +Provide informative "file" names for evals based on the place they were compiled. 
+ +=item 0x200 + +Provide informative names to anonymous subroutines based on the place they +were compiled. + +=item 0x400 + +Save source code lines into C<@{"_<$filename"}>. + +=back + +Some bits may be relevant at compile-time only, some at +run-time only. This is a new mechanism and the details may change. +See also L<perldebguts>. + +=item $LAST_REGEXP_CODE_RESULT + +=item $^R +X<$^R> X<$LAST_REGEXP_CODE_RESULT> + +The result of evaluation of the last successful C<(?{ code })> +regular expression assertion (see L<perlre>). May be written to. + +=item $EXCEPTIONS_BEING_CAUGHT + +=item $^S +X<$^S> X<$EXCEPTIONS_BEING_CAUGHT> + +Current state of the interpreter. + + $^S State + --------- ------------------- + undef Parsing module/eval + true (1) Executing an eval + false (0) Otherwise + +The first state may happen in $SIG{__DIE__} and $SIG{__WARN__} handlers. + +=item $BASETIME + +=item $^T +X<$^T> X<$BASETIME> + +The time at which the program began running, in seconds since the +epoch (beginning of 1970). The values returned by the B<-M>, B<-A>, +and B<-C> filetests are based on this value. + +=item ${^TAINT} + +Reflects if taint mode is on or off. 1 for on (the program was run with +B<-T>), 0 for off, -1 when only taint warnings are enabled (i.e. with +B<-t> or B<-TU>). This variable is read-only. + +=item ${^UNICODE} + +Reflects certain Unicode settings of Perl. See L<perlrun> +documentation for the C<-C> switch for more information about +the possible values. This variable is set during Perl startup +and is thereafter read-only. + +=item ${^UTF8CACHE} + +This variable controls the state of the internal UTF-8 offset caching code. +1 for on (the default), 0 for off, -1 to debug the caching code by checking +all its results against linear scans, and panicking on any discrepancy. + +=item ${^UTF8LOCALE} + +This variable indicates whether a UTF-8 locale was detected by perl at +startup.
This information is used by perl when it's in +adjust-utf8ness-to-locale mode (as when run with the C<-CL> command-line +switch); see L<perlrun> for more info on this. + +=item $PERL_VERSION + +=item $^V +X<$^V> X<$PERL_VERSION> + +The revision, version, and subversion of the Perl interpreter, represented +as a C<version> object. + +This variable first appeared in perl 5.6.0; earlier versions of perl will +see an undefined value. Before perl 5.10.0 $^V was represented as a v-string. + +$^V can be used to determine whether the Perl interpreter executing a +script is in the right range of versions. (Mnemonic: use ^V for Version +Control.) Example: + + warn "Hashes not randomized!\n" if !$^V or $^V lt v5.8.1 + +To convert C<$^V> into its string representation use sprintf()'s +C<"%vd"> conversion: + + printf "version is v%vd\n", $^V; # Perl's version + +See the documentation of C<use VERSION> and C<require VERSION> +for a convenient way to fail if the running Perl interpreter is too old. + +See also C<$]> for an older representation of the Perl version. + +=item $WARNING + +=item $^W +X<$^W> X<$WARNING> + +The current value of the warning switch, initially true if B<-w> +was used, false otherwise, but directly modifiable. (Mnemonic: +related to the B<-w> switch.) See also L<warnings>. + +=item ${^WARNING_BITS} + +The current set of warning checks enabled by the C<use warnings> pragma. +See the documentation of C<warnings> for more details. + +=item ${^WIN32_SLOPPY_STAT} + +If this variable is set to a true value, then stat() on Windows will +not try to open the file. This means that the link count cannot be +determined and file attributes may be out of date if additional +hardlinks to the file exist. On the other hand, not opening the file +is considerably faster, especially for files on network drives. + +This variable could be set in the F<sitecustomize.pl> file to +configure the local Perl installation to use "sloppy" stat() by +default. 
See L<perlrun> for more information about site +customization. + +=item $EXECUTABLE_NAME + +=item $^X +X<$^X> X<$EXECUTABLE_NAME> + +The name used to execute the current copy of Perl, from C's +C<argv[0]> or (where supported) F</proc/self/exe>. + +Depending on the host operating system, the value of $^X may be +a relative or absolute pathname of the perl program file, or may +be the string used to invoke perl but not the pathname of the +perl program file. Also, most operating systems permit invoking +programs that are not in the PATH environment variable, so there +is no guarantee that the value of $^X is in PATH. For VMS, the +value may or may not include a version number. + +You usually can use the value of $^X to re-invoke an independent +copy of the same perl that is currently running, e.g., + + @first_run = `$^X -le "print int rand 100 for 1..100"`; + +But recall that not all operating systems support forking or +capturing of the output of commands, so this complex statement +may not be portable. + +It is not safe to use the value of $^X as a path name of a file, +as some operating systems that have a mandatory suffix on +executable files do not require use of the suffix when invoking +a command. To convert the value of $^X to a path name, use the +following statements: + + # Build up a set of file names (not command names). + use Config; + $this_perl = $^X; + if ($^O ne 'VMS') + {$this_perl .= $Config{_exe} + unless $this_perl =~ m/$Config{_exe}$/i;} + +Because many operating systems permit anyone with read access to +the Perl program file to make a copy of it, patch the copy, and +then execute the copy, the security-conscious Perl programmer +should take care to invoke the installed copy of perl, not the +copy referenced by $^X. The following statements accomplish +this goal, and produce a pathname that can be invoked as a +command or referenced as a file. 
+ + use Config; + $secure_perl_path = $Config{perlpath}; + if ($^O ne 'VMS') + {$secure_perl_path .= $Config{_exe} + unless $secure_perl_path =~ m/$Config{_exe}$/i;} + +=item ARGV +X<ARGV> + +The special filehandle that iterates over command-line filenames in +C<@ARGV>. Usually written as the null filehandle in the angle operator +C<< <> >>. Note that currently C<ARGV> only has its magical effect +within the C<< <> >> operator; elsewhere it is just a plain filehandle +corresponding to the last file opened by C<< <> >>. In particular, +passing C<\*ARGV> as a parameter to a function that expects a filehandle +may not cause your function to automatically read the contents of all the +files in C<@ARGV>. + +=item $ARGV +X<$ARGV> + +contains the name of the current file when reading from <>. + +=item @ARGV +X<@ARGV> + +The array @ARGV contains the command-line arguments intended for +the script. C<$#ARGV> is generally the number of arguments minus +one, because C<$ARGV[0]> is the first argument, I<not> the program's +command name itself. See C<$0> for the command name. + +=item ARGVOUT +X<ARGVOUT> + +The special filehandle that points to the currently open output file +when doing edit-in-place processing with B<-i>. Useful when you have +to do a lot of inserting and don't want to keep modifying $_. See +L<perlrun> for the B<-i> switch. + +=item @F +X<@F> + +The array @F contains the fields of each line read in when autosplit +mode is turned on. See L<perlrun> for the B<-a> switch. This array +is package-specific, and must be declared or given a full package name +if not in package main when running under C<strict 'vars'>. + +=item @INC +X<@INC> + +The array @INC contains the list of places that the C<do EXPR>, +C<require>, or C<use> constructs look for their library files. 
It +initially consists of the arguments to any B<-I> command-line +switches, followed by the default Perl library, probably +F</usr/local/lib/perl>, followed by ".", to represent the current +directory. ("." will not be appended if taint checks are enabled, either by +C<-T> or by C<-t>.) If you need to modify this at runtime, you should use +the C<use lib> pragma to get the machine-dependent library properly +loaded also: + + use lib '/mypath/libdir/'; + use SomeMod; + +You can also insert hooks into the file inclusion system by putting Perl +code directly into @INC. Those hooks may be subroutine references, array +references or blessed objects. See L<perlfunc/require> for details. + +=item @ARG + +=item @_ +X<@_> X<@ARG> + +Within a subroutine the array @_ contains the parameters passed to that +subroutine. See L<perlsub>. + +=item %INC +X<%INC> + +The hash %INC contains entries for each filename included via the +C<do>, C<require>, or C<use> operators. The key is the filename +you specified (with module names converted to pathnames), and the +value is the location of the file found. The C<require> +operator uses this hash to determine whether a particular file has +already been included. + +If the file was loaded via a hook (e.g. a subroutine reference, see +L<perlfunc/require> for a description of these hooks), this hook is +by default inserted into %INC in place of a filename. Note, however, +that the hook may have set the %INC entry by itself to provide some more +specific info. + +=item %ENV + +=item $ENV{expr} +X<%ENV> + +The hash %ENV contains your current environment. Setting a +value in C<ENV> changes the environment for any child processes +you subsequently fork() off. + +=item %SIG + +=item $SIG{expr} +X<%SIG> + +The hash C<%SIG> contains signal handlers for signals. 
For example: + + sub handler { # 1st argument is signal name + my($sig) = @_; + print "Caught a SIG$sig--shutting down\n"; + close(LOG); + exit(0); + } + + $SIG{'INT'} = \&handler; + $SIG{'QUIT'} = \&handler; + ... + $SIG{'INT'} = 'DEFAULT'; # restore default action + $SIG{'QUIT'} = 'IGNORE'; # ignore SIGQUIT + +Using a value of C<'IGNORE'> usually has the effect of ignoring the +signal, except for the C<CHLD> signal. See L<perlipc> for more about +this special case. + +Here are some other examples: + + $SIG{"PIPE"} = "Plumber"; # assumes main::Plumber (not recommended) + $SIG{"PIPE"} = \&Plumber; # just fine; assume current Plumber + $SIG{"PIPE"} = *Plumber; # somewhat esoteric + $SIG{"PIPE"} = Plumber(); # oops, what did Plumber() return?? + +Be sure not to use a bareword as the name of a signal handler, +lest you inadvertently call it. + +If your system has the sigaction() function then signal handlers are +installed using it. This means you get reliable signal handling. + +The default delivery policy of signals changed in Perl 5.8.0 from +immediate (also known as "unsafe") to deferred, also known as +"safe signals". See L<perlipc> for more information. + +Certain internal hooks can be also set using the %SIG hash. The +routine indicated by C<$SIG{__WARN__}> is called when a warning message is +about to be printed. The warning message is passed as the first +argument. The presence of a C<__WARN__> hook causes the ordinary printing +of warnings to C<STDERR> to be suppressed. You can use this to save warnings +in a variable, or turn warnings into fatal errors, like this: + + local $SIG{__WARN__} = sub { die $_[0] }; + eval $proggie; + +As the C<'IGNORE'> hook is not supported by C<__WARN__>, you can +disable warnings using the empty subroutine: + + local $SIG{__WARN__} = sub {}; + +The routine indicated by C<$SIG{__DIE__}> is called when a fatal exception +is about to be thrown. The error message is passed as the first +argument. 
When a C<__DIE__> hook routine returns, the exception +processing continues as it would have in the absence of the hook, +unless the hook routine itself exits via a C<goto>, a loop exit, or a C<die()>. +The C<__DIE__> handler is explicitly disabled during the call, so that you +can die from a C<__DIE__> handler. Similarly for C<__WARN__>. + +Due to an implementation glitch, the C<$SIG{__DIE__}> hook is called +even inside an eval(). Do not use this to rewrite a pending exception +in C<$@>, or as a bizarre substitute for overriding C<CORE::GLOBAL::die()>. +This strange action at a distance may be fixed in a future release +so that C<$SIG{__DIE__}> is only called if your program is about +to exit, as was the original intent. Any other use is deprecated. + +C<__DIE__>/C<__WARN__> handlers are very special in one respect: +they may be called to report (probable) errors found by the parser. +In such a case the parser may be in inconsistent state, so any +attempt to evaluate Perl code from such a handler will probably +result in a segfault. This means that warnings or errors that +result from parsing Perl should be used with extreme caution, like +this: + + require Carp if defined $^S; + Carp::confess("Something wrong") if defined &Carp::confess; + die "Something wrong, but could not load Carp to give backtrace... + To see backtrace try starting Perl with -MCarp switch"; + +Here the first line will load Carp I<unless> it is the parser who +called the handler. The second line will print backtrace and die if +Carp was available. The third line will be executed only if Carp was +not available. + +See L<perlfunc/die>, L<perlfunc/warn>, L<perlfunc/eval>, and +L<warnings> for additional information. + +=back + +=head2 Error Indicators +X<error> X<exception> + +The variables C<$@>, C<$!>, C<$^E>, and C<$?> contain information +about different types of error conditions that may appear during +execution of a Perl program. 
The variables are shown ordered by +the "distance" between the subsystem which reported the error and +the Perl process. They correspond to errors detected by the Perl +interpreter, C library, operating system, or an external program, +respectively. + +To illustrate the differences between these variables, consider the +following Perl expression, which uses a single-quoted string: + + eval q{ + open my $pipe, "/cdrom/install |" or die $!; + my @res = <$pipe>; + close $pipe or die "bad pipe: $?, $!"; + }; + +After execution of this statement all 4 variables may have been set. + +C<$@> is set if the string to be C<eval>-ed did not compile (this +may happen if C<open> or C<close> were imported with bad prototypes), +or if Perl code executed during evaluation die()d. In these cases +the value of $@ is the compile error, or the argument to C<die> +(which will interpolate C<$!> and C<$?>). (See also L<Fatal>, +though.) + +When the eval() expression above is executed, open(), C<< <PIPE> >>, +and C<close> are translated to calls in the C run-time library and +thence to the operating system kernel. C<$!> is set to the C library's +C<errno> if one of these calls fails. + +Under a few operating systems, C<$^E> may contain a more verbose +error indicator, such as in this case, "CDROM tray not closed." +Systems that do not support extended error messages leave C<$^E> +the same as C<$!>. + +Finally, C<$?> may be set to a non-0 value if the external program +F</cdrom/install> fails. The upper eight bits reflect specific +error conditions encountered by the program (the program's exit() +value). The lower eight bits reflect the mode of failure, like signal +death and core dump information. See wait(2) for details. In +contrast to C<$!> and C<$^E>, which are set only if an error condition +is detected, the variable C<$?> is set on each C<wait> or pipe +C<close>, overwriting the old value. This is more like C<$@>, which +on every eval() is always set on failure and cleared on success.
+ +For more details, see the individual descriptions at C<$@>, C<$!>, C<$^E>, +and C<$?>. + +=head2 Technical Note on the Syntax of Variable Names + +Variable names in Perl can have several formats. Usually, they +must begin with a letter or underscore, in which case they can be +arbitrarily long (up to an internal limit of 251 characters) and +may contain letters, digits, underscores, or the special sequence +C<::> or C<'>. In this case, the part before the last C<::> or +C<'> is taken to be a I<package qualifier>; see L<perlmod>. + +Perl variable names may also be a sequence of digits or a single +punctuation or control character. These names are all reserved for +special uses by Perl; for example, the all-digits names are used +to hold data captured by backreferences after a regular expression +match. Perl has a special syntax for the single-control-character +names: It understands C<^X> (caret C<X>) to mean the control-C<X> +character. For example, the notation C<$^W> (dollar-sign caret +C<W>) is the scalar variable whose name is the single character +control-C<W>. This is better than typing a literal control-C<W> +into your program. + +Finally, new in Perl 5.6, Perl variable names may be alphanumeric +strings that begin with control characters (or better yet, a caret). +These variables must be written in the form C<${^Foo}>; the braces +are not optional. C<${^Foo}> denotes the scalar variable whose +name is a control-C<F> followed by two C<o>'s. These variables are +reserved for future special uses by Perl, except for the ones that +begin with C<^_> (control-underscore or caret-underscore). No +control-character name that begins with C<^_> will acquire a special +meaning in any future version of Perl; such names may therefore be +used safely in programs. C<$^_> itself, however, I<is> reserved. 
+ +Perl identifiers that begin with digits, control characters, or +punctuation characters are exempt from the effects of the C<package> +declaration and are always forced to be in package C<main>; they are +also exempt from C<strict 'vars'> errors. A few other names are also +exempt in these ways: + + ENV STDIN + INC STDOUT + ARGV STDERR + ARGVOUT _ + SIG + +In particular, the new special C<${^_XYZ}> variables are always taken +to be in package C<main>, regardless of any C<package> declarations +presently in scope. + +=head1 BUGS + +Due to an unfortunate accident of Perl's implementation, C<use +English> imposes a considerable performance penalty on all regular +expression matches in a program, regardless of whether they occur +in the scope of C<use English>. For that reason, saying C<use +English> in libraries is strongly discouraged. See the +Devel::SawAmpersand module documentation from CPAN +( http://www.cpan.org/modules/by-module/Devel/ ) +for more information. Writing C<use English '-no_match_vars';> +avoids the performance penalty. + +Having to even think about the C<$^S> variable in your exception +handlers is simply wrong. C<$SIG{__DIE__}> as currently implemented +invites grievous and difficult to track down errors. Avoid it +and use an C<END{}> or CORE::GLOBAL::die override instead. |