diff options
-rw-r--r--  MANIFEST             |  1
-rw-r--r--  pod/perlfunc.pod     |  9
-rw-r--r--  pod/perlop.pod       | 70
-rw-r--r--  pod/perlre.pod       |  1
-rw-r--r--  pod/perlunicode.pod  | 14
-rwxr-xr-x  t/op/each.t          |  4
-rw-r--r--  t/op/length.t        |  2
-rw-r--r--  t/op/qu.t            | 24
-rw-r--r--  toke.c               |  5
9 files changed, 38 insertions, 92 deletions
@@ -1596,7 +1596,6 @@ t/op/pat.t See if esoteric patterns work t/op/pos.t See if pos works t/op/push.t See if push and pop work t/op/pwent.t See if getpw*() functions work -t/op/qu.t See if qu works t/op/quotemeta.t See if quotemeta works t/op/rand.t See if rand works t/op/range.t See if .. works diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod index c75818e04d..725b50ecd1 100644 --- a/pod/perlfunc.pod +++ b/pod/perlfunc.pod @@ -96,9 +96,8 @@ than one place. =item Functions for SCALARs or strings C<chomp>, C<chop>, C<chr>, C<crypt>, C<hex>, C<index>, C<lc>, C<lcfirst>, -C<length>, C<oct>, C<ord>, C<pack>, C<q/STRING/>, C<qq/STRING/>, C<qu/STRING/>, -C<reverse>, C<rindex>, C<sprintf>, C<substr>, C<tr///>, C<uc>, C<ucfirst>, -C<y///> +C<length>, C<oct>, C<ord>, C<pack>, C<q/STRING/>, C<qq/STRING/>, C<reverse>, +C<rindex>, C<sprintf>, C<substr>, C<tr///>, C<uc>, C<ucfirst>, C<y///> =item Regular expressions and pattern matching @@ -3463,12 +3462,10 @@ but is more efficient. Returns the new number of elements in the array. =item qr/STRING/ -=item qu/STRING/ +=item qx/STRING/ =item qw/STRING/ -=item qx/STRING/ - Generalized quotes. See L<perlop/"Regexp Quote-Like Operators">. =item quotemeta EXPR diff --git a/pod/perlop.pod b/pod/perlop.pod index 2bc889d186..8f2ecde031 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -645,7 +645,6 @@ any pair of delimiters you choose. Customary Generic Meaning Interpolates '' q{} Literal no "" qq{} Literal yes - qu{} Literal yes (UTF-8, see below) `` qx{} Command yes (unless '' is delimiter) qw{} Word list no // m{} Pattern match yes (unless '' is delimiter) @@ -1012,48 +1011,6 @@ Options are: See L<perlre> for additional information on valid syntax for STRING, and for a detailed look at the semantics of regular expressions. -=item qw/STRING/ - -Evaluates to a list of the words extracted out of STRING, using embedded -whitespace as the word delimiters. 
It can be understood as being roughly -equivalent to: - - split(' ', q/STRING/); - -the difference being that it generates a real list at compile time. So -this expression: - - qw(foo bar baz) - -is semantically equivalent to the list: - - 'foo', 'bar', 'baz' - -Some frequently seen examples: - - use POSIX qw( setlocale localeconv ) - @EXPORT = qw( foo bar baz ); - -A common mistake is to try to separate the words with comma or to -put comments into a multi-line C<qw>-string. For this reason, the -C<use warnings> pragma and the B<-w> switch (that is, the C<$^W> variable) -produces warnings if the STRING contains the "," or the "#" character. - -=item qu/STRING/ - -Like L<qq> but explicitly generates UTF-8 from the \0ooo, \xHH, and -\x{HH} constructs if the code point is in the 0x80..0xff range (and -of course for the 0x100.. range). - -Normally you do not need to use this because whether characters are -internally encoded in UTF-8 should be transparent, and you can just -just use qq, also known as "". - -(In qq/STRING/ the \0ooo, \xHH, and the \x{HHH...} constructs -generate bytes for the 0x80..0xff range. For the whole 0x00..0xff -range the generated bytes are host-dependent: in ISO 8859-1 they will -be ISO 8859-1, in EBCDIC they will EBCDIC, and so on.) - =item qx/STRING/ =item `STRING` @@ -1135,6 +1092,33 @@ Just understand what you're getting yourself into. See L<"I/O Operators"> for more discussion. +=item qw/STRING/ + +Evaluates to a list of the words extracted out of STRING, using embedded +whitespace as the word delimiters. It can be understood as being roughly +equivalent to: + + split(' ', q/STRING/); + +the difference being that it generates a real list at compile time. 
So +this expression: + + qw(foo bar baz) + +is semantically equivalent to the list: + + 'foo', 'bar', 'baz' + +Some frequently seen examples: + + use POSIX qw( setlocale localeconv ) + @EXPORT = qw( foo bar baz ); + +A common mistake is to try to separate the words with comma or to +put comments into a multi-line C<qw>-string. For this reason, the +C<use warnings> pragma and the B<-w> switch (that is, the C<$^W> variable) +produces warnings if the STRING contains the "," or the "#" character. + =item s/PATTERN/REPLACEMENT/egimosx Searches a string for a pattern, and if found, replaces that pattern diff --git a/pod/perlre.pod b/pod/perlre.pod index 02dd2cda5d..ce2b9bd952 100644 --- a/pod/perlre.pod +++ b/pod/perlre.pod @@ -179,7 +179,6 @@ In addition, Perl defines the following: \X Match eXtended Unicode "combining character sequence", equivalent to C<(?:\PM\pM*)> \C Match a single C char (octet) even under utf8. - (Currently this does not work correctly.) A C<\w> matches a single alphanumeric character or C<_>, not a whole word. Use C<\w+> to match a string of Perl-identifier characters (which isn't diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod index b8bbc5707c..30a4482260 100644 --- a/pod/perlunicode.pod +++ b/pod/perlunicode.pod @@ -16,8 +16,7 @@ The following areas need further work. There is currently no easy way to mark data read from a file or other external source as being utf8. This will be one of the major areas of -focus in the near future. Unfortunately it is unlikely that the Perl -5.6 and earlier will ever gain this capability. +focus in the near future. =item Regular Expressions @@ -67,8 +66,7 @@ or from literals and constants in the source text. If the C<-C> command line switch is used, (or the ${^WIDE_SYSTEM_CALLS} global flag is set to C<1>), all system calls will use the corresponding wide character APIs. This is currently only implemented -on Windows as other platforms do not have a unified way of handling -wide character APIs. 
+on Windows. Regardless of the above, the C<bytes> pragma can always be used to force byte semantics in a particular lexical scope. See L<bytes>. @@ -129,7 +127,8 @@ attempt to canonicalize variable names for you.) Regular expressions match characters instead of bytes. For instance, "." matches a character instead of a byte. (However, the C<\C> pattern -is available to force a match a single byte ("C<char>" in C, hence C<\C>).) +is provided to force a match of a single byte ("C<char>" in C, hence +C<\C>).) =item * @@ -217,10 +216,7 @@ And finally, C<scalar reverse()> reverses by character rather than by byte. =head2 Character encodings for input and output -This feature is in the process of getting implemented. - -(For Perl 5.6 and earlier the support is unlikely to get integrated -to the core language and some external module will be required.) +[XXX: This feature is not yet implemented.] =head1 CAVEATS diff --git a/t/op/each.t b/t/op/each.t index 397176a40d..2e80dcd009 100755 --- a/t/op/each.t +++ b/t/op/each.t @@ -163,7 +163,7 @@ print "ok 23\n"; print "#$u{$_}\n" for keys %u; # Used to core dump before change #8056.
print "ok 24\n"; -$d = qu"\xe3\x81\x82"; +$d = pack("U*", 0xe3, 0x81, 0x82); %u = ($d => "downgrade"); for (keys %u) { use bytes; @@ -172,6 +172,6 @@ for (keys %u) { } { use bytes; - print "not " if length($d) ne 6 or $d ne qu"\xe3\x81\x82"; + print "not " if length($d) ne 6; print "ok 26\n"; } diff --git a/t/op/length.t b/t/op/length.t index 46f0c59698..df80fcd039 100644 --- a/t/op/length.t +++ b/t/op/length.t @@ -33,7 +33,7 @@ print "ok 3\n"; } { - my $a = qu"\x{80}"; # make "\x{80}" to produce UTF-8 + my $a = pack("U", 0x80); print "not " unless length($a) == 1; print "ok 6\n"; diff --git a/t/op/qu.t b/t/op/qu.t deleted file mode 100644 index 280020445c..0000000000 --- a/t/op/qu.t +++ /dev/null @@ -1,24 +0,0 @@ -print "1..6\n"; - -my $foo = "foo"; - -print "not " unless qu(abc$foo) eq "abcfoo"; -print "ok 1\n"; - -# qu is always Unicode, even in EBCDIC, so \x41 is 'A' and \x{61} is 'a'. - -print "not " unless qu(abc\x41) eq "abcA"; -print "ok 2\n"; - -print "not " unless qu(abc\x{61}$foo) eq "abcafoo"; -print "ok 3\n"; - -print "not " unless qu(\x{41}\x{100}\x61\x{200}) eq "A\x{100}a\x{200}"; -print "ok 4\n"; - -print "not " unless join(" ", unpack("C*", qu(\x80))) eq "194 128"; -print "ok 5\n"; - -print "not " unless join(" ", unpack("C*", qu(\x{100}))) eq "196 128"; -print "ok 6\n"; - @@ -4744,11 +4744,7 @@ Perl_yylex(pTHX) TOKEN('('); case KEY_qq: - case KEY_qu: s = scan_str(s,FALSE,FALSE); - if (tmp == KEY_qu && - is_utf8_string((U8*)SvPVX(PL_lex_stuff), SvCUR(PL_lex_stuff))) - SvUTF8_on(PL_lex_stuff); if (!s) missingterm((char*)0); yylval.ival = OP_STRINGIFY; @@ -5581,7 +5577,6 @@ Perl_keyword(pTHX_ register char *d, I32 len) if (strEQ(d,"q")) return KEY_q; if (strEQ(d,"qr")) return KEY_qr; if (strEQ(d,"qq")) return KEY_qq; - if (strEQ(d,"qu")) return KEY_qu; if (strEQ(d,"qw")) return KEY_qw; if (strEQ(d,"qx")) return KEY_qx; } |