diff options
-rw-r--r-- | Porting/Maintainers.pl | 8 | ||||
-rw-r--r-- | lib/Text/ParseWords.pm | 33 | ||||
-rwxr-xr-x | lib/Text/ParseWords.t | 254 | ||||
-rw-r--r-- | lib/Text/ParseWords/taint.t | 15 |
4 files changed, 164 insertions, 146 deletions
diff --git a/Porting/Maintainers.pl b/Porting/Maintainers.pl index e33acff91a..f32ebd99be 100644 --- a/Porting/Maintainers.pl +++ b/Porting/Maintainers.pl @@ -18,6 +18,7 @@ package Maintainers; 'arandal' => 'Allison Randal <allison@perl.org>', 'audreyt' => 'Audrey Tang <cpan@audreyt.org>', 'avar' => 'Ævar Arnfjörð Bjarmason <avar@cpan.org>', + 'chorny' => "Alexandr Ciornii <alexchorny\100gmail.com>", 'corion' => 'Max Maischein <corion@corion.net>', 'craig' => 'Craig Berry <craigberry@mac.com>', 'dankogai' => 'Dan Kogai <dankogai@cpan.org>', @@ -836,6 +837,13 @@ package Maintainers; 'CPAN' => 1, }, + 'Text::ParseWords' => + { + 'MAINTAINER' => 'chorny', + 'FILES' => q[lib/Text/ParseWords{.pm,.t,}], + 'CPAN' => 1, + }, + 'Text::Soundex' => { 'MAINTAINER' => 'markm', diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm index 6235d3cb90..f1b5937904 100644 --- a/lib/Text/ParseWords.pm +++ b/lib/Text/ParseWords.pm @@ -1,14 +1,15 @@ package Text::ParseWords; -use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); -$VERSION = "3.26"; +use strict; +require 5.006; +our $VERSION = "3.27"; -require 5.000; use Exporter; -@ISA = qw(Exporter); -@EXPORT = qw(shellwords quotewords nested_quotewords parse_line); -@EXPORT_OK = qw(old_shellwords); +our @ISA = qw(Exporter); +our @EXPORT = qw(shellwords quotewords nested_quotewords parse_line); +our @EXPORT_OK = qw(old_shellwords); +our $PERL_SINGLE_QUOTE; sub shellwords { @@ -181,11 +182,11 @@ Text::ParseWords - parse text into an array of tokens or array of arrays =head1 SYNOPSIS use Text::ParseWords; - @lists = &nested_quotewords($delim, $keep, @lines); - @words = "ewords($delim, $keep, @lines); - @words = &shellwords(@lines); - @words = &parse_line($delim, $keep, $line); - @words = &old_shellwords(@lines); # DEPRECATED! + @lists = nested_quotewords($delim, $keep, @lines); + @words = quotewords($delim, $keep, @lines); + @words = shellwords(@lines); + @words = parse_line($delim, $keep, $line); + @words = old_shellwords(@lines); # DEPRECATED! =head1 DESCRIPTION @@ -222,7 +223,7 @@ Unix shells. The sample program: use Text::ParseWords; - @words = "ewords('\s+', 0, q{this is "a test" of\ quotewords \"for you}); + @words = quotewords('\s+', 0, q{this is "a test" of\ quotewords \"for you}); $i = 0; foreach (@words) { print "$i: <$_>\n"; @@ -269,13 +270,15 @@ backslashed double-quote) =back -Replacing C<"ewords('\s+', 0, q{this is...})> -with C<&shellwords(q{this is...})> +Replacing C<quotewords('\s+', 0, q{this is...})> +with C<shellwords(q{this is...})> is a simpler way to accomplish the same thing. =head1 AUTHORS -Maintainer is Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original +Maintainer: Alexandr Ciornii <alexchornyATgmail.com>. + +Previous maintainer: Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original author unknown). Much of the code for &parse_line() (including the primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>. diff --git a/lib/Text/ParseWords.t b/lib/Text/ParseWords.t index 57bdbd0309..eeee6ee529 100755 --- a/lib/Text/ParseWords.t +++ b/lib/Text/ParseWords.t @@ -1,125 +1,129 @@ -#!./perl - -BEGIN { - chdir 't' if -d 't'; - @INC = '../lib'; -} - -use warnings; -use Text::ParseWords; -use Test::More tests => 27; - -@words = shellwords(qq(foo "bar quiz" zoo)); -is($words[0], 'foo'); -is($words[1], 'bar quiz'); -is($words[2], 'zoo'); - -{ - # Gonna get some undefined things back - no warnings 'uninitialized' ; - - # Test quotewords() with other parameters and null last field - @words = quotewords(':+', 1, 'foo:::"bar:foo":zoo zoo:'); - is(join(";", @words), qq(foo;"bar:foo";zoo zoo;)); -} - -# Test $keep eq 'delimiters' and last field zero -@words = quotewords('\s+', 'delimiters', '4 3 2 1 0'); -is(join(";", @words), qq(4; ;3; ;2; ;1; ;0)); - -# Big ol' nasty test (thanks, Joerk!) -$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd" eee\\\\\\"ffff" "gg"'; - -# First with $keep == 1 -$result = join('|', parse_line('\s+', 1, $string)); -is($result, 'aaaa"bbbbb"|cc\\ cc|\\\\\\"dddd" eee\\\\\\"ffff"|"gg"'); - -# Now, $keep == 0 -$result = join('|', parse_line('\s+', 0, $string)); -is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\"ffff|gg'); - -# Now test single quote behavior -$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd\' eee\\\\\\"ffff\' gg'; -$result = join('|', parse_line('\s+', 0, $string)); -is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\\\\\"ffff|gg'); - -# Make sure @nested_quotewords does the right thing -@lists = nested_quotewords('\s+', 0, 'a b c', '1 2 3', 'x y z'); -is (@lists, 3); -is (@{$lists[0]}, 3); -is (@{$lists[1]}, 3); -is (@{$lists[2]}, 3); - -# Now test error return -$string = 'foo bar baz"bach blech boop'; - -@words = shellwords($string); -is(@words, 0); - -@words = parse_line('s+', 0, $string); -is(@words, 0); - -@words = quotewords('s+', 0, $string); -is(@words, 0); - -{ - # Gonna get some more undefined things back - no warnings 'uninitialized' ; - - @words = nested_quotewords('s+', 0, $string); - is(@words, 0); - - # Now test empty fields - $result = join('|', parse_line(':', 0, 'foo::0:"":::')); - is($result, 'foo||0||||'); - - # Test for 0 in quotes without $keep - $result = join('|', parse_line(':', 0, ':"0":')); - is($result, '|0|'); - - # Test for \001 in quoted string - $result = join('|', parse_line(':', 0, ':"' . "\001" . '":')); - is($result, "|\1|"); - -} - -# Now test perlish single quote behavior -$Text::ParseWords::PERL_SINGLE_QUOTE = 1; -$string = 'aaaa"bbbbb" cc\ cc \\\\\"dddd\' eee\\\\\"\\\'ffff\' gg'; -$result = join('|', parse_line('\s+', 0, $string)); -is($result, 'aaaabbbbb|cc cc|\"dddd eee\\\\"\'ffff|gg'); - -# test whitespace in the delimiters -@words = quotewords(' ', 1, '4 3 2 1 0'); -is(join(";", @words), qq(4;3;2;1;0)); - -# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text -$string = qq{"field1" "field2\\\nstill field2" "field3"}; - -$result = join('|', parse_line("\t", 1, $string)); -is($result, qq{"field1"|"field2\\\nstill field2"|"field3"}); - -$result = join('|', parse_line("\t", 0, $string)); -is($result, "field1|field2\nstill field2|field3"); - -# unicode -$string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"}; -$result = join('|', parse_line("\x{1234}", 0, $string)); -is($result, "field1|field2\x{1234}still field2|field3"); - -# missing quote after matching regex used to hang after change #22997 -"1234" =~ /(1)(2)(3)(4)/; -$string = qq{"missing quote}; -$result = join('|', shellwords($string)); -is($result, ""); - -# make sure shellwords strips out leading whitespace and trailng undefs -# from parse_line, so it's behavior is more like /bin/sh -$result = join('|', shellwords(" aa \\ \\ bb ", " \\ ", "cc dd ee\\ ")); -is($result, "aa| | bb| |cc|dd|ee "); - -$SIG{ALRM} = sub {die "Timeout!"}; -alarm(3); -@words = Text::ParseWords::old_shellwords("foo\\"); -is(@words, 1); -alarm(0); +#!./perl
+
+BEGIN {
+ if( $ENV{PERL_CORE} ) {
+ chdir 't' if -d 't';
+ @INC = '../lib';
+ }
+}
+
+use warnings;
+use Text::ParseWords;
+use Test::More tests => 27;
+
+@words = shellwords(qq(foo "bar quiz" zoo));
+is($words[0], 'foo');
+is($words[1], 'bar quiz');
+is($words[2], 'zoo');
+
+{
+ # Gonna get some undefined things back
+ no warnings 'uninitialized' ;
+
+ # Test quotewords() with other parameters and null last field
+ @words = quotewords(':+', 1, 'foo:::"bar:foo":zoo zoo:');
+ is(join(";", @words), qq(foo;"bar:foo";zoo zoo;));
+}
+
+# Test $keep eq 'delimiters' and last field zero
+@words = quotewords('\s+', 'delimiters', '4 3 2 1 0');
+is(join(";", @words), qq(4; ;3; ;2; ;1; ;0));
+
+# Big ol' nasty test (thanks, Joerk!)
+$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd" eee\\\\\\"ffff" "gg"';
+
+# First with $keep == 1
+$result = join('|', parse_line('\s+', 1, $string));
+is($result, 'aaaa"bbbbb"|cc\\ cc|\\\\\\"dddd" eee\\\\\\"ffff"|"gg"');
+
+# Now, $keep == 0
+$result = join('|', parse_line('\s+', 0, $string));
+is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\"ffff|gg');
+
+# Now test single quote behavior
+$string = 'aaaa"bbbbb" cc\\ cc \\\\\\"dddd\' eee\\\\\\"ffff\' gg';
+$result = join('|', parse_line('\s+', 0, $string));
+is($result, 'aaaabbbbb|cc cc|\\"dddd eee\\\\\\"ffff|gg');
+
+# Make sure @nested_quotewords does the right thing
+@lists = nested_quotewords('\s+', 0, 'a b c', '1 2 3', 'x y z');
+is (@lists, 3);
+is (@{$lists[0]}, 3);
+is (@{$lists[1]}, 3);
+is (@{$lists[2]}, 3);
+
+# Now test error return
+$string = 'foo bar baz"bach blech boop';
+
+@words = shellwords($string);
+is(@words, 0);
+
+@words = parse_line('s+', 0, $string);
+is(@words, 0);
+
+@words = quotewords('s+', 0, $string);
+is(@words, 0);
+
+{
+ # Gonna get some more undefined things back
+ no warnings 'uninitialized' ;
+
+ @words = nested_quotewords('s+', 0, $string);
+ is(@words, 0);
+
+ # Now test empty fields
+ $result = join('|', parse_line(':', 0, 'foo::0:"":::'));
+ is($result, 'foo||0||||');
+
+ # Test for 0 in quotes without $keep
+ $result = join('|', parse_line(':', 0, ':"0":'));
+ is($result, '|0|');
+
+ # Test for \001 in quoted string
+ $result = join('|', parse_line(':', 0, ':"' . "\001" . '":'));
+ is($result, "|\1|");
+
+}
+
+# Now test perlish single quote behavior
+$Text::ParseWords::PERL_SINGLE_QUOTE = 1;
+$string = 'aaaa"bbbbb" cc\ cc \\\\\"dddd\' eee\\\\\"\\\'ffff\' gg';
+$result = join('|', parse_line('\s+', 0, $string));
+is($result, 'aaaabbbbb|cc cc|\"dddd eee\\\\"\'ffff|gg');
+
+# test whitespace in the delimiters
+@words = quotewords(' ', 1, '4 3 2 1 0');
+is(join(";", @words), qq(4;3;2;1;0));
+
+# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text
+$string = qq{"field1" "field2\\\nstill field2" "field3"};
+
+$result = join('|', parse_line("\t", 1, $string));
+is($result, qq{"field1"|"field2\\\nstill field2"|"field3"});
+
+$result = join('|', parse_line("\t", 0, $string));
+is($result, "field1|field2\nstill field2|field3");
+
+SKIP: { # unicode
+ skip "No unicode",1 if $]<5.008;
+ $string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"};
+ $result = join('|', parse_line("\x{1234}", 0, $string));
+ is($result, "field1|field2\x{1234}still field2|field3",'Unicode');
+}
+
+# missing quote after matching regex used to hang after change #22997
+"1234" =~ /(1)(2)(3)(4)/;
+$string = qq{"missing quote};
+$result = join('|', shellwords($string));
+is($result, "");
+
+# make sure shellwords strips out leading whitespace and trailng undefs
+# from parse_line, so it's behavior is more like /bin/sh
+$result = join('|', shellwords(" aa \\ \\ bb ", " \\ ", "cc dd ee\\ "));
+is($result, "aa| | bb| |cc|dd|ee ");
+
+$SIG{ALRM} = sub {die "Timeout!"};
+alarm(3);
+@words = Text::ParseWords::old_shellwords("foo\\");
+is(@words, 1);
+alarm(0);
diff --git a/lib/Text/ParseWords/taint.t b/lib/Text/ParseWords/taint.t index 27f6de50cb..029f27dd68 100644 --- a/lib/Text/ParseWords/taint.t +++ b/lib/Text/ParseWords/taint.t @@ -2,12 +2,15 @@ # [perl #33173] shellwords.pl and tainting BEGIN { - chdir 't' if -d 't'; - @INC = '../lib'; - require Config; - if ($Config::Config{extensions} !~ /\bList\/Util\b/) { - print "1..0 # Skip: Scalar::Util was not built\n"; - exit 0; + if ( $ENV{PERL_CORE} ) { + chdir 't' if -d 't'; + @INC = '../lib'; + require Config; + no warnings 'once'; + if ($Config::Config{extensions} !~ /\bList\/Util\b/) { + print "1..0 # Skip: Scalar::Util was not built\n"; + exit 0; + } } } |