diff options
author | Nicholas Clark <nick@ccl4.org> | 2004-07-04 20:23:50 +0000 |
---|---|---|
committer | Nicholas Clark <nick@ccl4.org> | 2004-07-04 20:23:50 +0000 |
commit | dce071b572883cc225e8123856c336fbf5535f4e (patch) | |
tree | a41ff8779aefbdc239a875e3b3981e9052768ae9 | |
parent | 60e3e0e40e5ad38260e746cde45989d7ca4ba1a6 (diff) | |
download | perl-dce071b572883cc225e8123856c336fbf5535f4e.tar.gz |
Integrate:
[ 22992]
Fix for: [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text
Use the suggested regex fix, plus some tests.
[ 22997]
Cleanup the main regex in Text::ParseWords and make the
parse_line() routine faster. Add a Unicode test case.
p4raw-link: @22997 on //depot/perl: 429b060a3290b7ecf98534144fcaf0fb46b2afe3
p4raw-link: @22992 on //depot/perl: a8c6c617075a77facc3560cfdaa8948a894f9baf
p4raw-id: //depot/maint-5.8/perl@23034
p4raw-integrated: from //depot/perl@22997 'copy in'
lib/Text/ParseWords.pm (@22992..)
p4raw-integrated: from //depot/perl@22992 'ignore'
lib/Text/ParseWords.t (@10676..)
-rw-r--r-- | lib/Text/ParseWords.pm | 33 |
-rwxr-xr-x | lib/Text/ParseWords.t | 19 |
2 files changed, 32 insertions, 20 deletions
diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm index e758bc6ba2..cca28bff66 100644 --- a/lib/Text/ParseWords.pm +++ b/lib/Text/ParseWords.pm @@ -1,7 +1,7 @@ package Text::ParseWords; use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE); -$VERSION = "3.21"; +$VERSION = "3.23"; require 5.000; @@ -53,32 +53,27 @@ sub parse_line { use re 'taint'; # if it's tainted, leave it as such my($delimiter, $keep, $line) = @_; - my($quote, $quoted, $unquoted, $delim, $word, @pieces); + my($word, @pieces); while (length($line)) { - - ($quote, $quoted, undef, $unquoted, $delim, undef) = - $line =~ m/^(["']) # a $quote - ((?:\\.|(?!\1)[^\\])*) # and $quoted text - \1 # followed by the same quote - ([\000-\377]*) # and the rest - | # --OR-- - ^((?:\\.|[^\\"'])*?) # an $unquoted text - (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["'])) - # plus EOL, delimiter, or quote - ([\000-\377]*) # the rest - /x; # extended layout - return() unless( $quote || length($unquoted) || length($delim)); - - $line = $+; + $line =~ s/^(["']) # a $quote + ((?:\\.|(?!\1)[^\\])*) # and $quoted text + \1 # followed by the same quote + | # --OR-- + ^((?:\\.|[^\\"'])*?) 
# an $unquoted text + (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["'])) + # plus EOL, delimiter, or quote + //xs; # extended layout + my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4); + return() unless( defined($quote) || length($unquoted) || length($delim)); if ($keep) { $quoted = "$quote$quoted$quote"; } else { - $unquoted =~ s/\\(.)/$1/g; + $unquoted =~ s/\\(.)/$1/sg; if (defined $quote) { - $quoted =~ s/\\(.)/$1/g if ($quote eq '"'); + $quoted =~ s/\\(.)/$1/sg if ($quote eq '"'); $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'"); } } diff --git a/lib/Text/ParseWords.t b/lib/Text/ParseWords.t index 261d81f3a4..c776e66302 100755 --- a/lib/Text/ParseWords.t +++ b/lib/Text/ParseWords.t @@ -8,7 +8,7 @@ BEGIN { use warnings; use Text::ParseWords; -print "1..18\n"; +print "1..21\n"; @words = shellwords(qq(foo "bar quiz" zoo)); print "not " if $words[0] ne 'foo'; @@ -108,3 +108,20 @@ print "ok 17\n"; @words = quotewords(' ', 1, '4 3 2 1 0'); print "not " unless join(";", @words) eq qq(4;3;2;1;0); print "ok 18\n"; + +# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text +$string = qq{"field1" "field2\\\nstill field2" "field3"}; + +$result = join('|', parse_line("\t", 1, $string)); +print "not " unless $result eq qq{"field1"|"field2\\\nstill field2"|"field3"}; +print "ok 19\n"; + +$result = join('|', parse_line("\t", 0, $string)); +print "not " unless $result eq "field1|field2\nstill field2|field3"; +print "ok 20\n"; + +# unicode +$string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"}; +$result = join('|', parse_line("\x{1234}", 0, $string)); +print "not " unless $result eq "field1|field2\x{1234}still field2|field3"; +print "ok 21\n"; |