summary refs log tree commit diff
diff options
context:
space:
mode:
author Nicholas Clark <nick@ccl4.org> 2004-07-04 20:23:50 +0000
committer Nicholas Clark <nick@ccl4.org> 2004-07-04 20:23:50 +0000
commit dce071b572883cc225e8123856c336fbf5535f4e (patch)
tree a41ff8779aefbdc239a875e3b3981e9052768ae9
parent 60e3e0e40e5ad38260e746cde45989d7ca4ba1a6 (diff)
download perl-dce071b572883cc225e8123856c336fbf5535f4e.tar.gz
Integrate:
[ 22992]
Fix for: [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text
Use the suggested regex fix, plus some tests.

[ 22997]
Cleanup the main regex in Text::ParseWords and make the parse_line() routine faster. Add a Unicode test case.

p4raw-link: @22997 on //depot/perl: 429b060a3290b7ecf98534144fcaf0fb46b2afe3
p4raw-link: @22992 on //depot/perl: a8c6c617075a77facc3560cfdaa8948a894f9baf

p4raw-id: //depot/maint-5.8/perl@23034
p4raw-integrated: from //depot/perl@22997 'copy in' lib/Text/ParseWords.pm (@22992..)
p4raw-integrated: from //depot/perl@22992 'ignore' lib/Text/ParseWords.t (@10676..)
-rw-r--r-- lib/Text/ParseWords.pm 33
-rwxr-xr-x lib/Text/ParseWords.t 19
2 files changed, 32 insertions, 20 deletions
diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm
index e758bc6ba2..cca28bff66 100644
--- a/lib/Text/ParseWords.pm
+++ b/lib/Text/ParseWords.pm
@@ -1,7 +1,7 @@
package Text::ParseWords;
use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
-$VERSION = "3.21";
+$VERSION = "3.23";
require 5.000;
@@ -53,32 +53,27 @@ sub parse_line {
use re 'taint'; # if it's tainted, leave it as such
my($delimiter, $keep, $line) = @_;
- my($quote, $quoted, $unquoted, $delim, $word, @pieces);
+ my($word, @pieces);
while (length($line)) {
-
- ($quote, $quoted, undef, $unquoted, $delim, undef) =
- $line =~ m/^(["']) # a $quote
- ((?:\\.|(?!\1)[^\\])*) # and $quoted text
- \1 # followed by the same quote
- ([\000-\377]*) # and the rest
- | # --OR--
- ^((?:\\.|[^\\"'])*?) # an $unquoted text
- (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))
- # plus EOL, delimiter, or quote
- ([\000-\377]*) # the rest
- /x; # extended layout
- return() unless( $quote || length($unquoted) || length($delim));
-
- $line = $+;
+ $line =~ s/^(["']) # a $quote
+ ((?:\\.|(?!\1)[^\\])*) # and $quoted text
+ \1 # followed by the same quote
+ | # --OR--
+ ^((?:\\.|[^\\"'])*?) # an $unquoted text
+ (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))
+ # plus EOL, delimiter, or quote
+ //xs; # extended layout
+ my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4);
+ return() unless( defined($quote) || length($unquoted) || length($delim));
if ($keep) {
$quoted = "$quote$quoted$quote";
}
else {
- $unquoted =~ s/\\(.)/$1/g;
+ $unquoted =~ s/\\(.)/$1/sg;
if (defined $quote) {
- $quoted =~ s/\\(.)/$1/g if ($quote eq '"');
+ $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
}
}
diff --git a/lib/Text/ParseWords.t b/lib/Text/ParseWords.t
index 261d81f3a4..c776e66302 100755
--- a/lib/Text/ParseWords.t
+++ b/lib/Text/ParseWords.t
@@ -8,7 +8,7 @@ BEGIN {
use warnings;
use Text::ParseWords;
-print "1..18\n";
+print "1..21\n";
@words = shellwords(qq(foo "bar quiz" zoo));
print "not " if $words[0] ne 'foo';
@@ -108,3 +108,20 @@ print "ok 17\n";
@words = quotewords(' ', 1, '4 3 2 1 0');
print "not " unless join(";", @words) eq qq(4;3;2;1;0);
print "ok 18\n";
+
+# [perl #30442] Text::ParseWords does not handle backslashed newline inside quoted text
+$string = qq{"field1" "field2\\\nstill field2" "field3"};
+
+$result = join('|', parse_line("\t", 1, $string));
+print "not " unless $result eq qq{"field1"|"field2\\\nstill field2"|"field3"};
+print "ok 19\n";
+
+$result = join('|', parse_line("\t", 0, $string));
+print "not " unless $result eq "field1|field2\nstill field2|field3";
+print "ok 20\n";
+
+# unicode
+$string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"};
+$result = join('|', parse_line("\x{1234}", 0, $string));
+print "not " unless $result eq "field1|field2\x{1234}still field2|field3";
+print "ok 21\n";