diff options
Diffstat (limited to 'lib/Text/ParseWords.pm')
-rw-r--r-- | lib/Text/ParseWords.pm | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm new file mode 100644 index 0000000000..89278501d1 --- /dev/null +++ b/lib/Text/ParseWords.pm @@ -0,0 +1,170 @@ +package Text::ParseWords; + +require 5.000; +require Exporter; +require AutoLoader; +use Carp; + +@ISA = qw(Exporter AutoLoader); +@EXPORT = qw(shellwords quotewords); +@EXPORT_OK = qw(old_shellwords); + +# This code needs updating to use new Perl 5 features (regexp etc). + +# ParseWords.pm +# +# Usage: +# use ParseWords; +# @words = "ewords($delim, $keep, @lines); +# @words = &shellwords(@lines); +# @words = &old_shellwords(@lines); + +# Hal Pomeranz (pomeranz@netcom.com), 23 March 1994 +# Permission to use and distribute under the same terms as Perl. +# No warranty expressed or implied. + +# Basically an update and generalization of the old shellwords.pl. +# Much code shamelessly stolen from the old version (author unknown). +# +# "ewords() accepts a delimiter (which can be a regular expression) +# and a list of lines and then breaks those lines up into a list of +# words ignoring delimiters that appear inside quotes. +# +# The $keep argument is a boolean flag. If true, the quotes are kept +# with each word, otherwise quotes are stripped in the splitting process. +# $keep also defines whether unprotected backslashes are retained. +# + +1; +__END__ + + +sub shellwords { + + # A &shellwords() replacement is included to demonstrate the new package. + # This version differs from the original in that it will _NOT_ default + # to using $_ if no arguments are given. I personally find the old behavior + # to be a mis-feature. + + local(@lines) = @_; + $lines[$#lines] =~ s/\s+$//; + "ewords('\s+', 0, @lines); +} + + + +sub quotewords { + +# "ewords() works by simply jamming all of @lines into a single +# string in $_ and then pulling off words a bit at a time until $_ +# is exhausted. +# +# The inner "for" loop builds up each word (or $field) one $snippet +# at a time. A $snippet is a quoted string, a backslashed character, +# or an unquoted string. We fall out of the "for" loop when we reach +# the end of $_ or when we hit a delimiter. Falling out of the "for" +# loop, we push the $field we've been building up onto the list of +# @words we'll be returning, and then loop back and pull another word +# off of $_. +# +# The first two cases inside the "for" loop deal with quoted strings. +# The first case matches a double quoted string, removes it from $_, +# and assigns the double quoted string to $snippet in the body of the +# conditional. The second case handles single quoted strings. In +# the third case we've found a quote at the current beginning of $_, +# but it didn't match the quoted string regexps in the first two cases, +# so it must be an unbalanced quote and we croak with an error (which can +# be caught by eval()). +# +# The next case handles backslashed characters, and the next case is the +# exit case on reaching the end of the string or finding a delimiter. +# +# Otherwise, we've found an unquoted thing and we pull of characters one +# at a time until we reach something that could start another $snippet-- +# a quote of some sort, a backslash, or the delimiter. This one character +# at a time behavior was necessary if the delimiter was going to be a +# regexp (love to hear it if you can figure out a better way). + + local($delim, $keep, @lines) = @_; + local(@words,$snippet,$field,$_); + + $_ = join('', @lines); + while ($_) { + $field = ''; + for (;;) { + $snippet = ''; + if (s/^"(([^"\\]|\\[\\"])*)"//) { + $snippet = $1; + $snippet = "\"$snippet\"" if ($keep); + } + elsif (s/^'(([^'\\]|\\[\\'])*)'//) { + $snippet = $1; + $snippet = "'$snippet'" if ($keep); + } + elsif (/^["']/) { + croak "Unmatched quote"; + } + elsif (s/^\\(.)//) { + $snippet = $1; + $snippet = "\\$snippet" if ($keep); + } + elsif (!$_ || s/^$delim//) { + last; + } + else { + while ($_ && !(/^$delim/ || /^['"\\]/)) { + $snippet .= substr($_, 0, 1); + substr($_, 0, 1) = ''; + } + } + $field .= $snippet; + } + push(@words, $field); + } + @words; +} + + +sub old_shellwords { + + # Usage: + # use ParseWords; + # @words = old_shellwords($line); + # or + # @words = old_shellwords(@lines); + + local($_) = join('', @_); + my(@words,$snippet,$field); + + s/^\s+//; + while ($_ ne '') { + $field = ''; + for (;;) { + if (s/^"(([^"\\]|\\.)*)"//) { + ($snippet = $1) =~ s#\\(.)#$1#g; + } + elsif (/^"/) { + croak "Unmatched double quote: $_"; + } + elsif (s/^'(([^'\\]|\\.)*)'//) { + ($snippet = $1) =~ s#\\(.)#$1#g; + } + elsif (/^'/) { + croak "Unmatched single quote: $_"; + } + elsif (s/^\\(.)//) { + $snippet = $1; + } + elsif (s/^([^\s\\'"]+)//) { + $snippet = $1; + } + else { + s/^\s+//; + last; + } + $field .= $snippet; + } + push(@words, $field); + } + @words; +} |