diff options
author | Andy Dougherty <doughera@lafcol.lafayette.edu> | 1996-02-28 16:49:33 -0800 |
---|---|---|
committer | Andy Dougherty <doughera@lafcol.lafayette.edu> | 1996-02-28 16:49:33 -0800 |
commit | a5f75d667838e8e7bb037880391f5c44476d33b4 (patch) | |
tree | 5005e888355c1508bc47da697efe119c1615b123 /lib/Text | |
parent | 2920c5d2b358b11ace52104b6944bfa0e89256a7 (diff) | |
download | perl-a5f75d667838e8e7bb037880391f5c44476d33b4.tar.gz |
perl 5.002perl-5.002
[editor's note: changes seem to be mostly module updates,
documentation changes and some perl API macro additions]
Diffstat (limited to 'lib/Text')
-rw-r--r-- | lib/Text/ParseWords.pm | 128 |
1 files changed, 65 insertions, 63 deletions
diff --git a/lib/Text/ParseWords.pm b/lib/Text/ParseWords.pm index 89278501d1..97d7beb896 100644 --- a/lib/Text/ParseWords.pm +++ b/lib/Text/ParseWords.pm @@ -9,43 +9,76 @@ use Carp; @EXPORT = qw(shellwords quotewords); @EXPORT_OK = qw(old_shellwords); -# This code needs updating to use new Perl 5 features (regexp etc). - -# ParseWords.pm -# -# Usage: -# use ParseWords; -# @words = &quotewords($delim, $keep, @lines); -# @words = &shellwords(@lines); -# @words = &old_shellwords(@lines); - -# Hal Pomeranz (pomeranz@netcom.com), 23 March 1994 -# Permission to use and distribute under the same terms as Perl. -# No warranty expressed or implied. - -# Basically an update and generalization of the old shellwords.pl. -# Much code shamelessly stolen from the old version (author unknown). -# -# &quotewords() accepts a delimiter (which can be a regular expression) -# and a list of lines and then breaks those lines up into a list of -# words ignoring delimiters that appear inside quotes. -# -# The $keep argument is a boolean flag. If true, the quotes are kept -# with each word, otherwise quotes are stripped in the splitting process. -# $keep also defines whether unprotected backslashes are retained. -# +=head1 NAME -1; -__END__ +Text::ParseWords - parse text into an array of tokens +=head1 SYNOPSIS -sub shellwords { + use Text::ParseWords; + @words = &quotewords($delim, $keep, @lines); + @words = &shellwords(@lines); + @words = &old_shellwords(@lines); + +=head1 DESCRIPTION + +&quotewords() accepts a delimiter (which can be a regular expression) +and a list of lines and then breaks those lines up into a list of +words ignoring delimiters that appear inside quotes. + +The $keep argument is a boolean flag. If true, the quotes are kept +with each word, otherwise quotes are stripped in the splitting process. +$keep also defines whether unprotected backslashes are retained. + +A &shellwords() replacement is included to demonstrate the new package. 
+This version differs from the original in that it will _NOT_ default +to using $_ if no arguments are given. I personally find the old behavior +to be a mis-feature. + + +&quotewords() works by simply jamming all of @lines into a single +string in $_ and then pulling off words a bit at a time until $_ +is exhausted. + +The inner "for" loop builds up each word (or $field) one $snippet +at a time. A $snippet is a quoted string, a backslashed character, +or an unquoted string. We fall out of the "for" loop when we reach +the end of $_ or when we hit a delimiter. Falling out of the "for" +loop, we push the $field we've been building up onto the list of +@words we'll be returning, and then loop back and pull another word +off of $_. + +The first two cases inside the "for" loop deal with quoted strings. +The first case matches a double quoted string, removes it from $_, +and assigns the double quoted string to $snippet in the body of the +conditional. The second case handles single quoted strings. In +the third case we've found a quote at the current beginning of $_, +but it didn't match the quoted string regexps in the first two cases, +so it must be an unbalanced quote and we croak with an error (which can +be caught by eval()). - # A &shellwords() replacement is included to demonstrate the new package. - # This version differs from the original in that it will _NOT_ default - # to using $_ if no arguments are given. I personally find the old behavior - # to be a mis-feature. +The next case handles backslashed characters, and the next case is the +exit case on reaching the end of the string or finding a delimiter. +Otherwise, we've found an unquoted thing and we pull of characters one +at a time until we reach something that could start another $snippet-- +a quote of some sort, a backslash, or the delimiter. This one character +at a time behavior was necessary if the delimiter was going to be a +regexp (love to hear it if you can figure out a better way). 
+ +=head1 AUTHORS + +Hal Pomeranz (pomeranz@netcom.com), 23 March 1994 + +Basically an update and generalization of the old shellwords.pl. +Much code shamelessly stolen from the old version (author unknown). + +=cut + +1; +__END__ + +sub shellwords { local(@lines) = @_; $lines[$#lines] =~ s/\s+$//; &quotewords('\s+', 0, @lines); @@ -54,37 +87,6 @@ sub shellwords { sub quotewords { - -# &quotewords() works by simply jamming all of @lines into a single -# string in $_ and then pulling off words a bit at a time until $_ -# is exhausted. -# -# The inner "for" loop builds up each word (or $field) one $snippet -# at a time. A $snippet is a quoted string, a backslashed character, -# or an unquoted string. We fall out of the "for" loop when we reach -# the end of $_ or when we hit a delimiter. Falling out of the "for" -# loop, we push the $field we've been building up onto the list of -# @words we'll be returning, and then loop back and pull another word -# off of $_. -# -# The first two cases inside the "for" loop deal with quoted strings. -# The first case matches a double quoted string, removes it from $_, -# and assigns the double quoted string to $snippet in the body of the -# conditional. The second case handles single quoted strings. In -# the third case we've found a quote at the current beginning of $_, -# but it didn't match the quoted string regexps in the first two cases, -# so it must be an unbalanced quote and we croak with an error (which can -# be caught by eval()). -# -# The next case handles backslashed characters, and the next case is the -# exit case on reaching the end of the string or finding a delimiter. -# -# Otherwise, we've found an unquoted thing and we pull of characters one -# at a time until we reach something that could start another $snippet-- -# a quote of some sort, a backslash, or the delimiter. This one character -# at a time behavior was necessary if the delimiter was going to be a -# regexp (love to hear it if you can figure out a better way). 
- local($delim, $keep, @lines) = @_; local(@words,$snippet,$field,$_); |