diff options
Diffstat (limited to 'eg/htext')
-rwxr-xr-x | eg/htext | 29 |
1 files changed, 29 insertions, 0 deletions
#!/usr/bin/perl

# eg/htext - Extract all plain text from an HTML file
#
# Usage: htext FILE
#
# Parses FILE with HTML::Parser and prints its text ("dtext" argspec) to
# standard output, skipping text that appears while a <script> or <style>
# element is open.

use strict;
use warnings;
use HTML::Parser 3.00 ();

# Number of currently open elements, keyed by tag name.
my %inside;

# Handler for both start and end tags.
#   $tag - the tag name reported by the parser ("tagname" argspec)
#   $num - '+1' when called for a start tag, '-1' for an end tag
sub tag
{
    my($tag, $num) = @_;
    $inside{$tag} += $num;

    # A stray end tag (e.g. "</style>" without a matching start tag) would
    # otherwise leave a negative count; negative numbers are true, so text()
    # would silently drop everything that follows.
    $inside{$tag} = 0 if $inside{$tag} < 0;

    print " ";  # separator between pieces of text; not wanted for all tags
}

# Handler for text: print it unless we are inside <script> or <style>.
#   $_[0] - the text supplied by the parser ("dtext" argspec)
sub text
{
    return if $inside{script} || $inside{style};
    print $_[0];
}

# Fail early with a usage hint instead of handing undef to parse_file().
my $file = shift;
die "Usage: $0 <file>\n" unless defined $file;

# parse_file() returns a false value when the file cannot be opened and
# leaves the reason in $!.
HTML::Parser->new(api_version => 3,
                  handlers    => [start => [\&tag,  "tagname, '+1'"],
                                  end   => [\&tag,  "tagname, '-1'"],
                                  text  => [\&text, "dtext"],
                                 ],
                  marked_sections => 1,
                 )->parse_file($file)
    or die "Can't open file $file: $!\n";