use Test::More tests => 17; use strict; use HTML::TokeParser; # First we create an HTML document to test my $file = "ttest$$.htm"; die "$file already exists" if -e $file; open(F, ">$file") or die "Can't create $file: $!"; print F <<'EOT'; close(F); This is the <title>

This is the title again

And this is a link to the Perl Institute
EOT END { unlink($file) || warn "Can't unlink $file: $!"; } my $p; $p = HTML::TokeParser->new($file) || die "Can't open $file: $!"; ok($p->unbroken_text); if ($p->get_tag("foo", "title")) { my $title = $p->get_trimmed_text; #diag "Title: $title"; is($title, "This is the "); } undef($p); # Test with reference to glob open(F, $file) || die "Can't open $file: $!"; $p = HTML::TokeParser->new(\*F); my $scount = 0; my $ecount = 0; my $tcount = 0; my $pcount = 0; while (my $token = $p->get_token) { $scount++ if $token->[0] eq "S"; $ecount++ if $token->[0] eq "E"; $pcount++ if $token->[0] eq "PI"; } undef($p); close F; # Test with glob open(F, $file) || die "Can't open $file: $!"; $p = HTML::TokeParser->new(*F); $tcount++ while $p->get_tag; undef($p); close F; # Test with plain file name $p = HTML::TokeParser->new($file) || die; $tcount++ while $p->get_tag; undef($p); #diag "Number of tokens found: $tcount/2 = $scount + $ecount"; is($tcount, 34); is($scount, 10); is($ecount, 7); is($pcount, 1); is($tcount/2, $scount + $ecount); ok(!HTML::TokeParser->new("/noT/thEre/$$")); $p = HTML::TokeParser->new($file) || die; $p->get_tag("a"); my $atext = $p->get_text; undef($p); is($atext, "Perl\240Institute"); # test parsing of embeded document $p = HTML::TokeParser->new(\<<HTML); <title>Title

Heading

HTML ok($p->get_tag("h1")); is($p->get_trimmed_text, "Heading"); undef($p); # test parsing of large embedded documents my $doc = "foo is bar\n\n\n" x 2022; #use Time::HiRes qw(time); my $start = time; $p = HTML::TokeParser->new(\$doc); #diag "Construction time: ", time - $start; my $count; while (my $t = $p->get_token) { $count++ if $t->[0] eq "S"; } #diag "Parse time: ", time - $start; is($count, 2022); $p = HTML::TokeParser->new(\<<'EOT');

This is a heading

This is some
text.
This is some more text.

This is even some more. EOT $p->get_tag("/h1"); my $t = $p->get_trimmed_text("br", "p"); is($t, "This is some text."); $p->get_tag; $t = $p->get_trimmed_text("br", "p"); is($t,"This is some more text."); undef($p); $p = HTML::TokeParser->new(\<<'EOT');

This is a bold heading

This is some italic text.
This is some more text.

This is even some more. EOT $p->get_tag("h1"); $t = $p->get_phrase; is($t, "This is a bold heading"); $t = $p->get_phrase; is($t, ""); $p->get_tag; $t = $p->get_phrase; is($t, "This is some italic text. This is some more text."); undef($p);