summary refs log tree commit diff
path: root/t/skipped-text.t
diff options
context:
space:
mode:
Diffstat (limited to 't/skipped-text.t')
-rw-r--r-- t/skipped-text.t 89
1 files changed, 89 insertions, 0 deletions
diff --git a/t/skipped-text.t b/t/skipped-text.t
new file mode 100644
index 0000000..bc39915
--- /dev/null
+++ b/t/skipped-text.t
@@ -0,0 +1,89 @@
+use Test::More tests => 4;
+
+use strict;
+use HTML::Parser;
+
+# Only <a> tags are reported; the rest of the input ends up in skipped_text.
+my @doc;
+my $p = HTML::Parser->new(api_version => 3);
+$p->report_tags("a");
+
+# Start tags go to a_handler; text still skipped at end of document goes to @doc.
+$p->handler(end_document => \@doc, '@{skipped_text}');
+$p->handler(start => \&a_handler, "skipped_text, text");
+
+$p->parse(<<EOT)->eof;
+<title>hi</title>
+<h1><a href="foo">link</a></h1>
+and <a foo="">some</a> text.
+EOT
+
+sub a_handler {
+    # Args follow the argspec: skipped text before the tag, then the tag's text.
+    my ($skipped, $tag_text) = @_;
+    push @doc, $skipped, uc($tag_text);
+}
+
+# Skipped pieces are pushed verbatim; only the <a> start tags are upper-cased.
+is(join("", @doc), <<'EOT', "skipped_text plus handled tags rebuild the document");
+<title>hi</title>
+<h1><A HREF="FOO">link</a></h1>
+and <A FOO="">some</a> text.
+EOT
+
+#
+# Comment stripper. Interaction with "" handlers: the skipped text seen at
+# end_document is expected to be the document minus its comment.
+#
+my $doc = <<EOT;
+<html>text</html>
+<!-- comment -->
+and some more <b>text</b>.
+EOT
+(my $expected = $doc) =~ s/<!--.*?-->//;
+
+$p = HTML::Parser->new(api_version => 3);
+$p->handler(comment => "");
+$p->handler(end_document => sub {
+    my ($stripped) = @_;
+    is($stripped, $expected, "comment removed from skipped_text");
+}, "skipped_text");
+
+# One character per parse() call, so every token is split across calls.
+$p->parse($_) for split(//, $doc);
+$p->eof;
+
+#
+# Interaction with unbroken text: each text event is recorded in @x as
+# ("X", skipped_text, text) and end_document as ("Y", skipped_text).
+#
+my @x;
+$p = HTML::Parser->new(api_version => 3, unbroken_text => 1);
+$p->handler(text => \@x, '@{"X", skipped_text, text}');
+$p->handler(end => "");
+$p->handler(end_document => \@x, '@{"Y", skipped_text}');
+
+$doc = "a a<a>b b</a>c c<x>d d</x>e";
+
+# One character per parse() call, so every token is split across calls.
+$p->parse($_) for split(//, $doc);
+$p->eof;
+
+is(join(":", @x), "X::a a:X:<a>:b bc c:X:<x>:d de:Y:",
+   "skipped_text with unbroken_text enabled");
+
+#
+# The crash that Chip found. The input starts with a wide character
+# (\x{100}) and is fed to the parser over several parse() calls.
+#
+my $skipped;
+$p = HTML::Parser->new(
+    ignore_tags => ["foo"],
+    start_h     => [sub { $skipped = shift }, "skipped_text"],
+);
+
+$p->parse("\x{100}<foo>");
+$p->parse("plain");
+$p->parse("<bar>");
+$p->eof;
+is($skipped, "\x{100}<foo>plain", "skipped_text intact after wide-char chunk");