diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2013-05-08 22:21:52 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2013-05-08 22:21:52 +0000 |
commit | 2f253cfc85ffd55a8acb988e91f0bc5ab348124c (patch) | |
tree | 4734ccd522c71dd455879162006742002f8c1565 /t/marked-sect.t | |
download | HTML-Parser-tarball-master.tar.gz |
HTML-Parser-3.71 HEAD HTML-Parser-3.71 master
Diffstat (limited to 't/marked-sect.t')
-rw-r--r-- | t/marked-sect.t | 121 |
1 files changed, 121 insertions, 0 deletions
#!/usr/bin/perl -w

# t/marked-sect.t
#
# Tests for HTML::Parser's handling of SGML marked sections
# (<![ KEYWORDS [ ... ]]>): the INCLUDE, IGNORE, CDATA, RCDATA and TEMP
# keywords, nesting, position reporting, and reproduction of the source
# text from the "skipped_text" and "text" argspecs.
#
# Expected strings spell a-ring as "\xE5" (U+00E5) rather than as a literal
# character, so the comparisons do not depend on the encoding this file is
# saved in.

use strict;
use warnings;

use HTML::Parser ();
use Test::More tests => 14;

my $tag;     # name of the most recent start tag reported
my $text;    # text accumulated from the text handler ("dtext" argspec)

my $p = HTML::Parser->new(
    start_h => [ sub { $tag   = shift }, "tagname" ],
    text_h  => [ sub { $text .= shift }, "dtext" ],
);

SKIP: {
    # Enabling marked sections may die (presumably when the parser was
    # built without support for them); nothing below is testable then.
    my $supported = eval { $p->marked_sections(1); 1 };
    skip($@ || "marked sections not supported", 14) unless $supported;

    # $text is deliberately not reset between the first three tests, so
    # each expectation includes the text of the preceding ones.
    $p->parse("<![[foo]]>");
    is($text, "foo", "section without keywords is included");

    $p->parse("<![TEMP INCLUDE[bar]]>");
    is($text, "foobar", "TEMP INCLUDE section is included");

    $p->parse("<![ INCLUDE -- IGNORE -- [foo<![IGNORE[bar]]>]]>\n<br>");
    is($text, "foobarfoo\n",
       "commented-out IGNORE keyword has no effect; nested IGNORE is dropped");

    # The CDATA section is fed in two chunks and ends at the first "]]>".
    # Its content is passed through verbatim (entities undecoded); the
    # ordinary text that follows is entity-decoded.
    $text = "";
    $p->parse("<![ CDATA [&lt;foo");
    $p->parse("<![IGNORE[bar]]>,bar&gt;]]><br>");
    is($text, "&lt;foo<![IGNORE[bar,bar>]]>",
       "CDATA split across parse() calls ends at the first ]]>");

    # RCDATA: entities decoded, tags kept as text.
    # CDATA:  neither entities nor tags are interpreted.
    # Plain:  entities decoded, <a> is a real tag (so not part of the text).
    $text = "";
    $p->parse("<![ RCDATA [&aring;<a>]]><![CDATA[&aring;<a>]]>&aring;<a><br>");
    is($text, "\xE5<a>&aring;<a>\xE5",
       "RCDATA decodes entities only; CDATA decodes nothing");
    is($tag, "br", "start tag after the marked sections is reported");

    # With several keywords the most restrictive one applies:
    # IGNORE, then CDATA, then RCDATA, then INCLUDE.
    $text = "";
    $p->parse("<![INCLUDE RCDATA CDATA IGNORE [foo&aring;<a>]]><br>");
    is($text, "", "IGNORE overrides INCLUDE, RCDATA and CDATA");

    $text = "";
    $p->parse("<![INCLUDE RCDATA CDATA [foo&aring;<a>]]><br>");
    is($text, "foo&aring;<a>", "CDATA overrides INCLUDE and RCDATA");

    $text = "";
    $p->parse("<![INCLUDE RCDATA [foo&aring;<a>]]><br>");
    is($text, "foo\xE5<a>", "RCDATA overrides INCLUDE");

    $text = "";
    $p->parse("<![INCLUDE [foo&aring;<a>]]><br>");
    is($text, "foo\xE5", "INCLUDE content is parsed as normal markup");

    $text = "";
    $p->parse("<![[foo&aring;<a>]]><br>");
    is($text, "foo\xE5",
       "content of a section without keywords is parsed as normal markup");

    # offsets/line/column numbers
    #
    # Every event is recorded as: LINE.COLUMN:OFFSET EVENT "TEXT", with
    # newlines in TEXT shown as \n and spaces as dots so that whitespace
    # differences are visible in a failure report.
    my @events;
    my $record_event = sub {
        my ($line, $col, $offset, $event, $src) = @_;
        $src =~ s/\n/\\n/g;
        $src =~ s/ /./g;
        push(@events, "$line.$col:$offset $event \"$src\"\n");
    };

    $p = HTML::Parser->new(
        default_h       => [ $record_event, "line,column,offset,event,text" ],
        marked_sections => 1,
    );
    $p->parse(<<'EOT')->eof;
<title>Test</title>
<![CDATA
  [foo&aring;<a>
]]>
<![[
INCLUDE
STUFF
]]>
  <h1>Test</h1>
EOT

    #diag @events;
    is(join("", @events), <<'EOT', "line, column and offset are tracked across marked sections");
1.0:0 start_document ""
1.0:0 start "<title>"
1.7:7 text "Test"
1.11:11 end "</title>"
1.19:19 text "\n"
3.3:32 text "foo&aring;<a>\n"
4.3:49 text "\n"
5.4:54 text "\nINCLUDE\nSTUFF\n"
8.3:72 text "\n.."
9.2:75 start "<h1>"
9.6:79 text "Test"
9.10:83 end "</h1>"
9.15:88 text "\n"
10.0:89 end_document ""
EOT

    # Concatenating skipped_text and text for every event must reproduce
    # the document, including the marked-section delimiters themselves.
    my $doc    = "<Tag><![CDATA[This is cdata]]></Tag>";
    my $result = "";
    $p = HTML::Parser->new(
        marked_sections => 1,
        handlers        => {
            default => [ sub { $result .= join("", @_); }, "skipped_text,text" ],
        },
    )->parse($doc)->eof;
    is($result, $doc, "skipped_text plus text reproduces the source document");

    $text = "";
    $p = HTML::Parser->new(
        text_h          => [ sub { $text .= shift }, "dtext" ],
        marked_sections => 1,
    );

    $p->parse("<![CDATA[foo [1]]]>");
    is($text, "foo [1]", "CDATA text ending in square bracket");

} # SKIP