# Source: t/xml-mode.t (blob cdfc5b0ca0da70b69d5c9629beca564de2f3d407)
use strict;
use Test::More tests => 8;

use HTML::Parser ();
# Parser under test; xml_mode is switched on at construction time.
my $p = HTML::Parser->new(xml_mode => 1);

# Each handler below appends a compact trace of its event to $text, so a
# whole parse can be verified with a single string comparison.
my $text = "";

# Start tag: "S[name k1=v1 k2=v2]".  Attributes are emitted sorted by name
# so the trace does not depend on hash ordering.
$p->handler(
    start => sub {
        my ($tag, $attr) = @_;
        my @pieces = ($tag, map { "$_=$attr->{$_}" } sort keys %$attr);
        $text .= "S[" . join(" ", @pieces) . "]";
    },
    "tagname,attr",
);

# End tag: "E[name]".
$p->handler(
    end => sub {
        my ($tag) = @_;
        $text .= "E[" . $tag . "]";
    },
    "tagname",
);

# Processing instruction: "PI[token0]".
$p->handler(
    process => sub {
        my ($pi) = @_;
        $text .= "PI[" . $pi . "]";
    },
    "token0",
);

# Plain text is copied into the trace unchanged.
$p->handler(
    text => sub {
        my ($chunk) = @_;
        $text .= $chunk;
    },
    "text",
);

# Sample document: two processing instructions, a comment, upper-case and
# lower-case element names, attributes, an empty-element tag (<para/>) and
# markup nested inside <xmp>.
my $xml = <<'EOT';
<?xml version="1.0"?>
<?IS10744:arch name="html"?><!-- comment -->
<DOC>
<title html="h1">My first architectual document</title>
<author html="address">Geir Ove Gronmo, grove@infotek.no</author>
<para>This is the first paragraph in this document</para>
<para html="p">This is the second paragraph</para>
<para/>
<xmp><foo></foo></xmp>
</DOC>
EOT

$p->parse($xml)->eof;

# Expected trace with xml_mode on: the PI content excludes the trailing "?",
# element names keep their case (DOC), <para/> yields both a start and an
# end event, and the content of <xmp> is parsed as markup.  The comment
# leaves no trace because no comment handler is registered.
is($text, <<'EOT', 'xml_mode => 1: event trace of the sample document');
PI[xml version="1.0"]
PI[IS10744:arch name="html"]
S[DOC]
S[title html=h1]My first architectual documentE[title]
S[author html=address]Geir Ove Gronmo, grove@infotek.noE[author]
S[para]This is the first paragraph in this documentE[para]
S[para html=p]This is the second paragraphE[para]
S[para]E[para]
S[xmp]S[foo]E[foo]E[xmp]
E[DOC]
EOT

# Re-parse the same document with xml_mode switched off on the same parser
# object; the trace buffer is cleared first so only this run is compared.
$text = "";
$p->xml_mode(0);
$p->parse($xml)->eof;

# Expected trace with xml_mode off: the PI content now includes the trailing
# "?", element names are lower-cased (doc), "<para/>" is reported as a start
# tag named "para/" with no end event, and the content of <xmp> comes
# through as literal text.
is($text, <<'EOT', 'xml_mode => 0: event trace of the sample document');
PI[xml version="1.0"?]
PI[IS10744:arch name="html"?]
S[doc]
S[title html=h1]My first architectual documentE[title]
S[author html=address]Geir Ove Gronmo, grove@infotek.noE[author]
S[para]This is the first paragraph in this documentE[para]
S[para html=p]This is the second paragraphE[para]
S[para/]
S[xmp]<foo></foo>E[xmp]
E[doc]
EOT

# With xml_mode on, an empty-element tag (<Xyzzy .../>) must produce an end
# event as well: its tag name keeps the original case and its "text"
# argument is empty.
$p = HTML::Parser->new(api_version => 3,
                       xml_mode    => 1);

$p->handler(
    "end" => sub {
        # $src (not $text) so the file-level trace buffer is not shadowed.
        my ($tagname, $src) = @_;
        is($tagname, "Xyzzy",
           'xml_mode: end event for <Xyzzy .../> keeps the tag name case');
        ok(!length($src),
           'xml_mode: end event for <Xyzzy .../> has empty text');
    }, "tagname,text");
$p->parse("<Xyzzy foo=bar/>and some more")->eof;

# Same check with only empty_element_tags enabled (xml_mode off): the
# empty-element tag still produces an end event with empty text, but the
# tag name is reported lower-cased.
$p = HTML::Parser->new(api_version        => 3,
                       empty_element_tags => 1);

$p->handler(
    "end" => sub {
        # $src (not $text) so the file-level trace buffer is not shadowed.
        my ($tagname, $src) = @_;
        is($tagname, "xyzzy",
           'empty_element_tags: end event for <Xyzzy .../> has lower-cased tag name');
        ok(!length($src),
           'empty_element_tags: end event for <Xyzzy .../> has empty text');
    }, "tagname,text");
$p->parse("<Xyzzy foo=bar/>and some more")->eof;

# With xml_pic enabled, a processing instruction runs up to "?>", so a bare
# ">" inside it does not terminate it.  The handler checks both the raw
# source text and token0 (the content without the "<?" / "?>" delimiters).
$p = HTML::Parser->new(
    api_version => 3,
    xml_pic => 1,
);

$p->handler(
    "process" => sub {
        # $src (not $text) so the file-level trace buffer is not shadowed.
        my ($src, $t0) = @_;
        is($src, "<?foo > bar?>",
           'xml_pic: text of the PI extends to the closing "?>"');
        is($t0, "foo > bar",
           'xml_pic: token0 is the PI content without delimiters');
    }, "text, token0");
$p->parse("<?foo > bar?> and then")->eof;