summaryrefslogtreecommitdiff
path: root/eg/hform
blob: d2599ed03f9c21b04606f79c3b95de1e2df28dab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/perl -w

# See also HTML::Form module

use HTML::PullParser ();
use HTML::Entities qw(decode_entities);
use Data::Dump qw(dump);

my @FORM_TAGS = qw(form input textarea button select option);

my $p = HTML::PullParser->new(file => shift || "xxx.html",
			      start => 'tag, attr',
			      end   => 'tag',
			      text  => '@{text}',
			      report_tags => \@FORM_TAGS,
			     ) || die "$!";

# a little helper function
sub get_text {
    my($p, $stop) = @_;
    my $text;
    while (defined(my $t = $p->get_token)) {
	if (ref $t) {
	    $p->unget_token($t) unless $t->[0] eq $stop;
	    last;
	}
	else {
	    $text .= $t;
	}
    }
    return $text;
}

my @forms;
while (defined(my $t = $p->get_token)) {
    next unless ref $t; # skip text
    if ($t->[0] eq "form") {
	shift @$t;
	push(@forms, $t);
	while (defined(my $t = $p->get_token)) {
	    next unless ref $t;  # skip text
	    last if $t->[0] eq "/form";
	    if ($t->[0] eq "select") {
		my $sel = $t;
		push(@{$forms[-1]}, $t);
		while (defined(my $t = $p->get_token)) {
		    next unless ref $t; # skip text
		    last if $t->[0] eq "/select";
		    #print "select ", dump($t), "\n";
		    if ($t->[0] eq "option") {
			my $value = $t->[1]->{value};
			my $text = get_text($p, "/option");
			unless (defined $value) {
			    $value = decode_entities($text);
			}
			push(@$sel, $value);
		    }
		    else {
			warn "$t->[0] inside select";
		    }
		}
	    }
	    elsif ($t->[0] =~ /^\/?option$/) {
		warn "option tag outside select";
	    }
	    elsif ($t->[0] eq "textarea") {
		push(@{$forms[-1]}, $t);
		$t->[1]{value} = get_text($p, "/textarea");
	    }
	    elsif ($t->[0] =~ m,^/,) {
		warn "stray $t->[0] tag";
	    }
	    else {
		push(@{$forms[-1]}, $t);
	    }
	}
    }
    else {
	warn "form tag $t->[0] outside form";
    }
}

print dump(\@forms), "\n";