summaryrefslogtreecommitdiff
path: root/samples/xmlfilter
diff options
context:
space:
mode:
Diffstat (limited to 'samples/xmlfilter')
-rwxr-xr-xsamples/xmlfilter329
1 files changed, 329 insertions, 0 deletions
diff --git a/samples/xmlfilter b/samples/xmlfilter
new file mode 100755
index 0000000..3f6dea5
--- /dev/null
+++ b/samples/xmlfilter
@@ -0,0 +1,329 @@
+#!/usr/local/bin/perl -w
+#
+# $Revision: 1.1.1.1 $
+#
+# $Date: 2003/07/27 11:07:11 $
+
+use XML::Parser;
+
+my $Usage =<<'End_of_Usage;';
+Usage is:
+ xmlfilter [-h] [-nl] [{-+}root] [{-+}el=elname] [{-+}el:elnamepat]
+ [{-+}att:attname] [{-+}att:attname:attvalpat] xmlfile
+
+Prints on standard output the result of filtering the given xmlfile
+for elements according to the switches. A '-' option will drop the
+element from the output; a '+' will keep it. The output should also
+be a well-formed XML document.
+
+ -h Print this message
+
+ -nl Emit a newline prior to every start tag.
+
+ [-+]root Drop (or keep) the root element. Defaults to keep.
+ If the root element were named "foo", then -root
+ would be equivalent to -el=foo. Note that even if
+ you're dropping the root element, it's start and
+ end tag are kept in order that the output remains
+ a well-formed XML document.
+
+ [-+]el=elname
+ Drop (or keep) elements of type elname.
+
+ [-+]el:elnamepat
+ Drop (or keep) element whose type name matches elnamepat.
+
+ [-+]att:attname
+ Drop (or keep) elements which have an attribute = attname.
+
+ [-+]att:attname:attvalpat
+ Drop (or keep) elements which have an attribute = attname
+ and for which the attribute value matches attvalpat.
+End_of_Usage;
+
+my $pass = 1;
+my $do_newline = 0;
+
+my $attcheck = 0;
+
+my %drop_el;
+my @drop_elpat;
+
+my %keep_el;
+my @keep_elpat;
+
+my %drop_att;
+my %keep_att;
+
+my $always_true = sub {1;};
+my $root_element = '';
+
+my $in_cdata = 0;
+
+# Process options
+
+while (defined($ARGV[0]) and $ARGV[0] =~ /^[-+]/)
+{
+ my $opt = shift;
+
+ if ($opt eq '-root')
+ {
+ $pass = 0;
+ }
+ elsif ($opt eq '+root')
+ {
+ $pass = 1;
+ }
+ elsif ($opt eq '-h')
+ {
+ print $Usage;
+ exit;
+ }
+ elsif ($opt eq '-nl')
+ {
+ $do_newline = 1;
+ }
+ elsif ($opt =~ /^([-+])el([:=])(\S*)/)
+ {
+ my ($disp, $kind, $pattern) = ($1, $2, $3);
+ my ($hashref, $aref);
+
+ if ($disp eq '-')
+ {
+ $hashref = \%drop_el;
+ $aref = \@drop_elpat;
+ }
+ else
+ {
+ $hashref = \%keep_el;
+ $aref = \@keep_elpat;
+ }
+
+ if ($kind eq '=')
+ {
+ $hashref->{$pattern} = 1;
+ }
+ else
+ {
+ push(@$aref, $pattern);
+ }
+ }
+ elsif ($opt =~ /^([-+])att:(\w+)(?::(\S*))?/)
+ {
+ my ($disp, $id, $pattern) = ($1, $2, $3);
+ my $ref = ($disp eq '-') ? \%drop_att : \%keep_att;
+
+ if (defined($pattern))
+ {
+ $pattern =~ s!/!\\/!g;
+ my $sub;
+ eval "\$sub = sub {\$_[0] =~ /$pattern/;};";
+
+ $ref->{$id} = $sub;
+ }
+ else
+ {
+ $ref->{$id} = $always_true;
+ }
+
+ $attcheck = 1;
+ }
+ else
+ {
+ die "Unknown option: $opt\n$Usage";
+ }
+}
+
+my $drop_el_pattern = join('|', @drop_elpat);
+my $keep_el_pattern = join('|', @keep_elpat);
+
+my $drop_sub;
+if ($drop_el_pattern)
+{
+ eval "\$drop_sub = sub {\$_[0] =~ /$drop_el_pattern/;}";
+}
+else
+{
+ $drop_sub = sub {};
+}
+
+my $keep_sub;
+if ($keep_el_pattern)
+{
+ eval "\$keep_sub = sub {\$_[0] =~ /$keep_el_pattern/;}";
+}
+else
+{
+ $keep_sub = sub {};
+}
+
+my $doc = shift;
+
+die "No file specified\n$Usage" unless defined($doc);
+
+my @togglestack = ();
+
+my $p = new XML::Parser(ErrorContext => 2,
+ Handlers => {Start => \&start_handler,
+ End => \&end_handler
+ }
+ );
+
+if ($pass) {
+ $p->setHandlers(Char => \&char_handler,
+ CdataStart => \&cdata_start,
+ CdataEnd => \&cdata_end);
+}
+
+$p->parsefile($doc);
+
+print "</$root_element>\n"
+ unless $pass;
+
+################
+## End of main
+################
+
+sub start_handler
+{
+ my $xp = shift;
+ my $el = shift;
+
+ unless ($root_element)
+ {
+ $root_element = $el;
+ print "<$el>\n"
+ unless $pass;
+ }
+
+ my ($elref, $attref, $sub);
+
+ if ($pass)
+ {
+ $elref = \%drop_el;
+ $attref = \%drop_att;
+ $sub = $drop_sub;
+ }
+ else
+ {
+ $elref = \%keep_el;
+ $attref = \%keep_att;
+ $sub = $keep_sub;
+ }
+
+ if (defined($elref->{$el})
+ or &$sub($el)
+ or check_atts($attref, @_))
+ {
+ $pass = ! $pass;
+ if ($pass) {
+ $xp->setHandlers(Char => \&char_handler,
+ CdataStart => \&cdata_start,
+ CdataEnd => \&cdata_end);
+ }
+ else {
+ $xp->setHandlers(Char => 0,
+ CdataStart => 0,
+ CdataEnd => 0);
+ }
+ push(@togglestack, $xp->depth);
+ }
+
+ if ($pass)
+ {
+ print "\n" if $do_newline;
+ print "<$el";
+ while (@_)
+ {
+ my $id = shift;
+ my $val = shift;
+
+ $val = $xp->xml_escape($val, "'");
+ print " $id='$val'";
+ }
+ print ">";
+ }
+} # End start_handler
+
+sub end_handler
+{
+ my $xp = shift;
+ my $el = shift;
+
+ if ($pass)
+ {
+ print "</$el>";
+ }
+
+ if (@togglestack and $togglestack[-1] == $xp->depth)
+ {
+ $pass = ! $pass;
+ if ($pass) {
+ $xp->setHandlers(Char => \&char_handler,
+ CdataStart => \&cdata_start,
+ CdataEnd => \&cdata_end);
+ }
+ else {
+ $xp->setHandlers(Char => 0,
+ CdataStart => 0,
+ CdataEnd => 0);
+ }
+
+ pop(@togglestack);
+ }
+
+} # End end_handler
+
+
+sub char_handler
+{
+ my ($xp, $text) = @_;
+
+ if (length($text)) {
+
+ $text = $xp->xml_escape($text, '>')
+ unless $in_cdata;
+
+ print $text;
+ }
+} # End char_handler
+
+sub cdata_start {
+ my $xp = shift;
+
+ print '<![CDATA[';
+ $in_cdata = 1;
+}
+
+sub cdata_end {
+ my $xp = shift;
+
+ print ']]>';
+ $in_cdata = 0;
+}
+
+sub check_atts
+{
+ return $attcheck unless $attcheck;
+
+ my $ref = shift;
+
+ while (@_)
+ {
+ my $id = shift;
+ my $val = shift;
+
+ if (defined($ref->{$id}))
+ {
+ my $ret = &{$ref->{$id}}($val);
+ return $ret if $ret;
+ }
+ }
+
+ return 0;
+} # End check_atts
+
+# Tell Emacs that this is really a perl script
+# Local Variables:
+# mode:perl
+# End: