#!/usr/bin/perl -w
#
# Parse document and report first syntax (well-formedness) error found.
#
# Usage:
#   script [-e] file1.xml [file2.xml ...]
#   script [-e] < document.xml
#
# Options:
#   -e   check entity references too (by default they are stripped from
#        the input before parsing; see parse_string below)
#
# Per-file results and errors go to STDERR; the final "All files checked."
# line goes to STDOUT.
#

use strict;
use warnings;
use XML::Parser;
use Getopt::Std;

my %opts;
getopts('e', \%opts);
my $ENTREFS = exists( $opts{'e'} );    # flag: check ent refs

my $parser = XML::Parser->new(
    ErrorContext => 2,                 # output error context
);

if( @ARGV ) {
    # get input from files
    foreach my $file ( @ARGV ) {
        # An unreadable file is reported and skipped; the remaining files
        # are still checked. ('return' is not legal here: we are not
        # inside a subroutine.)
        my $fh;
        unless( open( $fh, '<', $file )) {
            print STDERR "ERROR: Can't open '$file'.\n";
            next;
        }
        my $input = slurp_handle( $fh );
        close $fh;

        # parse and report errors
        my $error = parse_string( $input );
        if( $error ) {
            print STDERR "ERROR in $file:\n$error\n";
        } else {
            print STDERR "'$file' is well-formed.\n";
        }
    }
    print "All files checked.\n";

} else {
    # get input from STDIN
    my $input = slurp_handle( \*STDIN );
    my $error = parse_string( $input );
    if( $error ) {
        print STDERR "ERROR in stream:\n$error\n";
    } else {
        print STDERR "No syntax errors found in XML stream.\n";
    }
}

# Read everything remaining on a filehandle and return it as one string.
# Returns the empty string (never undef) when there is nothing to read.
#
sub slurp_handle {
    my $fh = shift;
    local $/;                          # slurp mode: no record separator
    my $text = <$fh>;
    return defined $text ? $text : '';
}

# Parse the string and return the error message; the result is an empty
# string (false) when the document is well-formed.
#
# NOTE: Unless -e was given, entity refs are not checked. XML::Parser can
# be told not to expand entity refs, but will still try to find
# replacement text just in case, which we don't want. Therefore, we
# need to do a stupid regexp replacement, removing entities from input.
#
sub parse_string {
    my $string = shift;

    unless( $ENTREFS ) {
        $string =~ s/\&[^\s;]+;//g;    # remove entity references
    }

    eval { $parser->parse( $string ); };

    # Copy $@ right away: it is a global that any later eval may clobber.
    my $error = $@;
    $error =~ s/at \/.*?$//s;          # remove module line number
    return $error;
}