blob: 38f89774fe851b2aa9efa243528700d3e0b21857 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
#!/usr/bin/perl -w
#
# Parse document and report first syntax (well-formedness) error found.
#
use strict;
use XML::Parser;
use Getopt::Std;
my %opts;
getopts('e', \%opts);
my $ENTREFS = exists( $opts{'e'} ); # flag: check ent refs
my $parser = XML::Parser->new(
ErrorContext => 2, # output error context
);
# get input from files
if( @ARGV ) {
foreach( @ARGV ) {
my $file = $_;
unless( -r $file ) {
print STDERR "ERROR: Can't open '$file'.\n";
return;
}
my $input = '';
open( F, $file );
while( <F> ) { $input .= $_; }
close F;
# parse and report errors
if( &parse_string( $input )) {
print STDERR "ERROR in $file:\n$@\n";
} else {
print STDERR "'$file' is well-formed.\n";
}
}
print "All files checked.\n";
# get input from STDIN
} else {
my $input = "";
while( <STDIN> ) { $input .= $_; }
if( &parse_string( $input )) {
print STDERR "ERROR in stream:\n$@\n";
} else {
print STDERR "No syntax errors found in XML stream.\n";
}
}
# parse the string and return error message
#
# NOTE: By default, entity refs are not expanded. XML::Parser can be
# told not to expand entity refs, but will still try to find
# replacement text just in case, which we don't want. Therefore, we
# need to do a stupid regexp replacement, removing entities from input.
#
sub parse_string {
my $string = shift;
unless( $ENTREFS ) {
$string =~ s/\&[^\s;]+;//g; # remove entity references
}
eval { $parser->parse( $string ); };
$@ =~ s/at \/.*?$//s; # remove module line number
return $@;
}
|