summaryrefslogtreecommitdiff
path: root/Docs/Support/xwf
blob: 38f89774fe851b2aa9efa243528700d3e0b21857 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/perl -w
#
# Parse document and report first syntax (well-formedness) error found.
#

use strict;
use XML::Parser;
use Getopt::Std;

my %opts;
getopts('e', \%opts);
my $ENTREFS = exists( $opts{'e'} );                 # flag: check ent refs

my $parser = XML::Parser->new( 
                               ErrorContext => 2,   # output error context
                               );

# get input from files
if( @ARGV ) {
    foreach( @ARGV ) {
        my $file = $_;
        unless( -r $file ) {
            print STDERR "ERROR: Can't open '$file'.\n";
            return;
        }
        my $input = '';
        open( F, $file );
        while( <F> ) { $input .= $_; }
        close F;

        # parse and report errors
        if( &parse_string( $input )) {
            print STDERR "ERROR in $file:\n$@\n";
        } else {
            print STDERR "'$file' is well-formed.\n";
        }
    }
    print "All files checked.\n";

# get input from STDIN
} else {
    my $input = "";
    while( <STDIN> ) { $input .= $_; }
    if( &parse_string( $input )) {
        print STDERR "ERROR in stream:\n$@\n";
    } else {
        print STDERR "No syntax errors found in XML stream.\n";
    }
}


# parse the string and return error message
#
# NOTE: By default, entity refs are not expanded.  XML::Parser can be
# told not to expand entity refs, but will still try to find
# replacement text just in case, which we don't want.  Therefore, we
# need to do a stupid regexp replacement, removing entities from input.
#
sub parse_string {
    my $string = shift;
    unless( $ENTREFS ) {
        $string =~ s/\&[^\s;]+;//g;         # remove entity references
    }
    eval { $parser->parse( $string ); };
    $@ =~ s/at \/.*?$//s;               # remove module line number
    return $@;
}