summaryrefslogtreecommitdiff
path: root/external/jaxp/source/gnu/xml/pipeline/XIncludeFilter.java
blob: 20a24f9bef055e12620d353d058684d8af9bc6ba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
/*
 * $Id: XIncludeFilter.java,v 1.1.1.1 2003-02-01 02:10:22 cbj Exp $
 * Copyright (C) 2001-2002 David Brownell
 * 
 * This file is part of GNU JAXP, a library.
 *
 * GNU JAXP is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * GNU JAXP is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * As a special exception, if you link this library with other files to
 * produce an executable, this library does not by itself cause the
 * resulting executable to be covered by the GNU General Public License.
 * This exception does not however invalidate any other reasons why the
 * executable file might be covered by the GNU General Public License. 
 */

package gnu.xml.pipeline;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL; 
import java.net.URLConnection; 
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Stack;
import java.util.Vector;

import org.xml.sax.Attributes;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import gnu.xml.util.Resolver;



// $Id: XIncludeFilter.java,v 1.1.1.1 2003-02-01 02:10:22 cbj Exp $

/**
 * Filter to process an XPointer-free subset of
 * <a href="http://www.w3.org/TR/xinclude">XInclude</a>, supporting its
 * use as a kind of replacement for parsed general entities.
 * XInclude works much like the <code>#include</code> of C/C++ but
 * works for XML documents as well as unparsed text files.
 * Restrictions from the 17-Sept-2002 CR draft of XInclude are as follows:
 *
 * <ul>
 *
 * <li> URIs must not include fragment identifiers.
 * The CR specifies support for XPointer <em>element()</em> fragment IDs,
 * which is not currently implemented here.
 *
 * <li> <em>xi:fallback</em> handling of resource errors is not
 * currently supported.
 *
 * <li> DTDs are not supported in included files, since the SAX DTD events
 * must have completely preceded any included file. 
 * The CR explicitly allows the DTD related portions of the infoset to
 * grow as an effect of including XML documents.
 *
 * <li> <em>xml:base</em> fixup isn't done.
 *
 * </ul>
 *
 * <p> XML documents that are included will normally be processed using
 * the default SAX namespace rules, meaning that prefix information may
 * be discarded.  This may be changed with {@link #setSavingPrefixes
 * setSavingPrefixes()}.  <em>You are strongly advised to do this.</em>
 *
 * <p> Note that XInclude allows highly incompatible implementations, which
 * are specialized to handle application-specific infoset extensions.  Some
 * such implementations can be implemented by subclassing this one, but
 * they may only be substituted in applications at "user option".
 *
 * <p>TBD: "IURI" handling.
 *
 * @author David Brownell
 * @version $Date: 2003-02-01 02:10:22 $
 */
public class XIncludeFilter extends EventFilter implements Locator
{
    // External general entities seen in the DTD:  entity name mapped to
    // the system ID recorded by externalEntityDecl().  The default load
    // factor is used; Hashtable's two-argument constructor takes a load
    // factor, not a growth increment like Vector's.
    private Hashtable		extEntities = new Hashtable (5);
    // Nesting depth of events being suppressed; zero means events are
    // passed on normally.
    private int			ignoreCount;
    // Stack of java.net.URL objects serving as base URIs.
    private Stack		uris = new Stack ();
    // Locator for whatever is currently being parsed; may be null.
    private Locator		locator;
    // URI strings of the entities currently being processed, consulted
    // to detect circular inclusions.
    private Vector		inclusions = new Vector (5, 5);
    // Value assigned through setSavingPrefixes(); false by default.
    private boolean		savingPrefixes;

    /**
     * Constructs a filter that hands its output to the given consumer,
     * and registers this object as the content handler and as the
     * declaration and lexical handler properties.
     *
     * @param next passed to the superclass constructor as the next
     *	stage of the pipeline
     */
    public XIncludeFilter (EventConsumer next)
    throws SAXException
    {
	super (next);

	// Content, lexical, and declaration events are intercepted by
	// this object; DTDHandler callbacks pass straight through.
	setContentHandler (this);
	setProperty (LEXICAL_HANDLER, this);
	setProperty (DECL_HANDLER, this);
    }

    // Reports the exception through the registered error handler's
    // fatalError() callback, when a handler is registered, and then
    // throws it.  This method never returns normally.
    private void fatal (SAXParseException e) throws SAXException
    {
	ErrorHandler	handler = getErrorHandler ();

	if (handler != null)
	    handler.fatalError (e);
	throw e;
    }

    /**
     * Passes "this" down the filter chain as a proxy locator.
     * The locator supplied by the caller is remembered, and the
     * Locator methods of this class delegate to it.
     *
     * @param locator the locator to delegate to
     */
    public void setDocumentLocator (Locator locator)
    {
	// remember the real locator before announcing the proxy
	this.locator = locator;
	super.setDocumentLocator (this);
    }

    /**
     * Used for proxy locator; do not call directly.
     * Returns the current locator's system ID, or null when there
     * is no current locator.
     */
    public String getSystemId ()
    {
	if (locator == null)
	    return null;
	return locator.getSystemId ();
    }
    /**
     * Used for proxy locator; do not call directly.
     * Returns the current locator's public ID, or null when there
     * is no current locator.
     */
    public String getPublicId ()
    {
	if (locator == null)
	    return null;
	return locator.getPublicId ();
    }
    /**
     * Used for proxy locator; do not call directly.
     * Returns the current locator's line number, or -1 when there
     * is no current locator.
     */
    public int getLineNumber ()
    {
	if (locator == null)
	    return -1;
	return locator.getLineNumber ();
    }
    /**
     * Used for proxy locator; do not call directly.
     * Returns the current locator's column number, or -1 when there
     * is no current locator.
     */
    public int getColumnNumber ()
    {
	if (locator == null)
	    return -1;
	return locator.getColumnNumber ();
    }

    /**
     * Assigns the flag controlling the setting of the SAX2
     * <em>namespace-prefixes</em> flag.
     *
     * @param flag the new value of the flag
     */
    public void setSavingPrefixes (boolean flag)
    {
	this.savingPrefixes = flag;
    }

    /**
     * Returns the flag controlling the setting of the SAX2
     * <em>namespace-prefixes</em> flag when parsing included documents.
     * The default value is the SAX2 default (false), which discards
     * information that can be useful.
     *
     * @return the value last assigned with {@link #setSavingPrefixes
     *	setSavingPrefixes()}, or false if it was never called
     */
    public boolean isSavingPrefixes ()
    {
	return this.savingPrefixes;
    }

    //
    // Two bookkeeping schemes cooperate in this class:
    //
    //	- XML Base implies a stack of base URIs, which changes both
    //	  at "real entity" boundaries and at element boundaries.
    //
    //	- The "Real Entities" currently active (document and general
    //	  entities, and xincluded files) are listed so that circular
    //	  inclusions can be rejected.
    //
    // This method enters a new entity into both.  When the current
    // locator reports a system ID, that ID is used in place of the
    // argument.  The value returned is the string form of the URL
    // that was recorded.
    //
    private String addMarker (String uri)
    throws SAXException
    {
	String	reported = (locator == null) ? null : locator.getSystemId ();

	if (reported != null)
	    uri = reported;

	// InputSource objects are not required to carry system IDs
	if (uri == null)
	    fatal (new SAXParseException ("Entity URI is unknown", locator));

	try {
	    URL		parsed = new URL (uri);
	    String	marker = parsed.toString ();

	    if (inclusions.contains (marker))
		fatal (new SAXParseException (
			"XInclude, circular inclusion", locator));
	    inclusions.addElement (marker);
	    uris.push (parsed);
	    return marker;
	} catch (IOException e) {
	    // illegal relative URIs have been seen from parsers (Xerces)
	    fatal (new SAXParseException ("parser bug: relative URI",
		locator, e));
	}

	// not reached at run time, since fatal() always throws
	return uri;
    }

    // Undoes the bookkeeping for one entity:  removes the given URI
    // string from the list of active inclusions, and discards the top
    // entry of the base URI stack.
    private void pop (String uri)
    {
	inclusions.removeElement (uri);
	uris.pop ();
    }

    //
    // Document entity boundaries get both treatments:  the document is
    // recorded as an active entity and supplies the initial base URI.
    //
    public void startDocument () throws SAXException
    {
	// A parse that was aborted by an exception never got as far as
	// endDocument(), so clear out whatever it left behind.  Stale
	// entries would otherwise make a retry of the same document
	// fail with a bogus "circular inclusion" report.
	ignoreCount = 0;
	inclusions.setSize (0);
	extEntities.clear ();
	uris.setSize (0);

	addMarker (null);
	super.startDocument ();
    }

    // Discards all per-document bookkeeping (active inclusions, known
    // external entities, base URI stack) before passing the event on.
    public void endDocument () throws SAXException
    {
	inclusions.removeAllElements ();
	extEntities.clear ();
	uris.removeAllElements ();

	super.endDocument ();
    }

    //
    // External general entity boundaries get both treatments, so the
    // system ID of each such entity is remembered here.  Declarations
    // are then passed down the pipeline, like other declaration events.
    //
    public void externalEntityDecl (String name,
    	String publicId, String systemId)
    throws SAXException
    {
	// names of parameter entities start with '%'; only general
	// entities are tracked
	if (!name.startsWith ("%")) {
	    // there may be no locator at all, or one without a system ID
	    String	base = (locator == null)
				? null
				: locator.getSystemId ();

	    if (base != null) {
		try {
		    systemId = new URL (new URL (base), systemId).toString ();
		} catch (IOException e) {
		    // best effort:  keep the system ID as it was reported
		}
	    }
	    extEntities.put (name, systemId);
	}
	super.externalEntityDecl (name, publicId, systemId);
    }

    // While events are being suppressed, only the nesting depth is
    // tracked.  Otherwise an entity that was declared as an external
    // general entity is recorded as an active inclusion, and the event
    // is passed on.
    public void startEntity (String name)
    throws SAXException
    {
	if (ignoreCount == 0) {
	    Object	declared = extEntities.get (name);

	    if (declared != null)
		addMarker ((String) declared);
	    super.startEntity (name);
	} else
	    ignoreCount++;
    }

    // Counterpart of startEntity():  unwinds the suppression depth, or
    // else removes the markers of a declared external general entity
    // and passes the event on.
    public void endEntity (String name)
    throws SAXException
    {
	// still suppressing after stepping out one level?
	if (ignoreCount != 0 && --ignoreCount != 0)
	    return;

	Object	declared = extEntities.get (name);

	if (declared != null)
	    pop ((String) declared);
	super.endEntity (name);
    }
    
    //
    // Element boundaries only affect the base URI stack, unless the
    // element is in the XInclude namespace.  An xi:include element is
    // replaced by the resource it names, and its own content is then
    // suppressed; xi:fallback outside of xi:include is a fatal error;
    // other XInclude elements are passed on after a warning.
    //
    public void
    startElement (String uri, String localName, String qName, Attributes atts)
    throws SAXException
    {
	// inside suppressed content, only the nesting depth is tracked
	if (ignoreCount != 0) {
	    ignoreCount++;
	    return;
	}

	// each element pushes a base URI:  the inherited one, unless
	// an xml:base attribute provides a replacement
	URL	inherited = (URL) uris.peek ();
	String	xmlBase = atts.getValue (
			"http://www.w3.org/XML/1998/namespace", "base");

	if (xmlBase != null) {
	    if (xmlBase.indexOf ('#') >= 0)
		fatal (new SAXParseException (
		    "xml:base with fragment: " + xmlBase,
		    locator));

	    try {
		uris.push (new URL (inherited, xmlBase));
	    } catch (Exception e) {
		fatal (new SAXParseException (
		    "xml:base with illegal uri: " + xmlBase,
		    locator, e));
	    }
	} else
	    uris.push (inherited);

	// elements outside the XInclude namespace pass straight through
	if (!"http://www.w3.org/2001/XInclude".equals (uri)) {
	    super.startElement (uri, localName, qName, atts);
	    return;
	}

	if ("fallback".equals (localName)) {
	    fatal (new SAXParseException (
		"illegal top level XInclude 'fallback' element",
		locator));
	    return;
	}

	if (!"include".equals (localName)) {
	    ErrorHandler	handler = getErrorHandler ();

	    // CR doesn't say this is an error
	    if (handler != null)
		handler.warning (new SAXParseException (
		    "unrecognized toplevel XInclude element: " + localName,
		    locator));
	    super.startElement (uri, localName, qName, atts);
	    return;
	}

	// what remains is xi:include
	String			href = atts.getValue ("href");
	String			parse = atts.getValue ("parse");
	String			encoding = atts.getValue ("encoding");
	URL			base = (URL) uris.peek ();
	SAXParseException	failure = null;

	if (href == null)
	    fatal (new SAXParseException (
		"XInclude missing href",
		locator));
	if (href.indexOf ('#') >= 0)
	    fatal (new SAXParseException (
		"XInclude with fragment: " + href,
		locator));

	if (parse == null || "xml".equals (parse))
	    failure = xinclude (base, href);
	else if ("text".equals (parse))
	    failure = readText (base, href, encoding);
	else
	    fatal (new SAXParseException (
		"unknown XInclude parsing mode: " + parse,
		locator));

	// FIXME the 17-Sept-2002 CR of XInclude says we "must"
	// use xi:fallback elements to handle resource errors,
	// if they exist.
	if (failure != null)
	    fatal (failure);

	// the inclusion worked:  strip out all child content
	ignoreCount++;
    }

    // Unwinds the suppression depth, or else drops the element's base
    // URI and passes the event on.  The end of an xi:include element
    // is never passed on.
    public void endElement (String uri, String localName, String qName)
    throws SAXException
    {
	// still suppressing after stepping out one level?
	if (ignoreCount != 0 && --ignoreCount != 0)
	    return;

	uris.pop ();

	if (!"http://www.w3.org/2001/XInclude".equals (uri)
		|| !"include".equals (localName))
	    super.endElement (uri, localName, qName);
    }

    //
    // All content within non-empty xi:include elements is ignored;
    // this and the following callbacks pass events on only while
    // nothing is being suppressed.
    //
    public void characters (char ch [], int start, int length)
    throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.characters (ch, start, length);
    }

    // Passed on only while nothing is being suppressed.
    public void processingInstruction (String target, String value)
    throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.processingInstruction (target, value);
    }

    // Passed on only while nothing is being suppressed.
    public void ignorableWhitespace (char ch [], int start, int length)
    throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.ignorableWhitespace (ch, start, length);
    }

    // Passed on only while nothing is being suppressed.
    public void comment (char ch [], int start, int length)
    throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.comment (ch, start, length);
    }

    // Passed on only while nothing is being suppressed.
    public void startCDATA () throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.startCDATA ();
    }

    // Passed on only while nothing is being suppressed.
    public void endCDATA () throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.endCDATA ();
    }

    // Passed on only while nothing is being suppressed.
    public void startPrefixMapping (String prefix, String uri)
    throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.startPrefixMapping (prefix, uri);
    }

    // Passed on only while nothing is being suppressed.
    public void endPrefixMapping (String prefix) throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.endPrefixMapping (prefix);
    }

    // Passed on only while nothing is being suppressed.
    public void skippedEntity (String name) throws SAXException
    {
	if (ignoreCount != 0)
	    return;
	super.skippedEntity (name);
    }

    // JDK 1.1 seems to need it to be done this way, sigh
    // (package-private accessor through which the current locator
    // is replaced)
    void setLocator (Locator l)
    {
	locator = l;
    }
    // Package-private accessor returning the current locator, which
    // may be null.
    Locator getLocator ()
    {
	return locator;
    }
    

    //
    // Event filter placed in front of this one while an XIncluded
    // entity is parsed.  It manages the current locator, and filters
    // out events that would be incorrect to report from an included
    // document.
    //
    private class Scrubber extends EventFilter
    {
	Scrubber (EventFilter next)
	throws SAXException
	{
	    // delegation passes to next in chain
	    super (next);

	    // all content and lexical events are seen by this object
	    setContentHandler (this);
	    setProperty (LEXICAL_HANDLER, this);

	    // DTD events are all dropped
	    setDTDHandler (null);
	    setProperty (DECL_HANDLER, null);
	}

	// The included document's locator becomes the one behind the
	// enclosing filter's proxy locator; it is not passed on.
	public void setDocumentLocator (Locator l)
	{
	    setLocator (l);
	}

	// An event stream has only one startDocument()/endDocument()
	// pair, so those of the included document are discarded.
	public void startDocument ()
	{
	}

	public void endDocument ()
	{
	}

	// Reports a fatal error located using the current locator.
	private void reject (String message) throws SAXException
	{
	    fatal (new SAXParseException (message, getLocator ()));
	}

	// only the DTD from the "base document" gets reported
	public void startDTD (String root, String publicId, String systemId)
	throws SAXException
	{
	    reject ("XIncluded DTD: " + systemId);
	}

	public void endDTD ()
	throws SAXException
	{
	    reject ("XIncluded DTD");
	}

	// ... so this should never happen
	public void skippedEntity (String name) throws SAXException
	{
	    reject ("XInclude skipped entity: " + name);
	}

	// since we rejected DTDs, only builtin entities can be reported
    }

    // <xi:include parse='xml' ...>
    // relative to the base URI passed
    //
    // Parses the document named by href with a new XMLReader, merging
    // its events (except its DTD) into this filter's output.  Returns
    // null on success, or an exception describing an I/O failure that
    // the caller is expected to report.  Other failures are thrown.
    private SAXParseException xinclude (URL url, String href)
    throws SAXException
    {
	XMLReader	helper;
	Scrubber	scrubber;
	Locator		savedLocator = locator;
	boolean		marked = false;

	// start with a parser acting just like our input
	// modulo DTD-ish stuff (validation flag, entity resolver);
	// prefix information is kept only when that was asked for
	// with setSavingPrefixes()
	helper = XMLReaderFactory.createXMLReader ();
	helper.setErrorHandler (getErrorHandler ());
	helper.setFeature (FEATURE_URI + "namespace-prefixes", savingPrefixes);

	scrubber = new Scrubber (this);

	// Merge the included document, except its DTD
	try {
	    // Set up the proxy locator and event filter.  Until the
	    // helper provides a locator of its own, there is none.
	    locator = null;
	    bind (helper, scrubber);

	    url = new URL (url, href);
	    href = url.toString ();

	    // report this against the including document, since the
	    // included one hasn't been opened
	    if (inclusions.contains (href))
		fatal (new SAXParseException (
			"XInclude, circular inclusion", savedLocator));

	    inclusions.addElement (href);
	    uris.push (url);
	    marked = true;
	    helper.parse (new InputSource (href));
	    return null;
	} catch (java.io.IOException e) {
	    return new SAXParseException (href, locator, e);
	} finally {
	    // undo only what was actually done above; otherwise the
	    // markers of an enclosing entity would be removed instead
	    if (marked)
		pop (href);
	    locator = savedLocator;
	}
    }

    // <xi:include parse='text' ...>
    // relative to the base URI passed
    //
    // Reads the resource named by href and reports its content as
    // character data.  When no encoding is given, one is sought in
    // the resource's content type; failing that, a warning is issued
    // and the platform's default encoding is used.  Returns null on
    // success, or an exception describing an I/O failure that the
    // caller is expected to report.
    private SAXParseException readText (URL url, String href, String encoding)
    throws SAXException
    {
	InputStream	in = null;

	try {
	    URLConnection	conn;
	    InputStreamReader	reader;
	    char		buf [] = new char [4096];
	    int			count;

	    url = new URL (url, href);
	    conn = url.openConnection ();
	    in = conn.getInputStream ();
	    if (encoding == null)
		encoding = Resolver.getEncoding (conn.getContentType ());
	    if (encoding == null) {
		ErrorHandler	eh = getErrorHandler ();
		if (eh != null)
		    eh.warning (new SAXParseException (
			"guessing text encoding for URL: " + url,
			locator));
		reader = new InputStreamReader (in);
	    } else
		reader = new InputStreamReader (in, encoding);

	    // bypasses this.characters(), so the text is always passed on
	    while ((count = reader.read (buf, 0, buf.length)) != -1)
		super.characters (buf, 0, count);

	    // close on the normal path, so that a failure to close is
	    // still reported; clearing "in" first keeps the cleanup
	    // code below from closing it again
	    InputStream		finished = in;

	    in = null;
	    finished.close ();
	    return null;
	} catch (IOException e) {
	    return new SAXParseException (
		"can't XInclude text",
		locator, e);
	} finally {
	    // reached with the stream still open only when reading or
	    // event delivery failed; don't leak it
	    if (in != null) {
		try {
		    in.close ();
		} catch (IOException ignored) {
		    // the original failure is the one worth reporting
		}
	    }
	}
    }
}