diff options
author | Brian Jones <cbj@gnu.org> | 2003-02-01 02:10:07 +0000 |
---|---|---|
committer | Brian Jones <cbj@gnu.org> | 2003-02-01 02:10:07 +0000 |
commit | a286d8e992bbc0a6180638df2bfb55e433642b78 (patch) | |
tree | 6c7ff3ab361b5543e219d49f537b2ecb38de6eb7 /external/jaxp/source/gnu/xml/pipeline/XIncludeFilter.java | |
parent | a1314139baa4e9cd89051573d3bb5a5b556f3b32 (diff) | |
download | classpath-a286d8e992bbc0a6180638df2bfb55e433642b78.tar.gz |
Initial revision
Diffstat (limited to 'external/jaxp/source/gnu/xml/pipeline/XIncludeFilter.java')
-rw-r--r-- | external/jaxp/source/gnu/xml/pipeline/XIncludeFilter.java | 573 |
1 files changed, 573 insertions, 0 deletions
diff --git a/external/jaxp/source/gnu/xml/pipeline/XIncludeFilter.java b/external/jaxp/source/gnu/xml/pipeline/XIncludeFilter.java new file mode 100644 index 000000000..e629542de --- /dev/null +++ b/external/jaxp/source/gnu/xml/pipeline/XIncludeFilter.java @@ -0,0 +1,573 @@ +/* + * $Id: XIncludeFilter.java,v 1.1 2003-02-01 02:10:22 cbj Exp $ + * Copyright (C) 2001-2002 David Brownell + * + * This file is part of GNU JAXP, a library. + * + * GNU JAXP is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GNU JAXP is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * As a special exception, if you link this library with other files to + * produce an executable, this library does not by itself cause the + * resulting executable to be covered by the GNU General Public License. + * This exception does not however invalidate any other reasons why the + * executable file might be covered by the GNU General Public License. + */ + +package gnu.xml.pipeline; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.URL; +import java.net.URLConnection; +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Stack; +import java.util.Vector; + +import org.xml.sax.Attributes; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.XMLReaderFactory; + +import gnu.xml.util.Resolver; + + + +// $Id: XIncludeFilter.java,v 1.1 2003-02-01 02:10:22 cbj Exp $ + +/** + * Filter to process an XPointer-free subset of + * <a href="http://www.w3.org/TR/xinclude">XInclude</a>, supporting its + * use as a kind of replacement for parsed general entities. + * XInclude works much like the <code>#include</code> of C/C++ but + * works for XML documents as well as unparsed text files. + * Restrictions from the 17-Sept-2002 CR draft of XInclude are as follows: + * + * <ul> + * + * <li> URIs must not include fragment identifiers. + * The CR specifies support for XPointer <em>element()</em> fragment IDs, + * which is not currently implemented here. + * + * <li> <em>xi:fallback</em> handling of resource errors is not + * currently supported. + * + * <li> DTDs are not supported in included files, since the SAX DTD events + * must have completely preceded any included file. + * The CR explicitly allows the DTD related portions of the infoset to + * grow as an effect of including XML documents. + * + * <li> <em>xml:base</em> fixup isn't done. + * + * </ul> + * + * <p> XML documents that are included will normally be processed using + * the default SAX namespace rules, meaning that prefix information may + * be discarded. This may be changed with {@link #setSavingPrefixes + * setSavingPrefixes()}. <em>You are strongly advised to do this.</em> + * + * <p> Note that XInclude allows highly incompatible implementations, which + * are specialized to handle application-specific infoset extensions. Some + * such implementations can be implemented by subclassing this one, but + * they may only be substituted in applications at "user option". + * + * <p>TBD: "IURI" handling. + * + * @author David Brownell + * @version $Date: 2003-02-01 02:10:22 $ + */ +public class XIncludeFilter extends EventFilter implements Locator +{ + private Hashtable extEntities = new Hashtable (5, 5); + private int ignoreCount; + private Stack uris = new Stack (); + private Locator locator; + private Vector inclusions = new Vector (5, 5); + private boolean savingPrefixes; + + /** + */ + public XIncludeFilter (EventConsumer next) + throws SAXException + { + super (next); + setContentHandler (this); + // DTDHandler callbacks pass straight through + setProperty (DECL_HANDLER, this); + setProperty (LEXICAL_HANDLER, this); + } + + private void fatal (SAXParseException e) throws SAXException + { + ErrorHandler eh; + + eh = getErrorHandler (); + if (eh != null) + eh.fatalError (e); + throw e; + } + + /** + * Passes "this" down the filter chain as a proxy locator. + */ + public void setDocumentLocator (Locator locator) + { + this.locator = locator; + super.setDocumentLocator (this); + } + + /** Used for proxy locator; do not call directly. */ + public String getSystemId () + { return (locator == null) ? null : locator.getSystemId (); } + /** Used for proxy locator; do not call directly. */ + public String getPublicId () + { return (locator == null) ? null : locator.getPublicId (); } + /** Used for proxy locator; do not call directly. */ + public int getLineNumber () + { return (locator == null) ? -1 : locator.getLineNumber (); } + /** Used for proxy locator; do not call directly. */ + public int getColumnNumber () + { return (locator == null) ? -1 : locator.getColumnNumber (); } + + /** + * Assigns the flag controlling the setting of the SAX2 + * <em>namespace-prefixes</em> flag. + */ + public void setSavingPrefixes (boolean flag) + { savingPrefixes = flag; } + + /** + * Returns the flag controlling the setting of the SAX2 + * <em>namespace-prefixes</em> flag when parsing included documents. + * The default value is the SAX2 default (false), which discards + * information that can be useful. + */ + public boolean isSavingPrefixes () + { return savingPrefixes; } + + // + // Two mechanisms are interacting here. + // + // - XML Base implies a stack of base URIs, updated both by + // "real entity" boundaries and element boundaries. + // + // - Active "Real Entities" (for document and general entities, + // and by xincluded files) are tracked to prevent circular + // inclusions. + // + private String addMarker (String uri) + throws SAXException + { + if (locator != null && locator.getSystemId () != null) + uri = locator.getSystemId (); + + // guard against InputSource objects without system IDs + if (uri == null) + fatal (new SAXParseException ("Entity URI is unknown", locator)); + + try { + URL url = new URL (uri); + + uri = url.toString (); + if (inclusions.contains (uri)) + fatal (new SAXParseException ( + "XInclude, circular inclusion", locator)); + inclusions.addElement (uri); + uris.push (url); + } catch (IOException e) { + // guard against illegal relative URIs (Xerces) + fatal (new SAXParseException ("parser bug: relative URI", + locator, e)); + } + return uri; + } + + private void pop (String uri) + { + inclusions.removeElement (uri); + uris.pop (); + } + + // + // Document entity boundaries get both treatments. + // + public void startDocument () throws SAXException + { + ignoreCount = 0; + addMarker (null); + super.startDocument (); + } + + public void endDocument () throws SAXException + { + inclusions.setSize (0); + extEntities.clear (); + uris.setSize (0); + super.endDocument (); + } + + // + // External general entity boundaries get both treatments. + // + public void externalEntityDecl (String name, + String publicId, String systemId) + throws SAXException + { + if (name.charAt (0) == '%') + return; + try { + URL url = new URL (locator.getSystemId ()); + systemId = new URL (url, systemId).toString (); + } catch (IOException e) { + // what could we do? + } + extEntities.put (name, systemId); + } + + public void startEntity (String name) + throws SAXException + { + if (ignoreCount != 0) { + ignoreCount++; + return; + } + + String uri = (String) extEntities.get (name); + if (uri != null) + addMarker (uri); + super.startEntity (name); + } + + public void endEntity (String name) + throws SAXException + { + if (ignoreCount != 0) { + if (--ignoreCount != 0) + return; + } + + String uri = (String) extEntities.get (name); + + if (uri != null) + pop (uri); + super.endEntity (name); + } + + // + // element boundaries only affect the base URI stack, + // unless they're XInclude elements. + // + public void + startElement (String uri, String localName, String qName, Attributes atts) + throws SAXException + { + if (ignoreCount != 0) { + ignoreCount++; + return; + } + + URL baseURI = (URL) uris.peek (); + String base; + + base = atts.getValue ("http://www.w3.org/XML/1998/namespace", "base"); + if (base == null) + uris.push (baseURI); + else { + URL url; + + if (base.indexOf ('#') != -1) + fatal (new SAXParseException ( + "xml:base with fragment: " + base, + locator)); + + try { + baseURI = new URL (baseURI, base); + uris.push (baseURI); + } catch (Exception e) { + fatal (new SAXParseException ( + "xml:base with illegal uri: " + base, + locator, e)); + } + } + + if (!"http://www.w3.org/2001/XInclude".equals (uri)) { + super.startElement (uri, localName, qName, atts); + return; + } + + if ("include".equals (localName)) { + String href = atts.getValue ("href"); + String parse = atts.getValue ("parse"); + String encoding = atts.getValue ("encoding"); + URL url = (URL) uris.peek (); + SAXParseException x = null; + + if (href == null) + fatal (new SAXParseException ( + "XInclude missing href", + locator)); + if (href.indexOf ('#') != -1) + fatal (new SAXParseException ( + "XInclude with fragment: " + href, + locator)); + + if (parse == null || "xml".equals (parse)) + x = xinclude (url, href); + else if ("text".equals (parse)) + x = readText (url, href, encoding); + else + fatal (new SAXParseException ( + "unknown XInclude parsing mode: " + parse, + locator)); + if (x == null) { + // strip out all child content + ignoreCount++; + return; + } + + // FIXME the 17-Sept-2002 CR of XInclude says we "must" + // use xi:fallback elements to handle resource errors, + // if they exist. + fatal (x); + + } else if ("fallback".equals (localName)) { + fatal (new SAXParseException ( + "illegal top level XInclude 'fallback' element", + locator)); + } else { + ErrorHandler eh = getErrorHandler (); + + // CR doesn't say this is an error + if (eh != null) + eh.warning (new SAXParseException ( + "unrecognized toplevel XInclude element: " + localName, + locator)); + super.startElement (uri, localName, qName, atts); + } + } + + public void endElement (String uri, String localName, String qName) + throws SAXException + { + if (ignoreCount != 0) { + if (--ignoreCount != 0) + return; + } + + uris.pop (); + if (!("http://www.w3.org/2001/XInclude".equals (uri) + && "include".equals (localName))) + super.endElement (uri, localName, qName); + } + + // + // ignore all content within non-empty xi:include elements + // + public void characters (char ch [], int start, int length) + throws SAXException + { + if (ignoreCount == 0) + super.characters (ch, start, length); + } + + public void processingInstruction (String target, String value) + throws SAXException + { + if (ignoreCount == 0) + super.processingInstruction (target, value); + } + + public void ignorableWhitespace (char ch [], int start, int length) + throws SAXException + { + if (ignoreCount == 0) + super.ignorableWhitespace (ch, start, length); + } + + public void comment (char ch [], int start, int length) + throws SAXException + { + if (ignoreCount == 0) + super.comment (ch, start, length); + } + + public void startCDATA () throws SAXException + { + if (ignoreCount == 0) + super.startCDATA (); + } + + public void endCDATA () throws SAXException + { + if (ignoreCount == 0) + super.endCDATA (); + } + + public void startPrefixMapping (String prefix, String uri) + throws SAXException + { + if (ignoreCount == 0) + super.startPrefixMapping (prefix, uri); + } + + public void endPrefixMapping (String prefix) throws SAXException + { + if (ignoreCount == 0) + super.endPrefixMapping (prefix); + } + + public void skippedEntity (String name) throws SAXException + { + if (ignoreCount == 0) + super.skippedEntity (name); + } + + // JDK 1.1 seems to need it to be done this way, sigh + void setLocator (Locator l) { locator = l; } + Locator getLocator () { return locator; } + + + // + // for XIncluded entities, manage the current locator and + // filter out events that would be incorrect to report + // + private class Scrubber extends EventFilter + { + Scrubber (EventFilter f) + throws SAXException + { + // delegation passes to next in chain + super (f); + + // process all content events + setContentHandler (this); + setProperty (LEXICAL_HANDLER, this); + + // drop all DTD events + setDTDHandler (null); + setProperty (DECL_HANDLER, null); + } + + // maintain proxy locator + // only one startDocument()/endDocument() pair per event stream + public void setDocumentLocator (Locator l) + { setLocator (l); } + public void startDocument () + { } + public void endDocument () + { } + + private void reject (String message) throws SAXException + { fatal (new SAXParseException (message, getLocator ())); } + + // only the DTD from the "base document" gets reported + public void startDTD (String root, String publicId, String systemId) + throws SAXException + { reject ("XIncluded DTD: " + systemId); } + public void endDTD () + throws SAXException + { reject ("XIncluded DTD"); } + // ... so this should never happen + public void skippedEntity (String name) throws SAXException + { reject ("XInclude skipped entity: " + name); } + + // since we rejected DTDs, only builtin entities can be reported + } + + // <xi:include parse='xml' ...> + // relative to the base URI passed + private SAXParseException xinclude (URL url, String href) + throws SAXException + { + XMLReader helper; + Scrubber scrubber; + Locator savedLocator = locator; + + // start with a parser acting just like our input + // modulo DTD-ish stuff (validation flag, entity resolver) + helper = XMLReaderFactory.createXMLReader (); + helper.setErrorHandler (getErrorHandler ()); + helper.setFeature (FEATURE_URI + "namespace-prefixes", true); + + // Set up the proxy locator and event filter. + scrubber = new Scrubber (this); + locator = null; + bind (helper, scrubber); + + // Merge the included document, except its DTD + try { + url = new URL (url, href); + href = url.toString (); + + if (inclusions.contains (href)) + fatal (new SAXParseException ( + "XInclude, circular inclusion", locator)); + + inclusions.addElement (href); + uris.push (url); + helper.parse (new InputSource (href)); + return null; + } catch (java.io.IOException e) { + return new SAXParseException (href, locator, e); + } finally { + pop (href); + locator = savedLocator; + } + } + + // <xi:include parse='text' ...> + // relative to the base URI passed + private SAXParseException readText (URL url, String href, String encoding) + throws SAXException + { + InputStream in = null; + + try { + URLConnection conn; + InputStreamReader reader; + char buf [] = new char [4096]; + int count; + + url = new URL (url, href); + conn = url.openConnection (); + in = conn.getInputStream (); + if (encoding == null) + encoding = Resolver.getEncoding (conn.getContentType ()); + if (encoding == null) { + ErrorHandler eh = getErrorHandler (); + if (eh != null) + eh.warning (new SAXParseException ( + "guessing text encoding for URL: " + url, + locator)); + reader = new InputStreamReader (in); + } else + reader = new InputStreamReader (in, encoding); + + while ((count = reader.read (buf, 0, buf.length)) != -1) + super.characters (buf, 0, count); + in.close (); + return null; + } catch (IOException e) { + return new SAXParseException ( + "can't XInclude text", + locator, e); + } + } +} |