diff options
author | Chris Burdess <dog@bluezoo.org> | 2005-02-27 14:32:28 +0000 |
---|---|---|
committer | Chris Burdess <dog@bluezoo.org> | 2005-02-27 14:32:28 +0000 |
commit | 60abf2fc53c234158de6ead4745cf5f0db988a9d (patch) | |
tree | cbd3d9c083488c29dc0ce500d9bd5d71600db667 /gnu/xml | |
parent | f21fa1545d26a37531bf59a46e0c3abea82e7b49 (diff) | |
download | classpath-60abf2fc53c234158de6ead4745cf5f0db988a9d.tar.gz |
2005-02-27 Chris Burdess <dog@gnu.org>
* gnu/xml/aelfred2/JAXPFactory.java,
gnu/xml/aelfred2/SAXDriver.java,
gnu/xml/aelfred2/XmlParser.java,
gnu/xml/aelfred2/XmlReader.java: Applied GNU Classpath source code
formatting conventions. Replaced arrays of Object with struct-like
classes for easier maintainability. Made SAXDriver.stringInterning
package private to allow access from XmlParser inside the loop without
a method call overhead.
Diffstat (limited to 'gnu/xml')
-rw-r--r-- | gnu/xml/aelfred2/JAXPFactory.java | 273 |
-rw-r--r-- | gnu/xml/aelfred2/SAXDriver.java | 2451 |
-rw-r--r-- | gnu/xml/aelfred2/XmlParser.java | 10240 |
-rw-r--r-- | gnu/xml/aelfred2/XmlReader.java | 515 |
4 files changed, 7254 insertions, 6225 deletions
diff --git a/gnu/xml/aelfred2/JAXPFactory.java b/gnu/xml/aelfred2/JAXPFactory.java index 006dc1302..ff585a82a 100644 --- a/gnu/xml/aelfred2/JAXPFactory.java +++ b/gnu/xml/aelfred2/JAXPFactory.java @@ -60,137 +60,172 @@ import javax.xml.parsers.SAXParserFactory; * * @author David Brownell */ -public final class JAXPFactory extends SAXParserFactory +public final class JAXPFactory + extends SAXParserFactory { - private Hashtable flags = new Hashtable (); - - /** - * Constructs a factory which normally returns a non-validating - * parser. - */ - public JAXPFactory () { } - - public SAXParser newSAXParser () + + private Hashtable flags = new Hashtable(); + + /** + * Constructs a factory which normally returns a non-validating + * parser. + */ + public JAXPFactory() + { + } + + public SAXParser newSAXParser() throws ParserConfigurationException, SAXException + { + JaxpParser jaxp = new JaxpParser(); + Enumeration e = flags.keys(); + XMLReader parser = jaxp.getXMLReader(); + + parser.setFeature(SAXDriver.FEATURE + "namespaces", + isNamespaceAware()); + parser.setFeature(SAXDriver.FEATURE + "validation", + isValidating()); + // that makes SAX2 feature flags trump JAXP + + while (e.hasMoreElements()) + { + String uri = (String) e.nextElement(); + Boolean value = (Boolean) flags.get(uri); + parser.setFeature(uri, value.booleanValue()); + } + + return jaxp; + } + + // yes, this "feature transfer" mechanism doesn't play well + + public void setFeature(String name, boolean value) + throws ParserConfigurationException, SAXNotRecognizedException, + SAXNotSupportedException + { + try + { + // force "early" detection of errors where possible + // (flags can't necessarily be set before parsing) + new JaxpParser().getXMLReader().setFeature(name, value); + + flags.put(name, new Boolean(value)); + } + catch (SAXNotRecognizedException e) + { + throw new SAXNotRecognizedException(name); + } + catch (SAXNotSupportedException e) + { + throw new SAXNotSupportedException(name); + } + catch 
(Exception e) + { + throw new ParserConfigurationException(e.getClass().getName() + + ": " + + e.getMessage()); + } + } + + public boolean getFeature(String name) + throws ParserConfigurationException, SAXNotRecognizedException, + SAXNotSupportedException + { + Boolean value = (Boolean) flags.get(name); + + if (value != null) + { + return value.booleanValue(); + } + else + { + try + { + return new JaxpParser().getXMLReader().getFeature(name); + } + catch (SAXNotRecognizedException e) + { + throw new SAXNotRecognizedException(name); + } + catch (SAXNotSupportedException e) + { + throw new SAXNotSupportedException(name); + } + catch (SAXException e) + { + throw new ParserConfigurationException(e.getClass().getName() + + ": " + + e.getMessage()); + } + } + } + + private static class JaxpParser + extends SAXParser + { + + private XmlReader ae2 = new XmlReader(); + private XMLReaderAdapter parser = null; + + JaxpParser() { - JaxpParser jaxp = new JaxpParser (); - Enumeration e = flags.keys (); - XMLReader parser = jaxp.getXMLReader (); - - parser.setFeature ( - SAXDriver.FEATURE + "namespaces", - isNamespaceAware ()); - parser.setFeature ( - SAXDriver.FEATURE + "validation", - isValidating ()); - // that makes SAX2 feature flags trump JAXP - - while (e.hasMoreElements ()) { - String uri = (String) e.nextElement (); - Boolean value = (Boolean) flags.get (uri); - parser.setFeature (uri, value.booleanValue ()); - } - - return jaxp; } - // yes, this "feature transfer" mechanism doesn't play well + public void setProperty(String id, Object value) + throws SAXNotRecognizedException, SAXNotSupportedException + { + ae2.setProperty(id, value); + } - public void setFeature (String name, boolean value) - throws - ParserConfigurationException, - SAXNotRecognizedException, - SAXNotSupportedException + public Object getProperty(String id) + throws SAXNotRecognizedException, SAXNotSupportedException { - try { - // force "early" detection of errors where possible - // (flags can't 
necessarily be set before parsing) - new JaxpParser ().getXMLReader ().setFeature (name, value); - - flags.put (name, new Boolean (value)); - } catch (SAXNotRecognizedException e) { - throw new SAXNotRecognizedException (name); - } catch (SAXNotSupportedException e) { - throw new SAXNotSupportedException (name); - } catch (Exception e) { - throw new ParserConfigurationException ( - e.getClass ().getName () - + ": " - + e.getMessage ()); - } + return ae2.getProperty(id); } - public boolean getFeature (String name) - throws - ParserConfigurationException, - SAXNotRecognizedException, - SAXNotSupportedException + public Parser getParser() + throws SAXException { - Boolean value = (Boolean) flags.get (name); - - if (value != null) - return value.booleanValue (); - else - try { - return new JaxpParser ().getXMLReader ().getFeature (name); - } catch (SAXNotRecognizedException e) { - throw new SAXNotRecognizedException (name); - } catch (SAXNotSupportedException e) { - throw new SAXNotSupportedException (name); - } catch (SAXException e) { - throw new ParserConfigurationException ( - e.getClass ().getName () - + ": " - + e.getMessage ()); - } + if (parser == null) + { + parser = new XMLReaderAdapter(ae2); + } + return parser; } - private static class JaxpParser extends SAXParser + public XMLReader getXMLReader () + throws SAXException { - private XmlReader ae2 = new XmlReader (); - private XMLReaderAdapter parser = null; - - JaxpParser () { } - - public void setProperty (String id, Object value) - throws SAXNotRecognizedException, SAXNotSupportedException - { ae2.setProperty (id, value); } - - public Object getProperty (String id) - throws SAXNotRecognizedException, SAXNotSupportedException - { return ae2.getProperty (id); } - - public Parser getParser () - throws SAXException - { - if (parser == null) - parser = new XMLReaderAdapter (ae2); - return parser; - } - - public XMLReader getXMLReader () - throws SAXException - { return ae2; } - - public boolean isNamespaceAware 
() - { - try { - return ae2.getFeature (SAXDriver.FEATURE + "namespaces"); - } catch (Exception e) { - throw new Error (); - } - } - - public boolean isValidating () - { - try { - return ae2.getFeature (SAXDriver.FEATURE + "validation"); - } catch (Exception e) { - throw new Error (); - } - } - - // TODO isXIncludeAware() - + return ae2; } + + public boolean isNamespaceAware() + { + try + { + return ae2.getFeature(SAXDriver.FEATURE + "namespaces"); + } + catch (Exception e) + { + throw new Error(); + } + } + + public boolean isValidating() + { + try + { + return ae2.getFeature(SAXDriver.FEATURE + "validation"); + } + catch (Exception e) + { + throw new Error(); + } + } + + // TODO isXIncludeAware() + + } + } + diff --git a/gnu/xml/aelfred2/SAXDriver.java b/gnu/xml/aelfred2/SAXDriver.java index 80cbc1155..3663116ad 100644 --- a/gnu/xml/aelfred2/SAXDriver.java +++ b/gnu/xml/aelfred2/SAXDriver.java @@ -60,8 +60,6 @@ import java.net.URL; import java.util.Locale; import java.util.Stack; -// maintaining 1.1 compatibility for now ... 
more portable, PJava, etc -// Iterator, Hashmap and ArrayList ought to be faster import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; @@ -131,1276 +129,1491 @@ import org.xml.sax.helpers.NamespaceSupport; * @see org.xml.sax.Parser */ final public class SAXDriver - implements Locator, Attributes2, XMLReader, Parser, AttributeList + implements Locator, Attributes2, XMLReader, Parser, AttributeList { - private final DefaultHandler2 base = new DefaultHandler2 (); - private XmlParser parser; - - private EntityResolver entityResolver = base; - private EntityResolver2 resolver2 = null; - private ContentHandler contentHandler = base; - private DTDHandler dtdHandler = base; - private ErrorHandler errorHandler = base; - private DeclHandler declHandler = base; - private LexicalHandler lexicalHandler = base; - - private String elementName; - private Stack entityStack; - - // one vector (of object/struct): faster, smaller - private List attributesList; - - private boolean namespaces = true; - private boolean xmlNames = false; - private boolean extGE = true; - private boolean extPE = true; - private boolean resolveAll = true; - private boolean useResolver2 = true; - private boolean stringInterning = true; - - private int attributeCount; - private boolean attributes; - private String nsTemp []; - private NamespaceSupport prefixStack; + + private final DefaultHandler2 base = new DefaultHandler2(); + private XmlParser parser; + + private EntityResolver entityResolver = base; + private EntityResolver2 resolver2 = null; + private ContentHandler contentHandler = base; + private DTDHandler dtdHandler = base; + private ErrorHandler errorHandler = base; + private DeclHandler declHandler = base; + private LexicalHandler lexicalHandler = base; + + private String elementName; + private Stack entityStack; + + // one vector (of object/struct): faster, smaller + private List attributesList; + + private boolean namespaces = true; + private boolean xmlNames = 
false; + private boolean extGE = true; + private boolean extPE = true; + private boolean resolveAll = true; + private boolean useResolver2 = true; + + // package private to allow (read-only) access in XmlParser + boolean stringInterning = true; + + private int attributeCount; + private boolean attributes; + private String[] nsTemp; + private NamespaceSupport prefixStack; + + // + // Constructor. + // + + /** + * Constructs a SAX Parser. + */ + public SAXDriver() + { + reset(); + } - // - // Constructor. - // + private void reset() + { + elementName = null; + entityStack = new Stack(); + attributesList = Collections.synchronizedList(new ArrayList()); + attributeCount = 0; + attributes = false; + nsTemp = new String[3]; + prefixStack = null; + } - /** Constructs a SAX Parser. */ - public SAXDriver () - { - reset (); - } - private void reset () - { - elementName = null; - entityStack = new Stack (); - attributesList = Collections.synchronizedList(new ArrayList()); - attributeCount = 0; - attributes = false; - nsTemp = new String[3]; - prefixStack = null; - } + // + // Implementation of org.xml.sax.Parser. + // + /** + * <b>SAX1</b>: Sets the locale used for diagnostics; currently, + * only locales using the English language are supported. + * @param locale The locale for which diagnostics will be generated + */ + public void setLocale(Locale locale) + throws SAXException + { + if ("en".equals(locale.getLanguage())) + { + return; + } + throw new SAXException ("AElfred2 only supports English locales."); + } - // - // Implementation of org.xml.sax.Parser. - // + /** + * <b>SAX2</b>: Returns the object used when resolving external + * entities during parsing (both general and parameter entities). + */ + public EntityResolver getEntityResolver() + { + return (entityResolver == base) ? null : entityResolver; + } - /** - * <b>SAX1</b>: Sets the locale used for diagnostics; currently, - * only locales using the English language are supported. 
- * @param locale The locale for which diagnostics will be generated - */ - public void setLocale (Locale locale) - throws SAXException - { - if ("en".equals (locale.getLanguage ())) - return ; + /** + * <b>SAX1, SAX2</b>: Set the entity resolver for this parser. + * @param handler The object to receive entity events. + */ + public void setEntityResolver(EntityResolver resolver) + { + if (resolver instanceof EntityResolver2) + { + resolver2 = (EntityResolver2) resolver; + } + else + { + resolver2 = null; + } + if (resolver == null) + { + resolver = base; + } + entityResolver = resolver; + } - throw new SAXException ("AElfred2 only supports English locales."); - } + /** + * <b>SAX2</b>: Returns the object used to process declarations related + * to notations and unparsed entities. + */ + public DTDHandler getDTDHandler() + { + return (dtdHandler == base) ? null : dtdHandler; + } + /** + * <b>SAX1, SAX2</b>: Set the DTD handler for this parser. + * @param handler The object to receive DTD events. + */ + public void setDTDHandler(DTDHandler handler) + { + if (handler == null) + { + handler = base; + } + this.dtdHandler = handler; + } - /** - * <b>SAX2</b>: Returns the object used when resolving external - * entities during parsing (both general and parameter entities). - */ - public EntityResolver getEntityResolver () - { - return (entityResolver == base) ? null : entityResolver; - } - /** - * <b>SAX1, SAX2</b>: Set the entity resolver for this parser. - * @param handler The object to receive entity events. - */ - public void setEntityResolver (EntityResolver resolver) - { - if (resolver instanceof EntityResolver2) - resolver2 = (EntityResolver2) resolver; - else - resolver2 = null; - if (resolver == null) - resolver = base; - entityResolver = resolver; - } + /** + * <b>SAX1</b>: Set the document handler for this parser. If a + * content handler was set, this document handler will supplant it. 
+ * The parser is set to report all XML 1.0 names rather than to + * filter out "xmlns" attributes (the "namespace-prefixes" feature + * is set to true). + * + * @deprecated SAX2 programs should use the XMLReader interface + * and a ContentHandler. + * + * @param handler The object to receive document events. + */ + public void setDocumentHandler(DocumentHandler handler) + { + contentHandler = new Adapter(handler); + xmlNames = true; + } + /** + * <b>SAX2</b>: Returns the object used to report the logical + * content of an XML document. + */ + public ContentHandler getContentHandler() + { + return (contentHandler == base) ? null : contentHandler; + } - /** - * <b>SAX2</b>: Returns the object used to process declarations related - * to notations and unparsed entities. - */ - public DTDHandler getDTDHandler () - { - return (dtdHandler == base) ? null : dtdHandler; - } + /** + * <b>SAX2</b>: Assigns the object used to report the logical + * content of an XML document. If a document handler was set, + * this content handler will supplant it (but XML 1.0 style name + * reporting may remain enabled). + */ + public void setContentHandler(ContentHandler handler) + { + if (handler == null) + { + handler = base; + } + contentHandler = handler; + } - /** - * <b>SAX1, SAX2</b>: Set the DTD handler for this parser. - * @param handler The object to receive DTD events. - */ - public void setDTDHandler (DTDHandler handler) - { - if (handler == null) - handler = base; - this.dtdHandler = handler; - } + /** + * <b>SAX1, SAX2</b>: Set the error handler for this parser. + * @param handler The object to receive error events. 
+ */ + public void setErrorHandler(ErrorHandler handler) + { + if (handler == null) + { + handler = base; + } + this.errorHandler = handler; + } + /** + * <b>SAX2</b>: Returns the object used to receive callbacks for XML + * errors of all levels (fatal, nonfatal, warning); this is never null; + */ + public ErrorHandler getErrorHandler() + { + return (errorHandler == base) ? null : errorHandler; + } - /** - * <b>SAX1</b>: Set the document handler for this parser. If a - * content handler was set, this document handler will supplant it. - * The parser is set to report all XML 1.0 names rather than to - * filter out "xmlns" attributes (the "namespace-prefixes" feature - * is set to true). - * - * @deprecated SAX2 programs should use the XMLReader interface - * and a ContentHandler. - * - * @param handler The object to receive document events. - */ - public void setDocumentHandler (DocumentHandler handler) - { - contentHandler = new Adapter (handler); - xmlNames = true; - } + /** + * <b>SAX1, SAX2</b>: Auxiliary API to parse an XML document, used mostly + * when no URI is available. + * If you want anything useful to happen, you should set + * at least one type of handler. + * @param source The XML input source. Don't set 'encoding' unless + * you know for a fact that it's correct. + * @see #setEntityResolver + * @see #setDTDHandler + * @see #setContentHandler + * @see #setErrorHandler + * @exception SAXException The handlers may throw any SAXException, + * and the parser normally throws SAXParseException objects. + * @exception IOException IOExceptions are normally through through + * the parser if there are problems reading the source document. 
+ */ + public void parse(InputSource source) + throws SAXException, IOException + { + synchronized (base) + { + parser = new XmlParser(); + if (namespaces) + { + prefixStack = new NamespaceSupport(); + } + else if (!xmlNames) + { + throw new IllegalStateException(); + } + parser.setHandler(this); + + try + { + Reader r = source.getCharacterStream(); + InputStream in = source.getByteStream(); + + parser.doParse(source.getSystemId(), + source.getPublicId(), + r, + in, + source.getEncoding()); + } + catch (SAXException e) + { + throw e; + } + catch (IOException e) + { + throw e; + } + catch (RuntimeException e) + { + throw e; + } + catch (Exception e) + { + throw new SAXParseException(e.getMessage(), this, e); + } + finally + { + contentHandler.endDocument(); + reset(); + } + } + } - /** - * <b>SAX2</b>: Returns the object used to report the logical - * content of an XML document. - */ - public ContentHandler getContentHandler () - { - return contentHandler == base ? null : contentHandler; - } + /** + * <b>SAX1, SAX2</b>: Preferred API to parse an XML document, using a + * system identifier (URI). + */ + public void parse(String systemId) + throws SAXException, IOException + { + parse(new InputSource(systemId)); + } - /** - * <b>SAX2</b>: Assigns the object used to report the logical - * content of an XML document. If a document handler was set, - * this content handler will supplant it (but XML 1.0 style name - * reporting may remain enabled). - */ - public void setContentHandler (ContentHandler handler) - { - if (handler == null) - handler = base; - contentHandler = handler; - } + // + // Implementation of SAX2 "XMLReader" interface + // + static final String FEATURE = "http://xml.org/sax/features/"; + static final String PROPERTY = "http://xml.org/sax/properties/"; + + /** + * <b>SAX2</b>: Tells the value of the specified feature flag. + * + * @exception SAXNotRecognizedException thrown if the feature flag + * is neither built in, nor yet assigned. 
+ */ + public boolean getFeature(String featureId) + throws SAXNotRecognizedException, SAXNotSupportedException + { + if ((FEATURE + "validation").equals(featureId)) + { + return false; + } - /** - * <b>SAX1, SAX2</b>: Set the error handler for this parser. - * @param handler The object to receive error events. - */ - public void setErrorHandler (ErrorHandler handler) - { - if (handler == null) - handler = base; - this.errorHandler = handler; - } + // external entities (both types) are optionally included + if ((FEATURE + "external-general-entities").equals(featureId)) + { + return extGE; + } + if ((FEATURE + "external-parameter-entities").equals(featureId)) + { + return extPE; + } + + // element/attribute names are as written in document; no mangling + if ((FEATURE + "namespace-prefixes").equals(featureId)) + { + return xmlNames; + } - /** - * <b>SAX2</b>: Returns the object used to receive callbacks for XML - * errors of all levels (fatal, nonfatal, warning); this is never null; - */ - public ErrorHandler getErrorHandler () - { return errorHandler == base ? null : errorHandler; } - - - /** - * <b>SAX1, SAX2</b>: Auxiliary API to parse an XML document, used mostly - * when no URI is available. - * If you want anything useful to happen, you should set - * at least one type of handler. - * @param source The XML input source. Don't set 'encoding' unless - * you know for a fact that it's correct. - * @see #setEntityResolver - * @see #setDTDHandler - * @see #setContentHandler - * @see #setErrorHandler - * @exception SAXException The handlers may throw any SAXException, - * and the parser normally throws SAXParseException objects. - * @exception IOException IOExceptions are normally through through - * the parser if there are problems reading the source document. 
- */ - public void parse (InputSource source) - throws SAXException, IOException - { - synchronized (base) { - parser = new XmlParser (); - if (namespaces) - prefixStack = new NamespaceSupport (); - else if (!xmlNames) - throw new IllegalStateException (); - parser.setHandler (this); - - try { - - Reader r = source.getCharacterStream(); - InputStream in = source.getByteStream(); - - - parser.doParse (source.getSystemId (), - source.getPublicId (), - r, - in, - source.getEncoding ()); - } catch (SAXException e) { - throw e; - } catch (IOException e) { - throw e; - } catch (RuntimeException e) { - throw e; - } catch (Exception e) { - throw new SAXParseException (e.getMessage (), this, e); - } finally { - contentHandler.endDocument (); - reset(); - } - } - } + // report element/attribute namespaces? + if ((FEATURE + "namespaces").equals(featureId)) + { + return namespaces; + } + // all PEs and GEs are reported + if ((FEATURE + "lexical-handler/parameter-entities").equals(featureId)) + { + return true; + } - /** - * <b>SAX1, SAX2</b>: Preferred API to parse an XML document, using a - * system identifier (URI). - */ - public void parse (String systemId) - throws SAXException, IOException - { - parse (new InputSource (systemId)); - } + // default is true + if ((FEATURE + "string-interning").equals(featureId)) + { + return stringInterning; + } + + // EXTENSIONS 1.1 + + // always returns isSpecified info + if ((FEATURE + "use-attributes2").equals(featureId)) + { + return true; + } + + // meaningful between startDocument/endDocument + if ((FEATURE + "is-standalone").equals(featureId)) + { + if (parser == null) + { + throw new SAXNotSupportedException(featureId); + } + return parser.isStandalone(); + } - // - // Implementation of SAX2 "XMLReader" interface - // - static final String FEATURE = "http://xml.org/sax/features/"; - static final String PROPERTY = "http://xml.org/sax/properties/"; - - /** - * <b>SAX2</b>: Tells the value of the specified feature flag. 
- * - * @exception SAXNotRecognizedException thrown if the feature flag - * is neither built in, nor yet assigned. - */ - public boolean getFeature (String featureId) - throws SAXNotRecognizedException, SAXNotSupportedException - { - if ((FEATURE + "validation").equals (featureId)) - return false; - - // external entities (both types) are optionally included - if ((FEATURE + "external-general-entities").equals (featureId)) - return extGE; - if ((FEATURE + "external-parameter-entities") .equals (featureId)) - return extPE; - - // element/attribute names are as written in document; no mangling - if ((FEATURE + "namespace-prefixes").equals (featureId)) - return xmlNames; - - // report element/attribute namespaces? - if ((FEATURE + "namespaces").equals (featureId)) - return namespaces; - - // all PEs and GEs are reported - if ((FEATURE + "lexical-handler/parameter-entities").equals (featureId)) - return true; - - // default is true - if ((FEATURE + "string-interning").equals (featureId)) - return stringInterning; - - // EXTENSIONS 1.1 - - // always returns isSpecified info - if ((FEATURE + "use-attributes2").equals (featureId)) - return true; - - // meaningful between startDocument/endDocument - if ((FEATURE + "is-standalone").equals (featureId)) { - if (parser == null) - throw new SAXNotSupportedException (featureId); - return parser.isStandalone (); - } - - // optionally don't absolutize URIs in declarations - if ((FEATURE + "resolve-dtd-uris").equals (featureId)) - return resolveAll; - - // optionally use resolver2 interface methods, if possible - if ((FEATURE + "use-entity-resolver2").equals (featureId)) - return useResolver2; - - throw new SAXNotRecognizedException (featureId); - } + // optionally don't absolutize URIs in declarations + if ((FEATURE + "resolve-dtd-uris").equals(featureId)) + { + return resolveAll; + } + + // optionally use resolver2 interface methods, if possible + if ((FEATURE + "use-entity-resolver2").equals(featureId)) + { + return 
useResolver2; + } + + throw new SAXNotRecognizedException(featureId); + } - // package private - DeclHandler getDeclHandler () { return declHandler; } + // package private + DeclHandler getDeclHandler() + { + return declHandler; + } - // package private - boolean resolveURIs () { return resolveAll; } + // package private + boolean resolveURIs() + { + return resolveAll; + } - /** - * <b>SAX2</b>: Returns the specified property. - * - * @exception SAXNotRecognizedException thrown if the property value - * is neither built in, nor yet stored. - */ - public Object getProperty (String propertyId) + /** + * <b>SAX2</b>: Returns the specified property. + * + * @exception SAXNotRecognizedException thrown if the property value + * is neither built in, nor yet stored. + */ + public Object getProperty(String propertyId) throws SAXNotRecognizedException - { - if ((PROPERTY + "declaration-handler").equals (propertyId)) - return declHandler == base ? null : declHandler; - - if ((PROPERTY + "lexical-handler").equals (propertyId)) - return lexicalHandler == base ? null : lexicalHandler; - - // unknown properties - throw new SAXNotRecognizedException (propertyId); - } + { + if ((PROPERTY + "declaration-handler").equals(propertyId)) + { + return (declHandler == base) ? null : declHandler; + } - /** - * <b>SAX2</b>: Sets the state of feature flags in this parser. Some - * built-in feature flags are mutable. - */ - public void setFeature (String featureId, boolean value) - throws SAXNotRecognizedException, SAXNotSupportedException - { - boolean state; - - // Features with a defined value, we just change it if we can. 
- state = getFeature (featureId); - - if (state == value) - return; - if (parser != null) - throw new SAXNotSupportedException ("not while parsing"); - - if ((FEATURE + "namespace-prefixes").equals (featureId)) { - // in this implementation, this only affects xmlns reporting - xmlNames = value; - // forcibly prevent illegal parser state - if (!xmlNames) - namespaces = true; - return; - } - - if ((FEATURE + "namespaces").equals (featureId)) { - namespaces = value; - // forcibly prevent illegal parser state - if (!namespaces) - xmlNames = true; - return; - } - - if ((FEATURE + "external-general-entities").equals (featureId)) { - extGE = value; - return; - } - if ((FEATURE + "external-parameter-entities") .equals (featureId)) { - extPE = value; - return; - } - if ((FEATURE + "resolve-dtd-uris").equals (featureId)) { - resolveAll = value; - return; - } - - if ((FEATURE + "use-entity-resolver2").equals (featureId)) { - useResolver2 = value; - return; - } - - throw new SAXNotRecognizedException (featureId); - } + if ((PROPERTY + "lexical-handler").equals(propertyId)) + { + return (lexicalHandler == base) ? null : lexicalHandler; + } + + // unknown properties + throw new SAXNotRecognizedException(propertyId); + } - /** - * <b>SAX2</b>: Assigns the specified property. Like SAX1 handlers, - * these may be changed at any time. - */ - public void setProperty (String propertyId, Object value) + /** + * <b>SAX2</b>: Sets the state of feature flags in this parser. Some + * built-in feature flags are mutable. + */ + public void setFeature(String featureId, boolean value) throws SAXNotRecognizedException, SAXNotSupportedException - { - // see if the property is recognized - getProperty (propertyId); - - // Properties with a defined value, we just change it if we can. - - if ((PROPERTY + "declaration-handler").equals (propertyId)) { - if (value == null) - declHandler = base; - else if (! 
(value instanceof DeclHandler)) - throw new SAXNotSupportedException (propertyId); - else - declHandler = (DeclHandler) value; - return ; - } - - if ((PROPERTY + "lexical-handler").equals (propertyId)) { - if (value == null) - lexicalHandler = base; - else if (! (value instanceof LexicalHandler)) - throw new SAXNotSupportedException (propertyId); - else - lexicalHandler = (LexicalHandler) value; - return ; - } - - throw new SAXNotSupportedException (propertyId); - } + { + boolean state; + + // Features with a defined value, we just change it if we can. + state = getFeature (featureId); + + if (state == value) + { + return; + } + if (parser != null) + { + throw new SAXNotSupportedException("not while parsing"); + } + if ((FEATURE + "namespace-prefixes").equals(featureId)) + { + // in this implementation, this only affects xmlns reporting + xmlNames = value; + // forcibly prevent illegal parser state + if (!xmlNames) + { + namespaces = true; + } + return; + } - // - // This is where the driver receives XmlParser callbacks and translates - // them into SAX callbacks. Some more callbacks have been added for - // SAX2 support. - // + if ((FEATURE + "namespaces").equals(featureId)) + { + namespaces = value; + // forcibly prevent illegal parser state + if (!namespaces) + { + xmlNames = true; + } + return; + } + + if ((FEATURE + "external-general-entities").equals(featureId)) + { + extGE = value; + return; + } + if ((FEATURE + "external-parameter-entities").equals(featureId)) + { + extPE = value; + return; + } + if ((FEATURE + "resolve-dtd-uris").equals(featureId)) + { + resolveAll = value; + return; + } + + if ((FEATURE + "use-entity-resolver2").equals(featureId)) + { + useResolver2 = value; + return; + } + + throw new SAXNotRecognizedException(featureId); + } + + /** + * <b>SAX2</b>: Assigns the specified property. Like SAX1 handlers, + * these may be changed at any time. 
+ */ + public void setProperty(String propertyId, Object value) + throws SAXNotRecognizedException, SAXNotSupportedException + { + // see if the property is recognized + getProperty(propertyId); + + // Properties with a defined value, we just change it if we can. + + if ((PROPERTY + "declaration-handler").equals(propertyId)) + { + if (value == null) + { + declHandler = base; + } + else if (!(value instanceof DeclHandler)) + { + throw new SAXNotSupportedException(propertyId); + } + else + { + declHandler = (DeclHandler) value; + } + return ; + } + + if ((PROPERTY + "lexical-handler").equals(propertyId)) + { + if (value == null) + { + lexicalHandler = base; + } + else if (!(value instanceof LexicalHandler)) + { + throw new SAXNotSupportedException(propertyId); + } + else + { + lexicalHandler = (LexicalHandler) value; + } + return; + } + + throw new SAXNotSupportedException(propertyId); + } + + // + // This is where the driver receives XmlParser callbacks and translates + // them into SAX callbacks. Some more callbacks have been added for + // SAX2 support. 
+ // - void startDocument () + void startDocument() throws SAXException - { - contentHandler.setDocumentLocator (this); - contentHandler.startDocument (); - attributesList.clear (); - } + { + contentHandler.setDocumentLocator(this); + contentHandler.startDocument(); + attributesList.clear(); + } - void xmlDecl(String version, - String encoding, - boolean standalone, - String inputEncoding) - throws SAXException - { - if (contentHandler instanceof ContentHandler2) - { - ((ContentHandler2) contentHandler).xmlDecl(version, - encoding, - standalone, - inputEncoding); - } - } + void xmlDecl(String version, + String encoding, + boolean standalone, + String inputEncoding) + throws SAXException + { + if (contentHandler instanceof ContentHandler2) + { + ((ContentHandler2) contentHandler).xmlDecl(version, + encoding, + standalone, + inputEncoding); + } + } - void skippedEntity (String name) + void skippedEntity(String name) throws SAXException - { contentHandler.skippedEntity (name); } + { + contentHandler.skippedEntity(name); + } - InputSource getExternalSubset (String name, String baseURI) + InputSource getExternalSubset(String name, String baseURI) throws SAXException, IOException - { - if (resolver2 == null || !useResolver2 || !extPE) - return null; - return resolver2.getExternalSubset (name, baseURI); - } + { + if (resolver2 == null || !useResolver2 || !extPE) + { + return null; + } + return resolver2.getExternalSubset(name, baseURI); + } - InputSource resolveEntity (boolean isPE, String name, - InputSource in, String baseURI) + InputSource resolveEntity(boolean isPE, String name, + InputSource in, String baseURI) throws SAXException, IOException - { - InputSource source; - - // external entities might be skipped - if (isPE && !extPE) - return null; - if (!isPE && !extGE) - return null; - - // ... 
or not - lexicalHandler.startEntity (name); - if (resolver2 != null && useResolver2) { - source = resolver2.resolveEntity (name, in.getPublicId (), - baseURI, in.getSystemId ()); - if (source == null) { - in.setSystemId (absolutize (baseURI, - in.getSystemId (), false)); - source = in; - } - } else { - in.setSystemId (absolutize (baseURI, in.getSystemId (), false)); - source = entityResolver.resolveEntity (in.getPublicId (), - in.getSystemId ()); - if (source == null) - source = in; - } - startExternalEntity (name, source.getSystemId (), true); - return source; - } + { + InputSource source; + + // external entities might be skipped + if (isPE && !extPE) + { + return null; + } + if (!isPE && !extGE) + { + return null; + } + + // ... or not + lexicalHandler.startEntity(name); + if (resolver2 != null && useResolver2) + { + source = resolver2.resolveEntity(name, in.getPublicId(), + baseURI, in.getSystemId()); + if (source == null) + { + in.setSystemId(absolutize(baseURI, + in.getSystemId(), false)); + source = in; + } + } + else + { + in.setSystemId(absolutize(baseURI, in.getSystemId(), false)); + source = entityResolver.resolveEntity(in.getPublicId(), + in.getSystemId()); + if (source == null) + { + source = in; + } + } + startExternalEntity(name, source.getSystemId(), true); + return source; + } - // absolutize a system ID relative to the specified base URI - // (temporarily) package-visible for external entity decls - String absolutize (String baseURI, String systemId, boolean nice) + // absolutize a system ID relative to the specified base URI + // (temporarily) package-visible for external entity decls + String absolutize(String baseURI, String systemId, boolean nice) throws MalformedURLException, SAXException - { - // FIXME normalize system IDs -- when? 
- // - Convert to UTF-8 - // - Map reserved and non-ASCII characters to %HH - - try { - if (baseURI == null) { - warn ("No base URI; hope this SYSTEM id is absolute: " - + systemId); - return new URL (systemId).toString (); - } else - return new URL (new URL (baseURI), systemId).toString (); - - } catch (MalformedURLException e) { - - // Let unknown URI schemes pass through unless we need - // the JVM to map them to i/o streams for us... - if (!nice) - throw e; - - // sometimes sysids for notations or unparsed entities - // aren't really URIs... - warn ("Can't absolutize SYSTEM id: " + e.getMessage ()); - return systemId; - } - } + { + // FIXME normalize system IDs -- when? + // - Convert to UTF-8 + // - Map reserved and non-ASCII characters to %HH + + try + { + if (baseURI == null && XmlParser.uriWarnings) + { + warn ("No base URI; hope this SYSTEM id is absolute: " + + systemId); + return new URL(systemId).toString(); + } + else + { + return new URL(new URL(baseURI), systemId).toString(); + } + } + catch (MalformedURLException e) + { + // Let unknown URI schemes pass through unless we need + // the JVM to map them to i/o streams for us... + if (!nice) + { + throw e; + } + + // sometimes sysids for notations or unparsed entities + // aren't really URIs... + warn("Can't absolutize SYSTEM id: " + e.getMessage()); + return systemId; + } + } - void startExternalEntity (String name, String systemId, - boolean stackOnly) + void startExternalEntity(String name, String systemId, boolean stackOnly) throws SAXException - { - // The following warning was deleted because the application has the - // option of not setting systemId. Sun's JAXP or Xerces seems to - // ignore this case. 
- /* - if (systemId == null) - warn ("URI was not reported to parser for entity " + name); - */ - if (!stackOnly) // spliced [dtd] needs startEntity - lexicalHandler.startEntity (name); - entityStack.push (systemId); - } + { + // The following warning was deleted because the application has the + // option of not setting systemId. Sun's JAXP or Xerces seems to + // ignore this case. + /* + if (systemId == null) + warn ("URI was not reported to parser for entity " + name); + */ + if (!stackOnly) // spliced [dtd] needs startEntity + { + lexicalHandler.startEntity(name); + } + entityStack.push(systemId); + } - void endExternalEntity (String name) + void endExternalEntity(String name) throws SAXException - { - if (!"[document]".equals (name)) - lexicalHandler.endEntity (name); - entityStack.pop (); - } + { + if (!"[document]".equals(name)) + { + lexicalHandler.endEntity(name); + } + entityStack.pop(); + } - void startInternalEntity (String name) + void startInternalEntity(String name) throws SAXException - { - lexicalHandler.startEntity (name); - } + { + lexicalHandler.startEntity(name); + } - void endInternalEntity (String name) + void endInternalEntity(String name) throws SAXException - { - lexicalHandler.endEntity (name); - } + { + lexicalHandler.endEntity(name); + } - void doctypeDecl (String name, String publicId, String systemId) + void doctypeDecl(String name, String publicId, String systemId) throws SAXException - { - lexicalHandler.startDTD (name, publicId, systemId); - - // ... the "name" is a declaration and should be given - // to the DeclHandler (but sax2 doesn't). - - // the IDs for the external subset are lexical details, - // as are the contents of the internal subset; but sax2 - // doesn't provide the internal subset "pre-parse" - } - - void notationDecl (String name, String ids []) + { + lexicalHandler.startDTD(name, publicId, systemId); + + // ... the "name" is a declaration and should be given + // to the DeclHandler (but sax2 doesn't). 
+ + // the IDs for the external subset are lexical details, + // as are the contents of the internal subset; but sax2 + // doesn't provide the internal subset "pre-parse" + } + + void notationDecl(String name, String publicId, String systemId, + String baseUri) throws SAXException - { - try { - dtdHandler.notationDecl (name, ids [0], - (resolveAll && ids [1] != null) - ? absolutize (ids [2], ids [1], true) - : ids [1]); - } catch (IOException e) { - // "can't happen" - throw new SAXParseException (e.getMessage (), this, e); - } - } + { + try + { + dtdHandler.notationDecl(name, publicId, + (resolveAll && systemId != null) + ? absolutize(baseUri, systemId, true) + : systemId); + } + catch (IOException e) + { + // "can't happen" + throw new SAXParseException(e.getMessage(), this, e); + } + } - void unparsedEntityDecl (String name, String ids [], String notation) + void unparsedEntityDecl(String name, String publicId, String systemId, + String baseUri, String notation) throws SAXException - { - try { - dtdHandler.unparsedEntityDecl (name, ids [0], - resolveAll - ? absolutize (ids [2], ids [1], true) - : ids [1], - notation); - } catch (IOException e) { - // "can't happen" - throw new SAXParseException (e.getMessage (), this, e); - } - } + { + try + { + dtdHandler.unparsedEntityDecl(name, publicId, + resolveAll + ? absolutize(baseUri, systemId, true) + : systemId, + notation); + } + catch (IOException e) + { + // "can't happen" + throw new SAXParseException(e.getMessage(), this, e); + } + } - void endDoctype () + void endDoctype() throws SAXException - { - lexicalHandler.endDTD (); - } + { + lexicalHandler.endDTD(); + } - private void declarePrefix (String prefix, String uri) + private void declarePrefix(String prefix, String uri) throws SAXException - { - int index = uri.indexOf (':'); - - // many versions of nwalsh docbook stylesheets - // have bogus URLs; so this can't be an error... 
- if (index < 1 && uri.length () != 0) - warn ("relative URI for namespace: " + uri); - - // FIXME: char [0] must be ascii alpha; chars [1..index] - // must be ascii alphanumeric or in "+-." [RFC 2396] - - //Namespace Constraints - //name for xml prefix must be http://www.w3.org/XML/1998/namespace - boolean prefixEquality = prefix.equals("xml"); - boolean uriEquality = uri.equals("http://www.w3.org/XML/1998/namespace"); - if ((prefixEquality || uriEquality) && !(prefixEquality && uriEquality)) - fatal ("xml is by definition bound to the namespace name " + - "http://www.w3.org/XML/1998/namespace"); - - //xmlns prefix declaration is illegal but xml prefix declaration is llegal... - if (prefixEquality && uriEquality) - return; - - //name for xmlns prefix must be http://www.w3.org/2000/xmlns/ - prefixEquality = prefix.equals("xmlns"); - uriEquality = uri.equals("http://www.w3.org/2000/xmlns/"); - if ((prefixEquality || uriEquality) && !(prefixEquality && uriEquality)) - fatal("http://www.w3.org/2000/xmlns/ is by definition bound" + - " to prefix xmlns"); - - //even if the uri is http://www.w3.org/2000/xmlns/ it is illegal to declare it - if (prefixEquality && uriEquality) - fatal ("declaring the xmlns prefix is illegal"); - - uri = uri.intern (); - prefixStack.declarePrefix (prefix, uri); - contentHandler.startPrefixMapping (prefix, uri); - } + { + int index = uri.indexOf(':'); + + // many versions of nwalsh docbook stylesheets + // have bogus URLs; so this can't be an error... + if (index < 1 && uri.length() != 0) + { + warn("relative URI for namespace: " + uri); + } - void attribute (String qname, String value, boolean isSpecified) - throws SAXException - { - if (!attributes) { - attributes = true; - if (namespaces) - prefixStack.pushContext (); - } - - // process namespace decls immediately; - // then maybe forget this as an attribute - if (namespaces) { - int index; - - // default NS declaration? 
- if (getFeature (FEATURE + "string-interning")) { - if ("xmlns" == qname) { - declarePrefix ("", value); - if (!xmlNames) - return; - } - // NS prefix declaration? - else if ((index = qname.indexOf (':')) == 5 - && qname.startsWith ("xmlns")) { - String prefix = qname.substring (6); - - if (prefix.equals("")) - fatal ("missing prefix in namespace declaration attribute"); - if (value.length () == 0) { - verror ("missing URI in namespace declaration attribute: " - + qname); - } else - declarePrefix (prefix, value); - if (!xmlNames) - return; - } - } else { - if ("xmlns".equals(qname)) { - declarePrefix ("", value); - if (!xmlNames) - return; - } - // NS prefix declaration? - else if ((index = qname.indexOf (':')) == 5 - && qname.startsWith ("xmlns")) { - String prefix = qname.substring (6); - - if (value.length () == 0) { - verror ("missing URI in namespace decl attribute: " - + qname); - } else - declarePrefix (prefix, value); - if (!xmlNames) - return; - } + // FIXME: char [0] must be ascii alpha; chars [1..index] + // must be ascii alphanumeric or in "+-." [RFC 2396] + + //Namespace Constraints + //name for xml prefix must be http://www.w3.org/XML/1998/namespace + boolean prefixEquality = prefix.equals("xml"); + boolean uriEquality = uri.equals("http://www.w3.org/XML/1998/namespace"); + if ((prefixEquality || uriEquality) && !(prefixEquality && uriEquality)) + { + fatal("xml is by definition bound to the namespace name " + + "http://www.w3.org/XML/1998/namespace"); + } + + //xmlns prefix declaration is illegal but xml prefix declaration is llegal... 
+ if (prefixEquality && uriEquality) + { + return; + } + + //name for xmlns prefix must be http://www.w3.org/2000/xmlns/ + prefixEquality = prefix.equals("xmlns"); + uriEquality = uri.equals("http://www.w3.org/2000/xmlns/"); + if ((prefixEquality || uriEquality) && !(prefixEquality && uriEquality)) + { + fatal("http://www.w3.org/2000/xmlns/ is by definition bound" + + " to prefix xmlns"); } + + //even if the uri is http://www.w3.org/2000/xmlns/ + // it is illegal to declare it + if (prefixEquality && uriEquality) + { + fatal ("declaring the xmlns prefix is illegal"); + } + + uri = uri.intern(); + prefixStack.declarePrefix(prefix, uri); + contentHandler.startPrefixMapping(prefix, uri); } - // remember this attribute ... - - attributeCount++; - - // attribute type comes from querying parser's DTD records - attributesList.add(new Attribute(qname, value, isSpecified)); - - } - void startElement (String elname) + void attribute(String qname, String value, boolean isSpecified) throws SAXException - { - ContentHandler handler = contentHandler; - - // - // NOTE: this implementation of namespace support adds something - // like six percent to parsing CPU time, in a large (~50 MB) - // document that doesn't use namespaces at all. (Measured by PC - // sampling, with a bug where endElement processing was omitted.) - // [Measurement referred to older implementation, older JVM ...] - // - // It ought to become notably faster in such cases. Most - // costs are the prefix stack calling Hashtable.get() (2%), - // String.hashCode() (1.5%) and about 1.3% each for pushing - // the context, and two chunks of name processing. 
- // - - if (!attributes) { - if (namespaces) - prefixStack.pushContext (); - } else if (namespaces) { - - // now we can patch up namespace refs; we saw all the - // declarations, so now we'll do the Right Thing - Iterator itt = attributesList.iterator (); - while(itt.hasNext()) - { - Attribute attribute = (Attribute) itt.next(); - String qname = attribute.name; - int index; - - // default NS declaration? - if (getFeature (FEATURE + "string-interning")) { - if ("xmlns" == qname) - continue; - } else { - if ("xmlns".equals(qname)) - continue; - } - //Illegal in the new Namespaces Draft - //should it be only in 1.1 docs?? - if (qname.equals (":")) - fatal ("namespace names consisting of a single colon " + - "character are invalid"); - index = qname.indexOf (':'); - - // NS prefix declaration? - if (index == 5 && qname.startsWith ("xmlns")) - continue; - - // it's not a NS decl; patch namespace info items - if (prefixStack.processName (qname, nsTemp, true) == null) - fatal ("undeclared attribute prefix in: " + qname); - else { - attribute.nameSpace = nsTemp[0]; - attribute.localName = nsTemp[1]; - } - } - } - - // save element name so attribute callbacks work - elementName = elname; - if (namespaces) { - if (prefixStack.processName (elname, nsTemp, false) == null) { - fatal ("undeclared element prefix in: " + elname); - nsTemp [0] = nsTemp [1] = ""; - } - handler.startElement (nsTemp [0], nsTemp [1], elname, this); - } else - handler.startElement ("", "", elname, this); - // elementName = null; - - // elements with no attributes are pretty common! - if (attributes) { - attributesList.clear(); - attributeCount = 0; - attributes = false; - } - } - - void endElement (String elname) + { + if (!attributes) + { + attributes = true; + if (namespaces) + { + prefixStack.pushContext(); + } + } + + // process namespace decls immediately; + // then maybe forget this as an attribute + if (namespaces) + { + int index; + + // default NS declaration? 
+ if (stringInterning) + { + if ("xmlns" == qname) + { + declarePrefix("", value); + if (!xmlNames) + { + return; + } + } + // NS prefix declaration? + else if ((index = qname.indexOf(':')) == 5 + && qname.startsWith("xmlns")) + { + String prefix = qname.substring(6); + + if (prefix.equals("")) + { + fatal("missing prefix " + + "in namespace declaration attribute"); + } + if (value.length() == 0) + { + verror("missing URI in namespace declaration attribute: " + + qname); + } + else + { + declarePrefix(prefix, value); + } + if (!xmlNames) + { + return; + } + } + } + else + { + if ("xmlns".equals(qname)) + { + declarePrefix("", value); + if (!xmlNames) + { + return; + } + } + // NS prefix declaration? + else if ((index = qname.indexOf(':')) == 5 + && qname.startsWith("xmlns")) + { + String prefix = qname.substring(6); + + if (value.length() == 0) + { + verror("missing URI in namespace decl attribute: " + + qname); + } + else + { + declarePrefix(prefix, value); + } + if (!xmlNames) + { + return; + } + } + } + } + // remember this attribute ... + attributeCount++; + + // attribute type comes from querying parser's DTD records + attributesList.add(new Attribute(qname, value, isSpecified)); + + } + + void startElement(String elname) throws SAXException - { - ContentHandler handler = contentHandler; - - if (!namespaces) { - handler.endElement ("", "", elname); - return; - } - prefixStack.processName (elname, nsTemp, false); - handler.endElement (nsTemp [0], nsTemp [1], elname); + { + ContentHandler handler = contentHandler; - Enumeration prefixes = prefixStack.getDeclaredPrefixes (); + // + // NOTE: this implementation of namespace support adds something + // like six percent to parsing CPU time, in a large (~50 MB) + // document that doesn't use namespaces at all. (Measured by PC + // sampling, with a bug where endElement processing was omitted.) + // [Measurement referred to older implementation, older JVM ...] + // + // It ought to become notably faster in such cases. 
Most + // costs are the prefix stack calling Hashtable.get() (2%), + // String.hashCode() (1.5%) and about 1.3% each for pushing + // the context, and two chunks of name processing. + // + + if (!attributes) + { + if (namespaces) + { + prefixStack.pushContext(); + } + } + else if (namespaces) + { + + // now we can patch up namespace refs; we saw all the + // declarations, so now we'll do the Right Thing + Iterator itt = attributesList.iterator(); + while (itt.hasNext()) + { + Attribute attribute = (Attribute) itt.next(); + String qname = attribute.name; + int index; + + // default NS declaration? + if (stringInterning) + { + if ("xmlns" == qname) + { + continue; + } + } + else + { + if ("xmlns".equals(qname)) + { + continue; + } + } + //Illegal in the new Namespaces Draft + //should it be only in 1.1 docs?? + if (qname.equals (":")) + { + fatal("namespace names consisting of a single colon " + + "character are invalid"); + } + index = qname.indexOf(':'); + + // NS prefix declaration? + if (index == 5 && qname.startsWith("xmlns")) + { + continue; + } + + // it's not a NS decl; patch namespace info items + if (prefixStack.processName(qname, nsTemp, true) == null) + { + fatal("undeclared attribute prefix in: " + qname); + } + else + { + attribute.nameSpace = nsTemp[0]; + attribute.localName = nsTemp[1]; + } + } + } + + // save element name so attribute callbacks work + elementName = elname; + if (namespaces) + { + if (prefixStack.processName(elname, nsTemp, false) == null) + { + fatal("undeclared element prefix in: " + elname); + nsTemp[0] = nsTemp[1] = ""; + } + handler.startElement(nsTemp[0], nsTemp[1], elname, this); + } + else + { + handler.startElement("", "", elname, this); + } + // elementName = null; + + // elements with no attributes are pretty common! 
+ if (attributes) + { + attributesList.clear(); + attributeCount = 0; + attributes = false; + } + } + + void endElement(String elname) + throws SAXException + { + ContentHandler handler = contentHandler; - while (prefixes.hasMoreElements ()) - handler.endPrefixMapping ((String) prefixes.nextElement ()); - prefixStack.popContext (); - } + if (!namespaces) + { + handler.endElement("", "", elname); + return; + } + prefixStack.processName(elname, nsTemp, false); + handler.endElement(nsTemp[0], nsTemp[1], elname); + + Enumeration prefixes = prefixStack.getDeclaredPrefixes(); + + while (prefixes.hasMoreElements()) + { + handler.endPrefixMapping((String) prefixes.nextElement()); + } + prefixStack.popContext(); + } - void startCDATA () + void startCDATA() throws SAXException - { - lexicalHandler.startCDATA (); - } + { + lexicalHandler.startCDATA(); + } - void charData (char ch[], int start, int length) + void charData(char[] ch, int start, int length) throws SAXException - { - contentHandler.characters (ch, start, length); - } + { + contentHandler.characters(ch, start, length); + } - void endCDATA () + void endCDATA() throws SAXException - { - lexicalHandler.endCDATA (); - } + { + lexicalHandler.endCDATA(); + } - void ignorableWhitespace (char ch[], int start, int length) + void ignorableWhitespace(char[] ch, int start, int length) throws SAXException - { - contentHandler.ignorableWhitespace (ch, start, length); - } + { + contentHandler.ignorableWhitespace(ch, start, length); + } - void processingInstruction (String target, String data) + void processingInstruction(String target, String data) throws SAXException - { - contentHandler.processingInstruction (target, data); - } + { + contentHandler.processingInstruction(target, data); + } - void comment (char ch[], int start, int length) + void comment(char[] ch, int start, int length) throws SAXException - { - if (lexicalHandler != base) - lexicalHandler.comment (ch, start, length); - } + { + if (lexicalHandler != base) + { + 
lexicalHandler.comment(ch, start, length); + } + } - void fatal (String message) + void fatal(String message) throws SAXException - { - SAXParseException fatal; - - fatal = new SAXParseException (message, this); - errorHandler.fatalError (fatal); - - // Even if the application can continue ... we can't! - throw fatal; - } + { + SAXParseException fatal; + + fatal = new SAXParseException(message, this); + errorHandler.fatalError(fatal); + + // Even if the application can continue ... we can't! + throw fatal; + } - // We can safely report a few validity errors that - // make layered SAX2 DTD validation more conformant - void verror (String message) + // We can safely report a few validity errors that + // make layered SAX2 DTD validation more conformant + void verror(String message) throws SAXException - { - SAXParseException err; - - err = new SAXParseException (message, this); - errorHandler.error (err); - } - - void warn (String message) + { + SAXParseException err; + + err = new SAXParseException(message, this); + errorHandler.error(err); + } + + void warn(String message) throws SAXException - { - SAXParseException err; - - err = new SAXParseException (message, this); - errorHandler.warning (err); - } + { + SAXParseException err; + + err = new SAXParseException(message, this); + errorHandler.warning(err); + } + // + // Implementation of org.xml.sax.Attributes. + // + + /** + * <b>SAX1 AttributeList, SAX2 Attributes</b> method + * (don't invoke on parser); + */ + public int getLength() + { + return attributesList.size(); + } - // - // Implementation of org.xml.sax.Attributes. 
- // + /** + * <b>SAX2 Attributes</b> method (don't invoke on parser); + */ + public String getURI(int index) + { + if (index < 0 || index >= attributesList.size()) + { + return null; + } + return ((Attribute) attributesList.get(index)).nameSpace; + } - /** - * <b>SAX1 AttributeList, SAX2 Attributes</b> method - * (don't invoke on parser); - */ - public int getLength () - { - return attributesList.size(); - } + /** + * <b>SAX2 Attributes</b> method (don't invoke on parser); + */ + public String getLocalName(int index) + { + if (index < 0 || index >= attributesList.size()) + { + return null; + } + Attribute attr = (Attribute) attributesList.get(index); + // FIXME attr.localName is sometimes null, why? + if (namespaces && attr.localName == null) + { + // XXX fix this here for now + int ci = attr.name.indexOf(':'); + attr.localName = (ci == -1) ? attr.name : + attr.name.substring(ci + 1); + } + return (attr.localName == null) ? "" : attr.localName; + } - /** - * <b>SAX2 Attributes</b> method (don't invoke on parser); - */ - public String getURI (int index) - { - if (index < 0 || index >= attributesList.size()) - { - return null; - } - return ((Attribute) attributesList.get(index)).nameSpace; - } + /** + * <b>SAX2 Attributes</b> method (don't invoke on parser); + */ + public String getQName(int index) + { + if (index < 0 || index >= attributesList.size()) + { + return null; + } + Attribute attr = (Attribute) attributesList.get(index); + return (attr.name == null) ? "" : attr.name; + } - /** - * <b>SAX2 Attributes</b> method (don't invoke on parser); - */ - public String getLocalName (int index) - { - if (index < 0 || index >= attributesList.size()) - { - return null; - } - Attribute attr = (Attribute) attributesList.get(index); - // FIXME attr.localName is sometimes null, why? - if (namespaces && attr.localName == null) - { - // XXX fix this here for now - int ci = attr.name.indexOf(':'); - attr.localName = (ci == -1) ? 
attr.name : - attr.name.substring(ci + 1); - } - return (attr.localName == null) ? "" : attr.localName; - } + /** + * <b>SAX1 AttributeList</b> method (don't invoke on parser); + */ + public String getName(int index) + { + return getQName(index); + } - /** - * <b>SAX2 Attributes</b> method (don't invoke on parser); - */ - public String getQName (int index) - { - if (index < 0 || index >= attributesList.size()) - { - return null; - } - Attribute attr = (Attribute) attributesList.get(index); - return (attr.name == null) ? "" : attr.name; - } + /** + * <b>SAX1 AttributeList, SAX2 Attributes</b> method + * (don't invoke on parser); + */ + public String getType(int index) + { + if (index < 0 || index >= attributesList.size()) + { + return null; + } + String type = parser.getAttributeType(elementName, getQName(index)); + if (type == null) + { + return "CDATA"; + } + // ... use DeclHandler.attributeDecl to see enumerations + if (type == "ENUMERATION") + { + return "NMTOKEN"; + } + return type; + } - /** - * <b>SAX1 AttributeList</b> method (don't invoke on parser); - */ - public String getName (int index) - { - return getQName(index); - } + /** + * <b>SAX1 AttributeList, SAX2 Attributes</b> method + * (don't invoke on parser); + */ + public String getValue(int index) + { + if (index < 0 || index >= attributesList.size()) + { + return null; + } + return ((Attribute) attributesList.get(index)).value; + } - /** - * <b>SAX1 AttributeList, SAX2 Attributes</b> method - * (don't invoke on parser); - */ - public String getType (int index) + /** + * <b>SAX2 Attributes</b> method (don't invoke on parser); + */ + public int getIndex(String uri, String local) { - if (index < 0 || index >= attributesList.size()) - { - return null; - } - String type = parser.getAttributeType(elementName, getQName(index)); - if (type == null) - { - return "CDATA"; - } - // ... 
use DeclHandler.attributeDecl to see enumerations - if (type == "ENUMERATION") - { - return "NMTOKEN"; - } - return type; - } - + int length = getLength(); + + for (int i = 0; i < length; i++) + { + if (!getURI(i).equals(uri)) + { + continue; + } + if (getLocalName(i).equals(local)) + { + return i; + } + } + return -1; + } - /** - * <b>SAX1 AttributeList, SAX2 Attributes</b> method - * (don't invoke on parser); - */ - public String getValue (int index) - { - if (index < 0 || index >= attributesList.size()) + /** + * <b>SAX2 Attributes</b> method (don't invoke on parser); + */ + public int getIndex(String xmlName) + { + int length = getLength(); + + for (int i = 0; i < length; i++) + { + if (getQName(i).equals(xmlName)) { - return null; + return i; } - return ((Attribute) attributesList.get(index)).value; - } + } + return -1; + } + /** + * <b>SAX2 Attributes</b> method (don't invoke on parser); + */ + public String getType(String uri, String local) + { + int index = getIndex(uri, local); + + if (index < 0) + { + return null; + } + return getType(index); + } - /** - * <b>SAX2 Attributes</b> method (don't invoke on parser); - */ - public int getIndex (String uri, String local) - { - int length = getLength(); + /** + * <b>SAX1 AttributeList, SAX2 Attributes</b> method + * (don't invoke on parser); + */ + public String getType(String xmlName) + { + int index = getIndex(xmlName); + + if (index < 0) + { + return null; + } + return getType(index); + } - for (int i = 0; i < length; i++) - { - if (!getURI(i).equals(uri)) - { - continue; - } - if (getLocalName(i).equals(local)) - { - return i; - } - } - return -1; - } + /** + * <b>SAX Attributes</b> method (don't invoke on parser); + */ + public String getValue(String uri, String local) + { + int index = getIndex(uri, local); + + if (index < 0) + { + return null; + } + return getValue(index); + } + /** + * <b>SAX1 AttributeList, SAX2 Attributes</b> method + * (don't invoke on parser); + */ + public String getValue(String 
xmlName) + { + int index = getIndex(xmlName); + + if (index < 0) + { + return null; + } + return getValue(index); + } - /** - * <b>SAX2 Attributes</b> method (don't invoke on parser); - */ - public int getIndex (String xmlName) - { - int length = getLength(); + // + // Implementation of org.xml.sax.ext.Attributes2 + // + + /** @return false unless the attribute was declared in the DTD. + * @throws java.lang.ArrayIndexOutOfBoundsException + * When the supplied index does not identify an attribute. + */ + public boolean isDeclared(int index) + { + if (index < 0 || index >= attributeCount) + { + throw new ArrayIndexOutOfBoundsException(); + } + String type = parser.getAttributeType(elementName, getQName(index)); + return (type != null); + } - for (int i = 0; i < length; i++) - { - if (getQName(i).equals(xmlName)) - { - return i; - } - } - return -1; - } + /** @return false unless the attribute was declared in the DTD. + * @throws java.lang.IllegalArgumentException + * When the supplied names do not identify an attribute. + */ + public boolean isDeclared(String qName) + { + int index = getIndex(qName); + if (index < 0) + { + throw new IllegalArgumentException(); + } + String type = parser.getAttributeType(elementName, qName); + return (type != null); + } + /** @return false unless the attribute was declared in the DTD. + * @throws java.lang.IllegalArgumentException + * When the supplied names do not identify an attribute. 
+ */ + public boolean isDeclared(String uri, String localName) + { + int index = getIndex(uri, localName); + return isDeclared(index); + } - /** - * <b>SAX2 Attributes</b> method (don't invoke on parser); - */ - public String getType (String uri, String local) - { - int index = getIndex(uri, local); + /** + * <b>SAX-ext Attributes2</b> method (don't invoke on parser); + */ + public boolean isSpecified(int index) + { + return ((Attribute) attributesList.get(index)).specified; + } - if (index < 0) - { - return null; - } - return getType(index); - } + /** + * <b>SAX-ext Attributes2</b> method (don't invoke on parser); + */ + public boolean isSpecified(String uri, String local) + { + int index = getIndex (uri, local); + return isSpecified(index); + } + /** + * <b>SAX-ext Attributes2</b> method (don't invoke on parser); + */ + public boolean isSpecified(String xmlName) + { + int index = getIndex (xmlName); + return isSpecified(index); + } - /** - * <b>SAX1 AttributeList, SAX2 Attributes</b> method - * (don't invoke on parser); - */ - public String getType (String xmlName) - { - int index = getIndex(xmlName); + // + // Implementation of org.xml.sax.Locator. 
+ // - if (index < 0) - { - return null; - } - return getType(index); - } + /** + * <b>SAX Locator</b> method (don't invoke on parser); + */ + public String getPublicId() + { + return null; // FIXME track public IDs too + } + /** + * <b>SAX Locator</b> method (don't invoke on parser); + */ + public String getSystemId() + { + if (entityStack.empty()) + { + return null; + } + else + { + return (String) entityStack.peek(); + } + } - /** - * <b>SAX Attributes</b> method (don't invoke on parser); - */ - public String getValue (String uri, String local) - { - int index = getIndex(uri, local); + /** + * <b>SAX Locator</b> method (don't invoke on parser); + */ + public int getLineNumber() + { + return parser.getLineNumber(); + } - if (index < 0) - { - return null; - } - return getValue(index); - } + /** + * <b>SAX Locator</b> method (don't invoke on parser); + */ + public int getColumnNumber() + { + return parser.getColumnNumber(); + } + // adapter between SAX2 content handler and SAX1 document handler callbacks + private static class Adapter + implements ContentHandler + { + + private DocumentHandler docHandler; - /** - * <b>SAX1 AttributeList, SAX2 Attributes</b> method - * (don't invoke on parser); - */ - public String getValue (String xmlName) + Adapter(DocumentHandler dh) { - int index = getIndex(xmlName); - - if (index < 0) - { - return null; - } - return getValue(index); + docHandler = dh; } - - // - // Implementation of org.xml.sax.ext.Attributes2 - // - - - /** @return false unless the attribute was declared in the DTD. - * @throws java.lang.ArrayIndexOutOfBoundsException - * When the supplied index does not identify an attribute. 
- */ - public boolean isDeclared (int index) + public void setDocumentLocator(Locator l) { - if (index < 0 || index >= attributeCount) - throw new ArrayIndexOutOfBoundsException (); - String type = parser.getAttributeType(elementName, getQName(index)); - return (type != null); + docHandler.setDocumentLocator(l); } - - /** @return false unless the attribute was declared in the DTD. - * @throws java.lang.IllegalArgumentException - * When the supplied names do not identify an attribute. - */ - public boolean isDeclared (String qName) + + public void startDocument() + throws SAXException { - int index = getIndex (qName); - if (index < 0) - throw new IllegalArgumentException (); - String type = parser.getAttributeType(elementName, qName); - return (type != null); + docHandler.startDocument(); } - - /** @return false unless the attribute was declared in the DTD. - * @throws java.lang.IllegalArgumentException - * When the supplied names do not identify an attribute. - */ - public boolean isDeclared (String uri, String localName) + + public void processingInstruction(String target, String data) + throws SAXException { - int index = getIndex (uri, localName); - return isDeclared(index); + docHandler.processingInstruction(target, data); } - - - /** - * <b>SAX-ext Attributes2</b> method (don't invoke on parser); - */ - public boolean isSpecified (int index) + + public void startPrefixMapping(String prefix, String uri) { - return ((Attribute) attributesList.get(index)).specified; + /* ignored */ } - /** - * <b>SAX-ext Attributes2</b> method (don't invoke on parser); - */ - public boolean isSpecified (String uri, String local) + public void startElement(String namespace, + String local, + String name, + Attributes attrs) + throws SAXException { - int index = getIndex (uri, local); - return isSpecified(index); + docHandler.startElement(name, (AttributeList) attrs); } - /** - * <b>SAX-ext Attributes2</b> method (don't invoke on parser); - */ - public boolean isSpecified (String 
xmlName) + public void characters(char[] buf, int offset, int len) + throws SAXException { - int index = getIndex (xmlName); - return isSpecified(index); + docHandler.characters(buf, offset, len); } - - // - // Implementation of org.xml.sax.Locator. - // - - /** - * <b>SAX Locator</b> method (don't invoke on parser); - */ - public String getPublicId () + public void ignorableWhitespace(char[] buf, int offset, int len) + throws SAXException { - return null; // FIXME track public IDs too + docHandler.ignorableWhitespace(buf, offset, len); } - /** - * <b>SAX Locator</b> method (don't invoke on parser); - */ - public String getSystemId () + public void skippedEntity(String name) { - if (entityStack.empty ()) - return null; - else - return (String) entityStack.peek (); + /* ignored */ } - /** - * <b>SAX Locator</b> method (don't invoke on parser); - */ - public int getLineNumber () + public void endElement(String u, String l, String name) + throws SAXException { - return parser.getLineNumber (); + docHandler.endElement(name); } - /** - * <b>SAX Locator</b> method (don't invoke on parser); - */ - public int getColumnNumber () + public void endPrefixMapping(String prefix) { - return parser.getColumnNumber (); + /* ignored */ } - // adapter between SAX2 content handler and SAX1 document handler callbacks - private static class Adapter implements ContentHandler + public void endDocument() + throws SAXException { - private DocumentHandler docHandler; - - Adapter (DocumentHandler dh) - { docHandler = dh; } - - - public void setDocumentLocator (Locator l) - { docHandler.setDocumentLocator (l); } - - public void startDocument () throws SAXException - { docHandler.startDocument (); } - - public void processingInstruction (String target, String data) - throws SAXException - { docHandler.processingInstruction (target, data); } - - public void startPrefixMapping (String prefix, String uri) - { /* ignored */ } - - public void startElement ( - String namespace, - String local, - 
String name, - Attributes attrs - ) throws SAXException - { docHandler.startElement (name, (AttributeList) attrs); } - - public void characters (char buf [], int offset, int len) - throws SAXException - { docHandler.characters (buf, offset, len); } - - public void ignorableWhitespace (char buf [], int offset, int len) - throws SAXException - { docHandler.ignorableWhitespace (buf, offset, len); } - - public void skippedEntity (String name) - { /* ignored */ } - - public void endElement (String u, String l, String name) - throws SAXException - { docHandler.endElement (name); } - - public void endPrefixMapping (String prefix) - { /* ignored */ } - - public void endDocument () throws SAXException - { docHandler.endDocument (); } + docHandler.endDocument(); } -} - -class Attribute -{ + } + private static class Attribute + { + String name; String value; String nameSpace; String localName; boolean specified; - + Attribute(String name, String value, boolean specified) { - this.name = name; - this.value = value; - this.nameSpace = ""; - this.specified = specified; + this.name = name; + this.value = value; + this.nameSpace = ""; + this.specified = specified; } -} + + } +} diff --git a/gnu/xml/aelfred2/XmlParser.java b/gnu/xml/aelfred2/XmlParser.java index f4abf2229..b29849217 100644 --- a/gnu/xml/aelfred2/XmlParser.java +++ b/gnu/xml/aelfred2/XmlParser.java @@ -53,6 +53,8 @@ Partly derived from code which carried the following notice: package gnu.xml.aelfred2; +import gnu.java.security.action.GetPropertyAction; + import java.io.BufferedInputStream; import java.io.CharConversionException; import java.io.EOFException; @@ -63,12 +65,11 @@ import java.io.Reader; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLConnection; +import java.security.AccessController; -// maintaining 1.1 compatibility for now ... 
-// Iterator and Hashmap ought to be faster -import java.util.Enumeration; -import java.util.Hashtable; -import java.util.Stack; +import java.util.Iterator; +import java.util.HashMap; +import java.util.LinkedList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; @@ -86,1511 +87,1838 @@ import org.xml.sax.SAXException; */ final class XmlParser { - // avoid slow per-character readCh() - private final static boolean USE_CHEATS = true; - - - ////////////////////////////////////////////////////////////////////// - // Constructors. - //////////////////////////////////////////////////////////////////////// - - - /** - * Construct a new parser with no associated handler. - * @see #setHandler - * @see #parse - */ - // package private - XmlParser () - { - } + // avoid slow per-character readCh() + private final static boolean USE_CHEATS = true; - /** - * Set the handler that will receive parsing events. - * @param handler The handler to receive callback events. - * @see #parse - */ - // package private - void setHandler (SAXDriver handler) - { - this.handler = handler; - } - - - /** - * Parse an XML document from the character stream, byte stream, or URI - * that you provide (in that order of preference). Any URI that you - * supply will become the base URI for resolving relative URI, and may - * be used to acquire a reader or byte stream. - * - * <p> Only one thread at a time may use this parser; since it is - * private to this package, post-parse cleanup is done by the caller, - * which MUST NOT REUSE the parser (just null it). - * - * @param systemId Absolute URI of the document; should never be null, - * but may be so iff a reader <em>or</em> a stream is provided. - * @param publicId The public identifier of the document, or null. - * @param reader A character stream; must be null if stream isn't. - * @param stream A byte input stream; must be null if reader isn't. - * @param encoding The suggested encoding, or null if unknown. 
- * @exception java.lang.Exception Basically SAXException or IOException - */ - // package private - void doParse ( - String systemId, - String publicId, - Reader reader, - InputStream stream, - String encoding - ) throws Exception - { - if (handler == null) - throw new IllegalStateException ("no callback handler"); - - initializeVariables (); - - // predeclare the built-in entities here (replacement texts) - // we don't need to intern(), since we're guaranteed literals - // are always (globally) interned. - setInternalEntity ("amp", "&#38;"); - setInternalEntity ("lt", "&#60;"); - setInternalEntity ("gt", "&#62;"); - setInternalEntity ("apos", "&#39;"); - setInternalEntity ("quot", "&#34;"); - - try { - // pushURL first to ensure locator is correct in startDocument - // ... it might report an IO or encoding exception. - handler.startDocument (); - pushURL (false, "[document]", - // default baseURI: null - new String [] { publicId, systemId, null}, - reader, stream, encoding, false); - - parseDocument (); - } catch (EOFException e){ - //empty input - error("empty document, with no root element."); - }finally { - if (reader != null) - try { reader.close (); - } catch (IOException e) { /* ignore */ } - if (stream != null) - try { stream.close (); - } catch (IOException e) { /* ignore */ } - if (is != null) - try { is.close (); - } catch (IOException e) { /* ignore */ } - if (reader != null) - try { - reader.close (); - } catch (IOException e) { /* ignore */ - } - scratch = null; - } - } - - - //////////////////////////////////////////////////////////////////////// - // Constants. - //////////////////////////////////////////////////////////////////////// - - // - // Constants for element content type. - // - - /** - * Constant: an element has not been declared. - * @see #getElementContentType - */ - public final static int CONTENT_UNDECLARED = 0; - - /** - * Constant: the element has a content model of ANY.
- * @see #getElementContentType - */ - public final static int CONTENT_ANY = 1; - - /** - * Constant: the element has declared content of EMPTY. - * @see #getElementContentType - */ - public final static int CONTENT_EMPTY = 2; - - /** - * Constant: the element has mixed content. - * @see #getElementContentType - */ - public final static int CONTENT_MIXED = 3; - - /** - * Constant: the element has element content. - * @see #getElementContentType - */ - public final static int CONTENT_ELEMENTS = 4; - - - // - // Constants for the entity type. - // - - /** - * Constant: the entity has not been declared. - * @see #getEntityType - */ - public final static int ENTITY_UNDECLARED = 0; - - /** - * Constant: the entity is internal. - * @see #getEntityType - */ - public final static int ENTITY_INTERNAL = 1; - - /** - * Constant: the entity is external, non-parsable data. - * @see #getEntityType - */ - public final static int ENTITY_NDATA = 2; - - /** - * Constant: the entity is external XML data. - * @see #getEntityType - */ - public final static int ENTITY_TEXT = 3; - - - // - // Attribute type constants are interned literal strings. - // - - // - // Constants for supported encodings. "external" is just a flag. - // - private final static int ENCODING_EXTERNAL = 0; - private final static int ENCODING_UTF_8 = 1; - private final static int ENCODING_ISO_8859_1 = 2; - private final static int ENCODING_UCS_2_12 = 3; - private final static int ENCODING_UCS_2_21 = 4; - private final static int ENCODING_UCS_4_1234 = 5; - private final static int ENCODING_UCS_4_4321 = 6; - private final static int ENCODING_UCS_4_2143 = 7; - private final static int ENCODING_UCS_4_3412 = 8; - private final static int ENCODING_ASCII = 9; - - - // - // Constants for attribute default value. - // - - /** - * Constant: the attribute is not declared. 
- * @see #getAttributeDefaultValueType - */ - public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; - - /** - * Constant: the attribute has a literal default value specified. - * @see #getAttributeDefaultValueType - * @see #getAttributeDefaultValue - */ - public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; - - /** - * Constant: the attribute was declared #IMPLIED. - * @see #getAttributeDefaultValueType - */ - public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; - - /** - * Constant: the attribute was declared #REQUIRED. - * @see #getAttributeDefaultValueType - */ - public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; - - /** - * Constant: the attribute was declared #FIXED. - * @see #getAttributeDefaultValueType - * @see #getAttributeDefaultValue - */ - public final static int ATTRIBUTE_DEFAULT_FIXED = 34; - - - // - // Constants for input. - // - private final static int INPUT_NONE = 0; - private final static int INPUT_INTERNAL = 1; - private final static int INPUT_STREAM = 3; - private final static int INPUT_READER = 5; - - - // - // Flags for reading literals. - // - // expand general entity refs (attribute values in dtd and content) - private final static int LIT_ENTITY_REF = 2; - // normalize this value (space chars) (attributes, public ids) - private final static int LIT_NORMALIZE = 4; - // literal is an attribute value - private final static int LIT_ATTRIBUTE = 8; - // don't expand parameter entities - private final static int LIT_DISABLE_PE = 16; - // don't expand [or parse] character refs - private final static int LIT_DISABLE_CREF = 32; - // don't parse general entity refs - private final static int LIT_DISABLE_EREF = 64; - // literal is a public ID value - private final static int LIT_PUBID = 256; - - - // - // Flags affecting PE handling in DTDs (if expandPE is true). - // PEs expand with space padding, except inside literals. 
- // - private final static int CONTEXT_NORMAL = 0; - private final static int CONTEXT_LITERAL = 1; + //////////////////////////////////////////////////////////////////////// + // Constants. + //////////////////////////////////////////////////////////////////////// + + // + // Constants for element content type. + // + + /** + * Constant: an element has not been declared. + * @see #getElementContentType + */ + public final static int CONTENT_UNDECLARED = 0; + + /** + * Constant: the element has a content model of ANY. + * @see #getElementContentType + */ + public final static int CONTENT_ANY = 1; + + /** + * Constant: the element has declared content of EMPTY. + * @see #getElementContentType + */ + public final static int CONTENT_EMPTY = 2; + + /** + * Constant: the element has mixed content. + * @see #getElementContentType + */ + public final static int CONTENT_MIXED = 3; + + /** + * Constant: the element has element content. + * @see #getElementContentType + */ + public final static int CONTENT_ELEMENTS = 4; + + + // + // Constants for the entity type. + // + + /** + * Constant: the entity has not been declared. + * @see #getEntityType + */ + public final static int ENTITY_UNDECLARED = 0; + + /** + * Constant: the entity is internal. + * @see #getEntityType + */ + public final static int ENTITY_INTERNAL = 1; + + /** + * Constant: the entity is external, non-parsable data. + * @see #getEntityType + */ + public final static int ENTITY_NDATA = 2; + + /** + * Constant: the entity is external XML data. + * @see #getEntityType + */ + public final static int ENTITY_TEXT = 3; + + // + // Attribute type constants are interned literal strings. + // + + // + // Constants for supported encodings. "external" is just a flag. 
+ // + private final static int ENCODING_EXTERNAL = 0; + private final static int ENCODING_UTF_8 = 1; + private final static int ENCODING_ISO_8859_1 = 2; + private final static int ENCODING_UCS_2_12 = 3; + private final static int ENCODING_UCS_2_21 = 4; + private final static int ENCODING_UCS_4_1234 = 5; + private final static int ENCODING_UCS_4_4321 = 6; + private final static int ENCODING_UCS_4_2143 = 7; + private final static int ENCODING_UCS_4_3412 = 8; + private final static int ENCODING_ASCII = 9; + + // + // Constants for attribute default value. + // + + /** + * Constant: the attribute is not declared. + * @see #getAttributeDefaultValueType + */ + public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; + + /** + * Constant: the attribute has a literal default value specified. + * @see #getAttributeDefaultValueType + * @see #getAttributeDefaultValue + */ + public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; + + /** + * Constant: the attribute was declared #IMPLIED. + * @see #getAttributeDefaultValueType + */ + public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; + + /** + * Constant: the attribute was declared #REQUIRED. + * @see #getAttributeDefaultValueType + */ + public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; + + /** + * Constant: the attribute was declared #FIXED. + * @see #getAttributeDefaultValueType + * @see #getAttributeDefaultValue + */ + public final static int ATTRIBUTE_DEFAULT_FIXED = 34; + + // + // Constants for input. + // + private final static int INPUT_NONE = 0; + private final static int INPUT_INTERNAL = 1; + private final static int INPUT_STREAM = 3; + private final static int INPUT_READER = 5; + + // + // Flags for reading literals. 
+ // + // expand general entity refs (attribute values in dtd and content) + private final static int LIT_ENTITY_REF = 2; + // normalize this value (space chars) (attributes, public ids) + private final static int LIT_NORMALIZE = 4; + // literal is an attribute value + private final static int LIT_ATTRIBUTE = 8; + // don't expand parameter entities + private final static int LIT_DISABLE_PE = 16; + // don't expand [or parse] character refs + private final static int LIT_DISABLE_CREF = 32; + // don't parse general entity refs + private final static int LIT_DISABLE_EREF = 64; + // literal is a public ID value + private final static int LIT_PUBID = 256; + + // + // Flags affecting PE handling in DTDs (if expandPE is true). + // PEs expand with space padding, except inside literals. + // + private final static int CONTEXT_NORMAL = 0; + private final static int CONTEXT_LITERAL = 1; + + // Emit warnings for relative URIs with no base URI. + static boolean uriWarnings; + static + { + String key = "gnu.xml.aelfred2.XmlParser.uriWarnings"; + GetPropertyAction a = new GetPropertyAction(key); + uriWarnings = "true".equals(AccessController.doPrivileged(a)); + } + + // + // The current XML handler interface. + // + private SAXDriver handler; + + // + // I/O information. + // + private Reader reader; // current reader + private InputStream is; // current input stream + private int line; // current line number + private int column; // current column number + private int sourceType; // type of input source + private LinkedList inputStack; // stack of input soruces + private URLConnection externalEntity; // current external entity + private int encoding; // current character encoding + private int currentByteCount; // bytes read from current source + private InputSource scratch; // temporary + + // + // Buffers for decoded but unparsed character input. 
+ // + private char[] readBuffer; + private int readBufferPos; + private int readBufferLength; + private int readBufferOverflow; // overflow from last data chunk. + + // + // Buffer for undecoded raw byte input. + // + private final static int READ_BUFFER_MAX = 16384; + private byte[] rawReadBuffer; + + + // + // Buffer for attribute values, char refs, DTD stuff. + // + private static int DATA_BUFFER_INITIAL = 4096; + private char[] dataBuffer; + private int dataBufferPos; + + // + // Buffer for parsed names. + // + private static int NAME_BUFFER_INITIAL = 1024; + private char[] nameBuffer; + private int nameBufferPos; + + // + // Save any standalone flag + // + private boolean docIsStandalone; + + // + // Hashtables for DTD information on elements, entities, and notations. + // Populated until we start ignoring decls (because of skipping a PE) + // + private HashMap elementInfo; + private HashMap entityInfo; + private HashMap notationInfo; + private boolean skippedPE; + + // + // Element type currently in force. + // + private String currentElement; + private int currentElementContent; + + // + // Stack of entity names, to detect recursion. + // + private LinkedList entityStack; + + // + // PE expansion is enabled in most chunks of the DTD, not all. + // When it's enabled, literals are treated differently. + // + private boolean inLiteral; + private boolean expandPE; + private boolean peIsError; + + // + // can't report entity expansion inside two constructs: + // - attribute expansions (internal entities only) + // - markup declarations (parameter entities only) + // + private boolean doReport; + + // + // Symbol table, for caching interned names. + // + // These show up wherever XML names or nmtokens are used: naming elements, + // attributes, PIs, notations, entities, and enumerated attribute values. + // + // NOTE: This hashtable doesn't grow. The default size is intended to be + // rather large for most documents. 
Example: one snapshot of the DocBook + // XML 4.1 DTD used only about 350 such names. As a rule, only pathological + // documents (ones that don't reuse names) should ever see much collision. + // + // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing. + // "2039" keeps the hash table size at about two memory pages on typical + // 32 bit hardware. + // + private final static int SYMBOL_TABLE_LENGTH = 2039; + + private Object[][] symbolTable; + + // + // Hash table of attributes found in current start tag. + // + private String[] tagAttributes; + private int tagAttributePos; + + // + // Utility flag: have we noticed a CR while reading the last + // data chunk? If so, we will have to go back and normalise + // CR or CR/LF line ends. + // + private boolean sawCR; + + // + // Utility flag: are we in CDATA? If so, whitespace isn't ignorable. + // + private boolean inCDATA; + + // + // Xml version. + // + private static final int XML_10 = 0; + private static final int XML_11 = 1; + private int xmlVersion = XML_10; + + ////////////////////////////////////////////////////////////////////// + // Constructors. + //////////////////////////////////////////////////////////////////////// + + /** + * Construct a new parser with no associated handler. + * @see #setHandler + * @see #parse + */ + // package private + XmlParser() + { + } + /** + * Set the handler that will receive parsing events. + * @param handler The handler to receive callback events. + * @see #parse + */ + // package private + void setHandler(SAXDriver handler) + { + this.handler = handler; + } - ////////////////////////////////////////////////////////////////////// - // Error reporting. - ////////////////////////////////////////////////////////////////////// + /** + * Parse an XML document from the character stream, byte stream, or URI + * that you provide (in that order of preference). 
Any URI that you + * supply will become the base URI for resolving relative URI, and may + * be used to acquire a reader or byte stream. + * + * <p> Only one thread at a time may use this parser; since it is + * private to this package, post-parse cleanup is done by the caller, + * which MUST NOT REUSE the parser (just null it). + * + * @param systemId Absolute URI of the document; should never be null, + * but may be so iff a reader <em>or</em> a stream is provided. + * @param publicId The public identifier of the document, or null. + * @param reader A character stream; must be null if stream isn't. + * @param stream A byte input stream; must be null if reader isn't. + * @param encoding The suggested encoding, or null if unknown. + * @exception java.lang.Exception Basically SAXException or IOException + */ + // package private + void doParse(String systemId, String publicId, Reader reader, + InputStream stream, String encoding) + throws Exception + { + if (handler == null) + { + throw new IllegalStateException("no callback handler"); + } + initializeVariables(); + + // predeclare the built-in entities here (replacement texts) + // we don't need to intern(), since we're guaranteed literals + // are always (globally) interned. + setInternalEntity("amp", "&#38;"); + setInternalEntity("lt", "&#60;"); + setInternalEntity("gt", "&#62;"); + setInternalEntity("apos", "&#39;"); + setInternalEntity("quot", "&#34;"); + + try + { + // pushURL first to ensure locator is correct in startDocument + // ... it might report an IO or encoding exception.
+ handler.startDocument(); + pushURL(false, "[document]", + // default baseURI: null + new ExternalIdentifiers(publicId, systemId, null), + reader, stream, encoding, false); + + parseDocument(); + } + catch (EOFException e) + { + //empty input + error("empty document, with no root element."); + } + finally + { + if (reader != null) + { + try + { + reader.close(); + } + catch (IOException e) + { + /* ignore */ + } + } + if (stream != null) + { + try + { + stream.close(); + } + catch (IOException e) + { + /* ignore */ + } + } + if (is != null) + { + try + { + is.close(); + } + catch (IOException e) + { + /* ignore */ + } + } + scratch = null; + } + } - /** - * Report an error. - * @param message The error message. - * @param textFound The text that caused the error (or null). - * @see SAXDriver#error - * @see #line - */ - private void error (String message, String textFound, String textExpected) + ////////////////////////////////////////////////////////////////////// + // Error reporting. + ////////////////////////////////////////////////////////////////////// + + /** + * Report an error. + * @param message The error message. + * @param textFound The text that caused the error (or null). + * @see SAXDriver#error + * @see #line + */ + private void error(String message, String textFound, String textExpected) throws SAXException - { - if (textFound != null) { - message = message + " (found \"" + textFound + "\")"; - } - if (textExpected != null) { - message = message + " (expected \"" + textExpected + "\")"; - } - handler.fatal (message); - - // "can't happen" - throw new SAXException (message); - } - + { + if (textFound != null) + { + message = message + " (found \"" + textFound + "\")"; + } + if (textExpected != null) + { + message = message + " (expected \"" + textExpected + "\")"; + } + handler.fatal(message); + + // "can't happen" + throw new SAXException(message); + } - /** - * Report a serious error. - * @param message The error message. 
- * @param textFound The text that caused the error (or null). - */ - private void error (String message, char textFound, String textExpected) + /** + * Report a serious error. + * @param message The error message. + * @param textFound The text that caused the error (or null). + */ + private void error(String message, char textFound, String textExpected) throws SAXException - { - error (message, new Character (textFound).toString (), textExpected); - } + { + error(message, new Character(textFound).toString(), textExpected); + } - /** Report typical case fatal errors. */ - private void error (String message) + /** + * Report typical case fatal errors. + */ + private void error(String message) throws SAXException - { - handler.fatal (message); - } - - - ////////////////////////////////////////////////////////////////////// - // Major syntactic productions. - ////////////////////////////////////////////////////////////////////// + { + handler.fatal(message); + } + ////////////////////////////////////////////////////////////////////// + // Major syntactic productions. + ////////////////////////////////////////////////////////////////////// - /** - * Parse an XML document. - * <pre> - * [1] document ::= prolog element Misc* - * </pre> - * <p>This is the top-level parsing function for a single XML - * document. As a minimum, a well-formed document must have - * a document element, and a valid document must have a prolog - * (one with doctype) as well. - */ - private void parseDocument () + /** + * Parse an XML document. + * <pre> + * [1] document ::= prolog element Misc* + * </pre> + * <p>This is the top-level parsing function for a single XML + * document. As a minimum, a well-formed document must have + * a document element, and a valid document must have a prolog + * (one with doctype) as well. 
+ */ + private void parseDocument() throws Exception - { - try { // added by MHK - boolean sawDTD = parseProlog (); - require ('<'); - parseElement (!sawDTD); - } catch (EOFException ee) { // added by MHK - error("premature end of file", "[EOF]", null); - } - - try { - parseMisc (); //skip all white, PIs, and comments - char c = readCh (); //if this doesn't throw an exception... - error ("unexpected characters after document end", c, null); - } catch (EOFException e) { - return; - } - } - - static final char startDelimComment [] = { '<', '!', '-', '-' }; - static final char endDelimComment [] = { '-', '-' }; + { + try + { // added by MHK + boolean sawDTD = parseProlog(); + require('<'); + parseElement(!sawDTD); + } + catch (EOFException ee) + { // added by MHK + error("premature end of file", "[EOF]", null); + } + + try + { + parseMisc(); //skip all white, PIs, and comments + char c = readCh(); //if this doesn't throw an exception... + error("unexpected characters after document end", c, null); + } + catch (EOFException e) + { + return; + } + } + + static final char[] startDelimComment = { '<', '!', '-', '-' }; + static final char[] endDelimComment = { '-', '-' }; - /** - * Skip a comment. - * <pre> - * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->" - * </pre> - * <p> (The <code><!--</code> has already been read.) - */ - private void parseComment () + /** + * Skip a comment. + * <pre> + * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->" + * </pre> + * <p> (The <code><!--</code> has already been read.) 
+ */ + private void parseComment() throws Exception - { - char c; - boolean saved = expandPE; - - expandPE = false; - parseUntil (endDelimComment); - require ('>'); - expandPE = saved; - handler.comment (dataBuffer, 0, dataBufferPos); - dataBufferPos = 0; - } + { + char c; + boolean saved = expandPE; + + expandPE = false; + parseUntil(endDelimComment); + require('>'); + expandPE = saved; + handler.comment(dataBuffer, 0, dataBufferPos); + dataBufferPos = 0; + } + + static final char[] startDelimPI = { '<', '?' }; + static final char[] endDelimPI = { '?', '>' }; - static final char startDelimPI [] = { '<', '?' }; - static final char endDelimPI [] = { '?', '>' }; - - /** - * Parse a processing instruction and do a call-back. - * <pre> - * [16] PI ::= '<?' PITarget - * (S (Char* - (Char* '?>' Char*)))? - * '?>' - * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') ) - * </pre> - * <p> (The <code><?</code> has already been read.) - */ - private void parsePI () + /** + * Parse a processing instruction and do a call-back. + * <pre> + * [16] PI ::= '<?' PITarget + * (S (Char* - (Char* '?>' Char*)))? + * '?>' + * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') ) + * </pre> + * <p> (The <code><?</code> has already been read.) 
+ */ + private void parsePI() throws SAXException, IOException - { - String name; - boolean saved = expandPE; - - expandPE = false; - name = readNmtoken (true); - //NE08 - if (name.indexOf(':') >= 0) - error ("Illegal character(':') in processing instruction name ", name, null); - if ("xml".equalsIgnoreCase (name)) - error ("Illegal processing instruction target", name, null); - if (!tryRead (endDelimPI)) { - requireWhitespace (); - parseUntil (endDelimPI); - } - expandPE = saved; - handler.processingInstruction (name, dataBufferToString ()); - } - - - static final char endDelimCDATA [] = { ']', ']', '>' }; + { + String name; + boolean saved = expandPE; + + expandPE = false; + name = readNmtoken(true); + //NE08 + if (name.indexOf(':') >= 0) + { + error("Illegal character(':') in processing instruction name ", + name, null); + } + if ("xml".equalsIgnoreCase(name)) + { + error("Illegal processing instruction target", name, null); + } + if (!tryRead(endDelimPI)) + { + requireWhitespace(); + parseUntil(endDelimPI); + } + expandPE = saved; + handler.processingInstruction(name, dataBufferToString()); + } + + static final char[] endDelimCDATA = { ']', ']', '>' }; - private boolean isDirtyCurrentElement; + private boolean isDirtyCurrentElement; - /** - * Parse a CDATA section. - * <pre> - * [18] CDSect ::= CDStart CData CDEnd - * [19] CDStart ::= '<![CDATA[' - * [20] CData ::= (Char* - (Char* ']]>' Char*)) - * [21] CDEnd ::= ']]>' - * </pre> - * <p> (The '<![CDATA[' has already been read.) - */ - private void parseCDSect () + /** + * Parse a CDATA section. + * <pre> + * [18] CDSect ::= CDStart CData CDEnd + * [19] CDStart ::= '<![CDATA[' + * [20] CData ::= (Char* - (Char* ']]>' Char*)) + * [21] CDEnd ::= ']]>' + * </pre> + * <p> (The '<![CDATA[' has already been read.) 
+ */ + private void parseCDSect() throws Exception - { - parseUntil (endDelimCDATA); - dataBufferFlush (); - } - + { + parseUntil(endDelimCDATA); + dataBufferFlush(); + } - /** - * Parse the prolog of an XML document. - * <pre> - * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)? - * </pre> - * <p>We do not look for the XML declaration here, because it was - * handled by pushURL (). - * @see pushURL - * @return true if a DTD was read. - */ - private boolean parseProlog () + /** + * Parse the prolog of an XML document. + * <pre> + * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)? + * </pre> + * <p>We do not look for the XML declaration here, because it was + * handled by pushURL (). + * @see pushURL + * @return true if a DTD was read. + */ + private boolean parseProlog() throws Exception - { - parseMisc (); - - if (tryRead ("<!DOCTYPE")) { - parseDoctypedecl (); - parseMisc (); - return true; - } - return false; - } + { + parseMisc(); - private void checkLegalVersion (String version) - throws SAXException - { - int len = version.length (); - for (int i = 0; i < len; i++) { - char c = version.charAt (i); - if ('0' <= c && c <= '9') - continue; - if (c == '_' || c == '.' || c == ':' || c == '-') - continue; - if ('a' <= c && c <= 'z') - continue; - if ('A' <= c && c <= 'Z') - continue; - error ("illegal character in version", version, "1.0"); - } - } + if (tryRead("<!DOCTYPE")) + { + parseDoctypedecl(); + parseMisc(); + return true; + } + return false; + } + private void checkLegalVersion(String version) + throws SAXException + { + int len = version.length(); + for (int i = 0; i < len; i++) + { + char c = version.charAt(i); + if ('0' <= c && c <= '9') + { + continue; + } + if (c == '_' || c == '.' || c == ':' || c == '-') + { + continue; + } + if ('a' <= c && c <= 'z') + { + continue; + } + if ('A' <= c && c <= 'Z') + { + continue; + } + error ("illegal character in version", version, "1.0"); + } + } - /** - * Parse the XML declaration. 
- * <pre> - * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' - * [24] VersionInfo ::= S 'version' Eq - * ("'" VersionNum "'" | '"' VersionNum '"' ) - * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')* - * [32] SDDecl ::= S 'standalone' Eq - * ( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' ) - * [80] EncodingDecl ::= S 'encoding' Eq - * ( "'" EncName "'" | "'" EncName "'" ) - * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* - * </pre> - * <p> (The <code><?xml</code> and whitespace have already been read.) - * @return the encoding in the declaration, uppercased; or null - * @see #parseTextDecl - * @see #setupDecoding - */ - private String parseXMLDecl (boolean ignoreEncoding) + /** + * Parse the XML declaration. + * <pre> + * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' + * [24] VersionInfo ::= S 'version' Eq + * ("'" VersionNum "'" | '"' VersionNum '"' ) + * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')* + * [32] SDDecl ::= S 'standalone' Eq + * ( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' ) + * [80] EncodingDecl ::= S 'encoding' Eq + * ( "'" EncName "'" | "'" EncName "'" ) + * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* + * </pre> + * <p> (The <code><?xml</code> and whitespace have already been read.) 
+ * @return the encoding in the declaration, uppercased; or null + * @see #parseTextDecl + * @see #setupDecoding + */ + private String parseXMLDecl(boolean ignoreEncoding) throws SAXException, IOException - { - String version; - String encodingName = null; - String standalone = null; - int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; - String inputEncoding = null; + { + String version; + String encodingName = null; + String standalone = null; + int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; + String inputEncoding = null; - switch (this.encoding) + switch (this.encoding) + { + case ENCODING_EXTERNAL: + case ENCODING_UTF_8: + inputEncoding = "UTF-8"; + break; + case ENCODING_ISO_8859_1: + inputEncoding = "ISO-8859-1"; + break; + case ENCODING_UCS_2_12: + inputEncoding = "UTF-16BE"; + break; + case ENCODING_UCS_2_21: + inputEncoding = "UTF-16LE"; + break; + } + + // Read the version. + require("version"); + parseEq(); + checkLegalVersion(version = readLiteral(flags)); + if (!version.equals("1.0")) + { + if (version.equals("1.1")) { - case ENCODING_EXTERNAL: - case ENCODING_UTF_8: - inputEncoding = "UTF-8"; - break; - case ENCODING_ISO_8859_1: - inputEncoding = "ISO-8859-1"; - break; - case ENCODING_UCS_2_12: - inputEncoding = "UTF-16BE"; - break; - case ENCODING_UCS_2_21: - inputEncoding = "UTF-16LE"; - break; + handler.warn("expected XML version 1.0, not: " + version); + xmlVersion = XML_11; + } + else + { + error("illegal XML version", version, "1.0 or 1.1"); } + } + else + { + xmlVersion = XML_10; + } + // Try reading an encoding declaration. 
+ boolean white = tryWhitespace(); + + if (tryRead("encoding")) + { + if (!white) + { + error("whitespace required before 'encoding='"); + } + parseEq(); + encodingName = readLiteral(flags); + if (!ignoreEncoding) + { + setupDecoding(encodingName); + } + } + + // Try reading a standalone declaration + if (encodingName != null) + { + white = tryWhitespace(); + } + if (tryRead("standalone")) + { + if (!white) + { + error("whitespace required before 'standalone='"); + } + parseEq(); + standalone = readLiteral(flags); + if ("yes".equals(standalone)) + { + docIsStandalone = true; + } + else if (!"no".equals(standalone)) + { + error("standalone flag must be 'yes' or 'no'"); + } + } - // Read the version. - require ("version"); - parseEq (); - checkLegalVersion (version = readLiteral (flags)); - if (!version.equals ("1.0")){ - if(version.equals ("1.1")){ - handler.warn ("expected XML version 1.0, not: " + version); - xmlVersion = XML_11; - }else { - error("illegal XML version", version, "1.0 or 1.1"); - } - } - else - xmlVersion = XML_10; - // Try reading an encoding declaration. 
- boolean white = tryWhitespace (); - - if (tryRead ("encoding")) { - if (!white) - error ("whitespace required before 'encoding='"); - parseEq (); - encodingName = readLiteral (flags); - if (!ignoreEncoding) - setupDecoding (encodingName); - } - - // Try reading a standalone declaration - if (encodingName != null) - white = tryWhitespace (); - if (tryRead ("standalone")) { - if (!white) - error ("whitespace required before 'standalone='"); - parseEq (); - standalone = readLiteral (flags); - if ("yes".equals (standalone)) - docIsStandalone = true; - else if (!"no".equals (standalone)) - error ("standalone flag must be 'yes' or 'no'"); - } - - skipWhitespace (); - require ("?>"); - - if (inputEncoding == null) - { - inputEncoding = encodingName; - } - handler.xmlDecl(version, encodingName, "yes".equals(standalone), - inputEncoding); - - return encodingName; - } + skipWhitespace(); + require("?>"); + if (inputEncoding == null) + { + inputEncoding = encodingName; + } + handler.xmlDecl(version, encodingName, docIsStandalone, + inputEncoding); + + return encodingName; + } - /** - * Parse a text declaration. - * <pre> - * [79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' - * [80] EncodingDecl ::= S 'encoding' Eq - * ( '"' EncName '"' | "'" EncName "'" ) - * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* - * </pre> - * <p> (The <code><?xml</code>' and whitespace have already been read.) - * @return the encoding in the declaration, uppercased; or null - * @see #parseXMLDecl - * @see #setupDecoding - */ - private String parseTextDecl (boolean ignoreEncoding) + /** + * Parse a text declaration. + * <pre> + * [79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' + * [80] EncodingDecl ::= S 'encoding' Eq + * ( '"' EncName '"' | "'" EncName "'" ) + * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* + * </pre> + * <p> (The <code><?xml</code>' and whitespace have already been read.) 
+ * @return the encoding in the declaration, uppercased; or null + * @see #parseXMLDecl + * @see #setupDecoding + */ + private String parseTextDecl(boolean ignoreEncoding) throws SAXException, IOException - { - String encodingName = null; - int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; - - // Read an optional version. - if (tryRead ("version")) { - String version; - parseEq (); - checkLegalVersion (version = readLiteral (flags)); - - if (version.equals ("1.1")){ - if (xmlVersion == XML_10){ - error ("external subset has later version number.", "1.0", version); - } - handler.warn ("expected XML version 1.0, not: " + version); - xmlVersion = XML_11; - }else if(!version.equals ("1.0")) { - error("illegal XML version", version, "1.0 or 1.1"); - } - requireWhitespace (); - } - - - // Read the encoding. - require ("encoding"); - parseEq (); - encodingName = readLiteral (flags); - if (!ignoreEncoding) - setupDecoding (encodingName); - - skipWhitespace (); - require ("?>"); - - return encodingName; - } - + { + String encodingName = null; + int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; + + // Read an optional version. + if (tryRead ("version")) + { + String version; + parseEq(); + checkLegalVersion(version = readLiteral(flags)); + + if (version.equals("1.1")) + { + if (xmlVersion == XML_10) + { + error("external subset has later version number.", "1.0", + version); + } + handler.warn("expected XML version 1.0, not: " + version); + xmlVersion = XML_11; + } + else if (!version.equals("1.0")) + { + error("illegal XML version", version, "1.0 or 1.1"); + } + requireWhitespace(); + } + + // Read the encoding. + require("encoding"); + parseEq(); + encodingName = readLiteral(flags); + if (!ignoreEncoding) + { + setupDecoding(encodingName); + } + skipWhitespace(); + require("?>"); + + return encodingName; + } - /** - * Sets up internal state so that we can decode an entity using the - * specified encoding. 
This is used when we start to read an entity - * and we have been given knowledge of its encoding before we start to - * read any data (e.g. from a SAX input source or from a MIME type). - * - * <p> It is also used after autodetection, at which point only very - * limited adjustments to the encoding may be used (switching between - * related builtin decoders). - * - * @param encodingName The name of the encoding specified by the user. - * @exception IOException if the encoding isn't supported either - * internally to this parser, or by the hosting JVM. - * @see #parseXMLDecl - * @see #parseTextDecl + /** + * Sets up internal state so that we can decode an entity using the + * specified encoding. This is used when we start to read an entity + * and we have been given knowledge of its encoding before we start to + * read any data (e.g. from a SAX input source or from a MIME type). + * + * <p> It is also used after autodetection, at which point only very + * limited adjustments to the encoding may be used (switching between + * related builtin decoders). + * + * @param encodingName The name of the encoding specified by the user. + * @exception IOException if the encoding isn't supported either + * internally to this parser, or by the hosting JVM. + * @see #parseXMLDecl + * @see #parseTextDecl */ - private void setupDecoding (String encodingName) + private void setupDecoding(String encodingName) throws SAXException, IOException - { - encodingName = encodingName.toUpperCase (); - - // ENCODING_EXTERNAL indicates an encoding that wasn't - // autodetected ... we can use builtin decoders, or - // ones from the JVM (InputStreamReader). - - // Otherwise we can only tweak what was autodetected, and - // only for single byte (ASCII derived) builtin encodings. 
- - // ASCII-derived encodings - if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) { - if (encodingName.equals ("ISO-8859-1") - || encodingName.equals ("8859_1") - || encodingName.equals ("ISO8859_1") - ) { - encoding = ENCODING_ISO_8859_1; - return; - } else if (encodingName.equals ("US-ASCII") - || encodingName.equals ("ASCII")) { - encoding = ENCODING_ASCII; - return; - } else if (encodingName.equals ("UTF-8") - || encodingName.equals ("UTF8")) { - encoding = ENCODING_UTF_8; - return; - } else if (encoding != ENCODING_EXTERNAL) { - // used to start with a new reader ... - throw new UnsupportedEncodingException (encodingName); - } - // else fallthrough ... - // it's ASCII-ish and something other than a builtin - } - - // Unicode and such - if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) { - if (!(encodingName.equals ("ISO-10646-UCS-2") - || encodingName.equals ("UTF-16") - || encodingName.equals ("UTF-16BE") - || encodingName.equals ("UTF-16LE"))) - error ("unsupported Unicode encoding", - encodingName, - "UTF-16"); - return; - } - - // four byte encodings - if (encoding == ENCODING_UCS_4_1234 - || encoding == ENCODING_UCS_4_4321 - || encoding == ENCODING_UCS_4_2143 - || encoding == ENCODING_UCS_4_3412) { - // Strictly: "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists - if (!encodingName.equals ("ISO-10646-UCS-4")) - error ("unsupported 32-bit encoding", - encodingName, - "ISO-10646-UCS-4"); - return; - } - - // assert encoding == ENCODING_EXTERNAL - // if (encoding != ENCODING_EXTERNAL) - // throw new RuntimeException ("encoding = " + encoding); - - if (encodingName.equals ("UTF-16BE")) { - encoding = ENCODING_UCS_2_12; - return; - } - if (encodingName.equals ("UTF-16LE")) { - encoding = ENCODING_UCS_2_21; - return; - } - - // We couldn't use the builtin decoders at all. But we can try to - // create a reader, since we haven't messed up buffering. Tweak - // the encoding name if necessary. 
- - if (encodingName.equals ("UTF-16") - || encodingName.equals ("ISO-10646-UCS-2")) - encodingName = "Unicode"; - // Ignoring all the EBCDIC aliases here - - reader = new InputStreamReader (is, encodingName); - sourceType = INPUT_READER; - } - - - /** - * Parse miscellaneous markup outside the document element and DOCTYPE - * declaration. - * <pre> - * [27] Misc ::= Comment | PI | S - * </pre> - */ - private void parseMisc () + { + encodingName = encodingName.toUpperCase(); + + // ENCODING_EXTERNAL indicates an encoding that wasn't + // autodetected ... we can use builtin decoders, or + // ones from the JVM (InputStreamReader). + + // Otherwise we can only tweak what was autodetected, and + // only for single byte (ASCII derived) builtin encodings. + + // ASCII-derived encodings + if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) + { + if (encodingName.equals("ISO-8859-1") + || encodingName.equals("8859_1") + || encodingName.equals("ISO8859_1")) + { + encoding = ENCODING_ISO_8859_1; + return; + } + else if (encodingName.equals("US-ASCII") + || encodingName.equals("ASCII")) + { + encoding = ENCODING_ASCII; + return; + } + else if (encodingName.equals("UTF-8") + || encodingName.equals("UTF8")) + { + encoding = ENCODING_UTF_8; + return; + } + else if (encoding != ENCODING_EXTERNAL) + { + // used to start with a new reader ... + throw new UnsupportedEncodingException(encodingName); + } + // else fallthrough ... 
+ // it's ASCII-ish and something other than a builtin + } + + // Unicode and such + if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) + { + if (!(encodingName.equals("ISO-10646-UCS-2") + || encodingName.equals("UTF-16") + || encodingName.equals("UTF-16BE") + || encodingName.equals("UTF-16LE"))) + { + error("unsupported Unicode encoding", encodingName, "UTF-16"); + } + return; + } + + // four byte encodings + if (encoding == ENCODING_UCS_4_1234 + || encoding == ENCODING_UCS_4_4321 + || encoding == ENCODING_UCS_4_2143 + || encoding == ENCODING_UCS_4_3412) + { + // Strictly: "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists + if (!encodingName.equals("ISO-10646-UCS-4")) + { + error("unsupported 32-bit encoding", encodingName, + "ISO-10646-UCS-4"); + } + return; + } + + // assert encoding == ENCODING_EXTERNAL + // if (encoding != ENCODING_EXTERNAL) + // throw new RuntimeException ("encoding = " + encoding); + + if (encodingName.equals("UTF-16BE")) + { + encoding = ENCODING_UCS_2_12; + return; + } + if (encodingName.equals("UTF-16LE")) + { + encoding = ENCODING_UCS_2_21; + return; + } + + // We couldn't use the builtin decoders at all. But we can try to + // create a reader, since we haven't messed up buffering. Tweak + // the encoding name if necessary. + + if (encodingName.equals("UTF-16") + || encodingName.equals("ISO-10646-UCS-2")) + { + encodingName = "Unicode"; + } + // Ignoring all the EBCDIC aliases here + + reader = new InputStreamReader(is, encodingName); + sourceType = INPUT_READER; + } + + /** + * Parse miscellaneous markup outside the document element and DOCTYPE + * declaration. 
+ * <pre> + * [27] Misc ::= Comment | PI | S + * </pre> + */ + private void parseMisc() throws Exception - { - while (true) { - skipWhitespace (); - if (tryRead (startDelimPI)) { - parsePI (); - } else if (tryRead (startDelimComment)) { - parseComment (); - } else { - return; - } - } - } - + { + while (true) + { + skipWhitespace(); + if (tryRead(startDelimPI)) + { + parsePI(); + } + else if (tryRead(startDelimComment)) + { + parseComment(); + } + else + { + return; + } + } + } - /** - * Parse a document type declaration. - * <pre> - * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? - * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' - * </pre> - * <p> (The <code><!DOCTYPE</code> has already been read.) - */ - private void parseDoctypedecl () + /** + * Parse a document type declaration. + * <pre> + * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? + * ('[' (markupdecl | PEReference | S)* ']' S?)? '>' + * </pre> + * <p> (The <code><!DOCTYPE</code> has already been read.) + */ + private void parseDoctypedecl() throws Exception - { - String rootName, ids[]; - - // Read the document type name. 
- requireWhitespace (); - rootName = readNmtoken (true); - - // Read the External subset's IDs - skipWhitespace (); - ids = readExternalIds (false, true); - - // report (a) declaration of name, (b) lexical info (ids) - handler.doctypeDecl (rootName, ids [0], ids [1]); - - // Internal subset is parsed first, if present - skipWhitespace (); - if (tryRead ('[')) { - - // loop until the subset ends - while (true) { - doReport = expandPE = true; - skipWhitespace (); - doReport = expandPE = false; - if (tryRead (']')) { - break; // end of subset - } else { - // WFC, PEs in internal subset (only between decls) - peIsError = expandPE = true; - parseMarkupdecl (); - peIsError = expandPE = false; - } - } - } - skipWhitespace (); - require ('>'); - - // Read the external subset, if any - InputSource subset; - - if (ids [1] == null) - subset = handler.getExternalSubset (rootName, - handler.getSystemId ()); - else - subset = null; - if (ids [1] != null || subset != null) { - pushString (null, ">"); - - // NOTE: [dtd] is so we say what SAX2 expects, - // though it's misleading (subset, not entire dtd) - if (ids [1] != null) - pushURL (true, "[dtd]", ids, null, null, null, true); - else { - handler.warn ("modifying document by adding external subset"); - pushURL (true, "[dtd]", - new String [] { subset.getPublicId (), - subset.getSystemId (), null }, - subset.getCharacterStream (), - subset.getByteStream (), - subset.getEncoding (), - false); - } - - // Loop until we end up back at '>' - while (true) { - doReport = expandPE = true; - skipWhitespace (); - doReport = expandPE = false; - if (tryRead ('>')) { - break; - } else { - expandPE = true; - parseMarkupdecl (); - expandPE = false; - } - } - - // the ">" string isn't popped yet - if (inputStack.size () != 1) - error ("external subset has unmatched '>'"); - } - - // done dtd - handler.endDoctype (); - expandPE = false; - doReport = true; - } + { + String rootName; + ExternalIdentifiers ids; + // Read the document type name. 
+ requireWhitespace(); + rootName = readNmtoken(true); - /** - * Parse a markup declaration in the internal or external DTD subset. - * <pre> - * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl - * | NotationDecl | PI | Comment - * [30] extSubsetDecl ::= (markupdecl | conditionalSect - * | PEReference | S) * - * </pre> - * <p> Reading toplevel PE references is handled as a lexical issue - * by the caller, as is whitespace. - */ - private void parseMarkupdecl () + // Read the External subset's IDs + skipWhitespace(); + ids = readExternalIds(false, true); + + // report (a) declaration of name, (b) lexical info (ids) + handler.doctypeDecl(rootName, ids.publicId, ids.systemId); + + // Internal subset is parsed first, if present + skipWhitespace(); + if (tryRead('[')) + { + + // loop until the subset ends + while (true) + { + doReport = expandPE = true; + skipWhitespace(); + doReport = expandPE = false; + if (tryRead(']')) + { + break; // end of subset + } + else + { + // WFC, PEs in internal subset (only between decls) + peIsError = expandPE = true; + parseMarkupdecl(); + peIsError = expandPE = false; + } + } + } + skipWhitespace(); + require('>'); + + // Read the external subset, if any + InputSource subset; + + if (ids.systemId == null) + { + subset = handler.getExternalSubset(rootName, + handler.getSystemId()); + } + else + { + subset = null; + } + if (ids.systemId != null || subset != null) + { + pushString(null, ">"); + + // NOTE: [dtd] is so we say what SAX2 expects, + // though it's misleading (subset, not entire dtd) + if (ids.systemId != null) + { + pushURL(true, "[dtd]", ids, null, null, null, true); + } + else + { + handler.warn("modifying document by adding external subset"); + pushURL(true, "[dtd]", + new ExternalIdentifiers(subset.getPublicId(), + subset.getSystemId(), + null), + subset.getCharacterStream(), + subset.getByteStream(), + subset.getEncoding(), + false); + } + + // Loop until we end up back at '>' + while (true) + { + doReport = 
expandPE = true; + skipWhitespace(); + doReport = expandPE = false; + if (tryRead('>')) + { + break; + } + else + { + expandPE = true; + parseMarkupdecl(); + expandPE = false; + } + } + + // the ">" string isn't popped yet + if (inputStack.size() != 1) + { + error("external subset has unmatched '>'"); + } + } + + // done dtd + handler.endDoctype(); + expandPE = false; + doReport = true; + } + + /** + * Parse a markup declaration in the internal or external DTD subset. + * <pre> + * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl + * | NotationDecl | PI | Comment + * [30] extSubsetDecl ::= (markupdecl | conditionalSect + * | PEReference | S) * + * </pre> + * <p> Reading toplevel PE references is handled as a lexical issue + * by the caller, as is whitespace. + */ + private void parseMarkupdecl() throws Exception - { - char saved [] = null; - boolean savedPE = expandPE; - - // prevent "<%foo;" and ensures saved entity is right - require ('<'); - unread ('<'); - expandPE = false; - - if (tryRead ("<!ELEMENT")) { - saved = readBuffer; - expandPE = savedPE; - parseElementDecl (); - } else if (tryRead ("<!ATTLIST")) { - saved = readBuffer; - expandPE = savedPE; - parseAttlistDecl (); - } else if (tryRead ("<!ENTITY")) { - saved = readBuffer; - expandPE = savedPE; - parseEntityDecl (); - } else if (tryRead ("<!NOTATION")) { - saved = readBuffer; - expandPE = savedPE; - parseNotationDecl (); - } else if (tryRead (startDelimPI)) { - saved = readBuffer; - expandPE = savedPE; - parsePI (); - } else if (tryRead (startDelimComment)) { - saved = readBuffer; - expandPE = savedPE; - parseComment (); - } else if (tryRead ("<![")) { - saved = readBuffer; - expandPE = savedPE; - if (inputStack.size () > 0) - parseConditionalSect (saved); - else - error ("conditional sections illegal in internal subset"); - } else { - error ("expected markup declaration"); - } - - // VC: Proper Decl/PE Nesting - if (readBuffer != saved) - handler.verror ("Illegal Declaration/PE nesting"); - 
} + { + char[] saved = null; + boolean savedPE = expandPE; + // prevent "<%foo;" and ensures saved entity is right + require('<'); + unread('<'); + expandPE = false; + + if (tryRead("<!ELEMENT")) + { + saved = readBuffer; + expandPE = savedPE; + parseElementDecl(); + } + else if (tryRead("<!ATTLIST")) + { + saved = readBuffer; + expandPE = savedPE; + parseAttlistDecl(); + } + else if (tryRead("<!ENTITY")) + { + saved = readBuffer; + expandPE = savedPE; + parseEntityDecl(); + } + else if (tryRead("<!NOTATION")) + { + saved = readBuffer; + expandPE = savedPE; + parseNotationDecl(); + } + else if (tryRead(startDelimPI)) + { + saved = readBuffer; + expandPE = savedPE; + parsePI(); + } + else if (tryRead(startDelimComment)) + { + saved = readBuffer; + expandPE = savedPE; + parseComment(); + } + else if (tryRead("<![")) + { + saved = readBuffer; + expandPE = savedPE; + if (inputStack.size() > 0) + { + parseConditionalSect(saved); + } + else + { + error("conditional sections illegal in internal subset"); + } + } + else + { + error("expected markup declaration"); + } - /** - * Parse an element, with its tags. - * <pre> - * [39] element ::= EmptyElementTag | STag content ETag - * [40] STag ::= '<' Name (S Attribute)* S? '>' - * [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>' - * </pre> - * <p> (The '<' has already been read.) - * <p>NOTE: this method actually chains onto parseContent (), if necessary, - * and parseContent () will take care of calling parseETag (). - */ - private void parseElement (boolean maybeGetSubset) + // VC: Proper Decl/PE Nesting + if (readBuffer != saved) + { + handler.verror("Illegal Declaration/PE nesting"); + } + } + + /** + * Parse an element, with its tags. + * <pre> + * [39] element ::= EmptyElementTag | STag content ETag + * [40] STag ::= '<' Name (S Attribute)* S? '>' + * [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>' + * </pre> + * <p> (The '<' has already been read.) 
+ * <p>NOTE: this method actually chains onto parseContent (), if necessary, + * and parseContent () will take care of calling parseETag (). + */ + private void parseElement(boolean maybeGetSubset) throws Exception - { - String gi; - char c; - int oldElementContent = currentElementContent; - String oldElement = currentElement; - Object element []; - - // This is the (global) counter for the - // array of specified attributes. - tagAttributePos = 0; - - // Read the element type name. - gi = readNmtoken (true); - - // If we saw no DTD, and this is the document root element, - // let the application modify the input stream by providing one. - if (maybeGetSubset) { - InputSource subset = handler.getExternalSubset (gi, - handler.getSystemId ()); - if (subset != null) { - String publicId = subset.getPublicId (); - String systemId = subset.getSystemId (); - - handler.warn ("modifying document by adding DTD"); - handler.doctypeDecl (gi, publicId, systemId); - pushString (null, ">"); - - // NOTE: [dtd] is so we say what SAX2 expects, - // though it's misleading (subset, not entire dtd) - pushURL (true, "[dtd]", - new String [] { publicId, systemId, null }, - subset.getCharacterStream (), - subset.getByteStream (), - subset.getEncoding (), - false); - - // Loop until we end up back at '>' - while (true) { - doReport = expandPE = true; - skipWhitespace (); - doReport = expandPE = false; - if (tryRead ('>')) { - break; - } else { - expandPE = true; - parseMarkupdecl (); - expandPE = false; - } - } - - // the ">" string isn't popped yet - if (inputStack.size () != 1) - error ("external subset has unmatched '>'"); - - handler.endDoctype (); - } - } - - // Determine the current content type. - currentElement = gi; - element = (Object []) elementInfo.get (gi); - currentElementContent = getContentType (element, CONTENT_ANY); - - // Read the attributes, if any. - // After this loop, "c" is the closing delimiter. 
- boolean white = tryWhitespace (); - c = readCh (); - while (c != '/' && c != '>') { - unread (c); - if (!white) - error ("need whitespace between attributes"); - parseAttribute (gi); - white = tryWhitespace (); - c = readCh (); - } - - // Supply any defaulted attributes. - Enumeration atts = declaredAttributes (element); - if (atts != null) { - String aname; + { + String gi; + char c; + int oldElementContent = currentElementContent; + String oldElement = currentElement; + ElementDecl element; + + // This is the (global) counter for the + // array of specified attributes. + tagAttributePos = 0; + + // Read the element type name. + gi = readNmtoken(true); + + // If we saw no DTD, and this is the document root element, + // let the application modify the input stream by providing one. + if (maybeGetSubset) + { + InputSource subset = handler.getExternalSubset(gi, + handler.getSystemId()); + if (subset != null) + { + String publicId = subset.getPublicId(); + String systemId = subset.getSystemId(); + + handler.warn("modifying document by adding DTD"); + handler.doctypeDecl(gi, publicId, systemId); + pushString(null, ">"); + + // NOTE: [dtd] is so we say what SAX2 expects, + // though it's misleading (subset, not entire dtd) + pushURL(true, "[dtd]", + new ExternalIdentifiers(publicId, systemId, null), + subset.getCharacterStream(), + subset.getByteStream(), + subset.getEncoding(), + false); + + // Loop until we end up back at '>' + while (true) + { + doReport = expandPE = true; + skipWhitespace(); + doReport = expandPE = false; + if (tryRead('>')) + { + break; + } + else + { + expandPE = true; + parseMarkupdecl(); + expandPE = false; + } + } + + // the ">" string isn't popped yet + if (inputStack.size() != 1) + { + error("external subset has unmatched '>'"); + } + + handler.endDoctype(); + } + } + + // Determine the current content type. 
+ currentElement = gi; + element = (ElementDecl) elementInfo.get(gi); + currentElementContent = getContentType(element, CONTENT_ANY); + + // Read the attributes, if any. + // After this loop, "c" is the closing delimiter. + boolean white = tryWhitespace(); + c = readCh(); + while (c != '/' && c != '>') + { + unread(c); + if (!white) + { + error("need whitespace between attributes"); + } + parseAttribute(gi); + white = tryWhitespace(); + c = readCh(); + } + + // Supply any defaulted attributes. + Iterator atts = declaredAttributes(element); + if (atts != null) + { + String aname; loop: - while (atts.hasMoreElements ()) { - aname = (String) atts.nextElement (); - // See if it was specified. - for (int i = 0; i < tagAttributePos; i++) { - if (tagAttributes [i] == aname) { - continue loop; - } - } - // ... or has a default - String value = getAttributeDefaultValue (gi, aname); - - if (value == null) - continue; - handler.attribute (aname, value, false); - } - } - - // Figure out if this is a start tag - // or an empty element, and dispatch an - // event accordingly. - switch (c) { - case '>': - handler.startElement (gi); - parseContent (); - break; - case '/': - require ('>'); - handler.startElement (gi); - handler.endElement (gi); - break; - } - - // Restore the previous state. - currentElement = oldElement; - currentElementContent = oldElementContent; - } + while (atts.hasNext()) + { + aname = (String) atts.next(); + // See if it was specified. + for (int i = 0; i < tagAttributePos; i++) + { + if (tagAttributes[i] == aname) + { + continue loop; + } + } + // ... or has a default + String value = getAttributeDefaultValue(gi, aname); + + if (value == null) + { + continue; + } + handler.attribute(aname, value, false); + } + } + // Figure out if this is a start tag + // or an empty element, and dispatch an + // event accordingly. 
+ switch (c) + { + case '>': + handler.startElement(gi); + parseContent(); + break; + case '/': + require('>'); + handler.startElement(gi); + handler.endElement(gi); + break; + } - /** - * Parse an attribute assignment. - * <pre> - * [41] Attribute ::= Name Eq AttValue - * </pre> - * @param name The name of the attribute's element. - * @see SAXDriver#attribute - */ - private void parseAttribute (String name) - throws Exception - { - String aname; - String type; - String value; - int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF; - - // Read the attribute name. - aname = readNmtoken (true); - type = getAttributeType (name, aname); - - // Parse '=' - parseEq (); - - // Read the value, normalizing whitespace - // unless it is CDATA. - if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { - if (type == "CDATA" || type == null) { - value = readLiteral (flags); - } else { - value = readLiteral (flags | LIT_NORMALIZE); - } - } else { - if (type.equals("CDATA") || type == null) { - value = readLiteral (flags); - } else { - value = readLiteral (flags | LIT_NORMALIZE); - } + // Restore the previous state. + currentElement = oldElement; + currentElementContent = oldElementContent; } + + /** + * Parse an attribute assignment. + * <pre> + * [41] Attribute ::= Name Eq AttValue + * </pre> + * @param name The name of the attribute's element. + * @see SAXDriver#attribute + */ + private void parseAttribute(String name) + throws Exception + { + String aname; + String type; + String value; + int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF; + + // Read the attribute name. + aname = readNmtoken(true); + type = getAttributeType(name, aname); + + // Parse '=' + parseEq(); + + // Read the value, normalizing whitespace + // unless it is CDATA. 
+ if (handler.stringInterning) + { + if (type == "CDATA" || type == null) + { + value = readLiteral(flags); + } + else + { + value = readLiteral(flags | LIT_NORMALIZE); + } + } + else + { + if (type.equals("CDATA") || type == null) + { + value = readLiteral(flags); + } + else + { + value = readLiteral(flags | LIT_NORMALIZE); + } + } - // WFC: no duplicate attributes - for (int i = 0; i < tagAttributePos; i++) - if (aname.equals (tagAttributes [i])) - error ("duplicate attribute", aname, null); - - // Inform the handler about the - // attribute. - handler.attribute (aname, value, true); - dataBufferPos = 0; - - // Note that the attribute has been - // specified. - if (tagAttributePos == tagAttributes.length) { - String newAttrib[] = new String [tagAttributes.length * 2]; - System.arraycopy (tagAttributes, 0, newAttrib, 0, tagAttributePos); - tagAttributes = newAttrib; - } - tagAttributes [tagAttributePos++] = aname; - } + // WFC: no duplicate attributes + for (int i = 0; i < tagAttributePos; i++) + { + if (aname.equals(tagAttributes [i])) + { + error("duplicate attribute", aname, null); + } + } + // Inform the handler about the + // attribute. + handler.attribute(aname, value, true); + dataBufferPos = 0; + + // Note that the attribute has been + // specified. + if (tagAttributePos == tagAttributes.length) + { + String newAttrib[] = new String[tagAttributes.length * 2]; + System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos); + tagAttributes = newAttrib; + } + tagAttributes[tagAttributePos++] = aname; + } - /** - * Parse an equals sign surrounded by optional whitespace. - * <pre> - * [25] Eq ::= S? '=' S? - * </pre> - */ - private void parseEq () + /** + * Parse an equals sign surrounded by optional whitespace. + * <pre> + * [25] Eq ::= S? '=' S? 
+ * </pre> + */ + private void parseEq() throws SAXException, IOException - { - skipWhitespace (); - require ('='); - skipWhitespace (); - } - + { + skipWhitespace(); + require('='); + skipWhitespace(); + } - /** - * Parse an end tag. - * <pre> - * [42] ETag ::= '</' Name S? '>' - * </pre> - * <p>NOTE: parseContent () chains to here, we already read the - * "</". - */ - private void parseETag () + /** + * Parse an end tag. + * <pre> + * [42] ETag ::= '</' Name S? '>' + * </pre> + * <p>NOTE: parseContent () chains to here, we already read the + * "</". + */ + private void parseETag() throws Exception - { - require (currentElement); - skipWhitespace (); - require ('>'); - handler.endElement (currentElement); - // not re-reporting any SAXException re bogus end tags, - // even though that diagnostic might be clearer ... - } - - - /** - * Parse the content of an element. - * <pre> - * [43] content ::= (element | CharData | Reference - * | CDSect | PI | Comment)* - * [67] Reference ::= EntityRef | CharRef - * </pre> - * <p> NOTE: consumes ETtag. - */ - private void parseContent () + { + require(currentElement); + skipWhitespace(); + require('>'); + handler.endElement(currentElement); + // not re-reporting any SAXException re bogus end tags, + // even though that diagnostic might be clearer ... + } + + /** + * Parse the content of an element. + * <pre> + * [43] content ::= (element | CharData | Reference + * | CDSect | PI | Comment)* + * [67] Reference ::= EntityRef | CharRef + * </pre> + * <p> NOTE: consumes ETtag. 
+ */ + private void parseContent() throws Exception - { - char c; - - while (true) { - // consume characters (or ignorable whitspace) until delimiter - parseCharData (); - - // Handle delimiters - c = readCh (); - switch (c) { - - case '&': // Found "&" - c = readCh (); - if (c == '#') { - parseCharRef (); - } else { - unread (c); - parseEntityRef (true); - } - isDirtyCurrentElement = true; - break; - - case '<': // Found "<" - dataBufferFlush (); - c = readCh (); - switch (c) { - case '!': // Found "<!" - c = readCh (); - switch (c) { - case '-': // Found "<!-" - require ('-'); - isDirtyCurrentElement = false; - parseComment (); - break; - case '[': // Found "<![" - isDirtyCurrentElement = false; - require ("CDATA["); - handler.startCDATA (); - inCDATA = true; - parseCDSect (); - inCDATA = false; - handler.endCDATA (); - break; - default: - error ("expected comment or CDATA section", c, null); - break; - } - break; - - case '?': // Found "<?" - isDirtyCurrentElement = false; - parsePI (); - break; - - case '/': // Found "</" - isDirtyCurrentElement = false; - parseETag (); - return; - - default: // Found "<" followed by something else - isDirtyCurrentElement = false; - unread (c); - parseElement (false); - break; - } - } - } - - } - - - /** - * Parse an element type declaration. - * <pre> - * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' - * </pre> - * <p> NOTE: the '<!ELEMENT' has already been read. - */ - private void parseElementDecl () + { + char c; + + while (true) + { + // consume characters (or ignorable whitspace) until delimiter + parseCharData(); + + // Handle delimiters + c = readCh(); + switch (c) + { + case '&': // Found "&" + c = readCh(); + if (c == '#') + { + parseCharRef(); + } + else + { + unread(c); + parseEntityRef(true); + } + isDirtyCurrentElement = true; + break; + + case '<': // Found "<" + dataBufferFlush(); + c = readCh(); + switch (c) + { + case '!': // Found "<!" 
+ c = readCh(); + switch (c) + { + case '-': // Found "<!-" + require('-'); + isDirtyCurrentElement = false; + parseComment(); + break; + case '[': // Found "<![" + isDirtyCurrentElement = false; + require("CDATA["); + handler.startCDATA(); + inCDATA = true; + parseCDSect(); + inCDATA = false; + handler.endCDATA(); + break; + default: + error("expected comment or CDATA section", c, null); + break; + } + break; + + case '?': // Found "<?" + isDirtyCurrentElement = false; + parsePI(); + break; + + case '/': // Found "</" + isDirtyCurrentElement = false; + parseETag(); + return; + + default: // Found "<" followed by something else + isDirtyCurrentElement = false; + unread(c); + parseElement(false); + break; + } + } + } + } + + /** + * Parse an element type declaration. + * <pre> + * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' + * </pre> + * <p> NOTE: the '<!ELEMENT' has already been read. + */ + private void parseElementDecl() throws Exception - { - String name; - - requireWhitespace (); - // Read the element type name. - name = readNmtoken (true); - - requireWhitespace (); - // Read the content model. - parseContentspec (name); - - skipWhitespace (); - require ('>'); - } - + { + String name; + + requireWhitespace(); + // Read the element type name. + name = readNmtoken(true); - /** - * Content specification. - * <pre> - * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements - * </pre> - */ - private void parseContentspec (String name) - throws Exception - { -// FIXME: move elementDecl() into setElement(), pass EMTPY/ANY ... 
- if (tryRead ("EMPTY")) { - setElement (name, CONTENT_EMPTY, null, null); - if (!skippedPE) - handler.getDeclHandler ().elementDecl (name, "EMPTY"); - return; - } else if (tryRead ("ANY")) { - setElement (name, CONTENT_ANY, null, null); - if (!skippedPE) - handler.getDeclHandler ().elementDecl (name, "ANY"); - return; - } else { - String model; - char saved []; - - require ('('); - saved = readBuffer; - dataBufferAppend ('('); - skipWhitespace (); - if (tryRead ("#PCDATA")) { - dataBufferAppend ("#PCDATA"); - parseMixed (saved); - model = dataBufferToString (); - setElement (name, CONTENT_MIXED, model, null); - } else { - parseElements (saved); - model = dataBufferToString (); - setElement (name, CONTENT_ELEMENTS, model, null); - } - if (!skippedPE) - handler.getDeclHandler ().elementDecl (name, model); - } - } + requireWhitespace(); + // Read the content model. + parseContentspec(name); + + skipWhitespace(); + require('>'); + } - /** - * Parse an element-content model. - * <pre> - * [47] elements ::= (choice | seq) ('?' | '*' | '+')? - * [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')' - * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' - * </pre> - * - * <p> NOTE: the opening '(' and S have already been read. - * - * @param saved Buffer for entity that should have the terminal ')' - */ - private void parseElements (char saved []) + /** + * Content specification. + * <pre> + * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements + * </pre> + */ + private void parseContentspec(String name) throws Exception - { - char c; - char sep; - - // Parse the first content particle - skipWhitespace (); - parseCp (); - - // Check for end or for a separator. 
- skipWhitespace (); - c = readCh (); - switch (c) { - case ')': - // VC: Proper Group/PE Nesting - if (readBuffer != saved) - handler.verror ("Illegal Group/PE nesting"); - - dataBufferAppend (')'); - c = readCh (); - switch (c) { - case '*': - case '+': - case '?': - dataBufferAppend (c); - break; - default: - unread (c); - } - return; - case ',': // Register the separator. - case '|': - sep = c; - dataBufferAppend (c); - break; - default: - error ("bad separator in content model", c, null); - return; - } - - // Parse the rest of the content model. - while (true) { - skipWhitespace (); - parseCp (); - skipWhitespace (); - c = readCh (); - if (c == ')') { - // VC: Proper Group/PE Nesting - if (readBuffer != saved) - handler.verror ("Illegal Group/PE nesting"); - - dataBufferAppend (')'); - break; - } else if (c != sep) { - error ("bad separator in content model", c, null); - return; - } else { - dataBufferAppend (c); - } - } - - // Check for the occurrence indicator. - c = readCh (); - switch (c) { - case '?': - case '*': - case '+': - dataBufferAppend (c); - return; - default: - unread (c); - return; - } - } - - - /** - * Parse a content particle. - * <pre> - * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? - * </pre> - */ - private void parseCp () + { + // FIXME: move elementDecl() into setElement(), pass EMTPY/ANY ... 
+ if (tryRead("EMPTY")) + { + setElement(name, CONTENT_EMPTY, null, null); + if (!skippedPE) + { + handler.getDeclHandler().elementDecl(name, "EMPTY"); + } + return; + } + else if (tryRead("ANY")) + { + setElement(name, CONTENT_ANY, null, null); + if (!skippedPE) + { + handler.getDeclHandler().elementDecl(name, "ANY"); + } + return; + } + else + { + String model; + char[] saved; + + require('('); + saved = readBuffer; + dataBufferAppend('('); + skipWhitespace(); + if (tryRead("#PCDATA")) + { + dataBufferAppend("#PCDATA"); + parseMixed(saved); + model = dataBufferToString(); + setElement(name, CONTENT_MIXED, model, null); + } + else + { + parseElements(saved); + model = dataBufferToString(); + setElement(name, CONTENT_ELEMENTS, model, null); + } + if (!skippedPE) + { + handler.getDeclHandler().elementDecl(name, model); + } + } + } + + /** + * Parse an element-content model. + * <pre> + * [47] elements ::= (choice | seq) ('?' | '*' | '+')? + * [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')' + * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' + * </pre> + * + * <p> NOTE: the opening '(' and S have already been read. + * + * @param saved Buffer for entity that should have the terminal ')' + */ + private void parseElements(char[] saved) throws Exception - { - if (tryRead ('(')) { - dataBufferAppend ('('); - parseElements (readBuffer); - } else { - dataBufferAppend (readNmtoken (true)); - char c = readCh (); - switch (c) { - case '?': - case '*': - case '+': - dataBufferAppend (c); - break; - default: - unread (c); - break; - } - } - } - - - /** - * Parse mixed content. - * <pre> - * [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*' - * | '(' S? ('#PCDATA') S? ')' - * </pre> - * - * @param saved Buffer for entity that should have the terminal ')' - */ - private void parseMixed (char saved []) + { + char c; + char sep; + + // Parse the first content particle + skipWhitespace(); + parseCp(); + + // Check for end or for a separator. 
+ skipWhitespace(); + c = readCh(); + switch (c) + { + case ')': + // VC: Proper Group/PE Nesting + if (readBuffer != saved) + { + handler.verror("Illegal Group/PE nesting"); + } + + dataBufferAppend(')'); + c = readCh(); + switch (c) + { + case '*': + case '+': + case '?': + dataBufferAppend(c); + break; + default: + unread(c); + } + return; + case ',': // Register the separator. + case '|': + sep = c; + dataBufferAppend(c); + break; + default: + error("bad separator in content model", c, null); + return; + } + + // Parse the rest of the content model. + while (true) + { + skipWhitespace(); + parseCp(); + skipWhitespace(); + c = readCh(); + if (c == ')') + { + // VC: Proper Group/PE Nesting + if (readBuffer != saved) + { + handler.verror("Illegal Group/PE nesting"); + } + + dataBufferAppend(')'); + break; + } + else if (c != sep) + { + error("bad separator in content model", c, null); + return; + } + else + { + dataBufferAppend(c); + } + } + + // Check for the occurrence indicator. + c = readCh(); + switch (c) + { + case '?': + case '*': + case '+': + dataBufferAppend(c); + return; + default: + unread(c); + return; + } + } + + /** + * Parse a content particle. + * <pre> + * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? + * </pre> + */ + private void parseCp() throws Exception - { - // Check for PCDATA alone. - skipWhitespace (); - if (tryRead (')')) { - // VC: Proper Group/PE Nesting - if (readBuffer != saved) - handler.verror ("Illegal Group/PE nesting"); - - dataBufferAppend (")*"); - tryRead ('*'); - return; - } - - // Parse mixed content. 
- skipWhitespace (); - while (!tryRead (")")) { - require ('|'); - dataBufferAppend ('|'); - skipWhitespace (); - dataBufferAppend (readNmtoken (true)); - skipWhitespace (); - } - - // VC: Proper Group/PE Nesting - if (readBuffer != saved) - handler.verror ("Illegal Group/PE nesting"); - - require ('*'); - dataBufferAppend (")*"); - } - + { + if (tryRead('(')) + { + dataBufferAppend('('); + parseElements(readBuffer); + } + else + { + dataBufferAppend(readNmtoken(true)); + char c = readCh(); + switch (c) + { + case '?': + case '*': + case '+': + dataBufferAppend(c); + break; + default: + unread(c); + break; + } + } + } - /** - * Parse an attribute list declaration. - * <pre> - * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' - * </pre> - * <p>NOTE: the '<!ATTLIST' has already been read. - */ - private void parseAttlistDecl () + /** + * Parse mixed content. + * <pre> + * [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*' + * | '(' S? ('#PCDATA') S? ')' + * </pre> + * + * @param saved Buffer for entity that should have the terminal ')' + */ + private void parseMixed(char[] saved) throws Exception - { - String elementName; - - requireWhitespace (); - elementName = readNmtoken (true); - boolean white = tryWhitespace (); - while (!tryRead ('>')) { - if (!white) - error ("whitespace required before attribute definition"); - parseAttDef (elementName); - white = tryWhitespace (); - } - } - - - /** - * Parse a single attribute definition. - * <pre> - * [53] AttDef ::= S Name S AttType S DefaultDecl - * </pre> - */ - private void parseAttDef (String elementName) + { + // Check for PCDATA alone. + skipWhitespace(); + if (tryRead(')')) + { + // VC: Proper Group/PE Nesting + if (readBuffer != saved) + { + handler.verror("Illegal Group/PE nesting"); + } + + dataBufferAppend(")*"); + tryRead('*'); + return; + } + + // Parse mixed content. 
+ skipWhitespace(); + while (!tryRead(")")) + { + require('|'); + dataBufferAppend('|'); + skipWhitespace(); + dataBufferAppend(readNmtoken(true)); + skipWhitespace(); + } + + // VC: Proper Group/PE Nesting + if (readBuffer != saved) + { + handler.verror("Illegal Group/PE nesting"); + } + + require('*'); + dataBufferAppend(")*"); + } + + /** + * Parse an attribute list declaration. + * <pre> + * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' + * </pre> + * <p>NOTE: the '<!ATTLIST' has already been read. + */ + private void parseAttlistDecl() throws Exception - { - String name; - String type; - String enumer = null; - - // Read the attribute name. - name = readNmtoken (true); - - // Read the attribute type. - requireWhitespace (); - type = readAttType (); - - // Get the string of enumerated values if necessary. - if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { - if ("ENUMERATION" == type || "NOTATION" == type) - enumer = dataBufferToString (); - } else { - if ("ENUMERATION".equals(type) || "NOTATION".equals(type)) - enumer = dataBufferToString (); + { + String elementName; + + requireWhitespace(); + elementName = readNmtoken(true); + boolean white = tryWhitespace(); + while (!tryRead('>')) + { + if (!white) + { + error("whitespace required before attribute definition"); + } + parseAttDef(elementName); + white = tryWhitespace(); + } } + + /** + * Parse a single attribute definition. + * <pre> + * [53] AttDef ::= S Name S AttType S DefaultDecl + * </pre> + */ + private void parseAttDef(String elementName) + throws Exception + { + String name; + String type; + String enumer = null; + + // Read the attribute name. + name = readNmtoken(true); - // Read the default value. - requireWhitespace (); - parseDefault (elementName, name, type, enumer); - } + // Read the attribute type. + requireWhitespace(); + type = readAttType(); + // Get the string of enumerated values if necessary. 
+ if (handler.stringInterning) + { + if ("ENUMERATION" == type || "NOTATION" == type) + { + enumer = dataBufferToString(); + } + } + else + { + if ("ENUMERATION".equals(type) || "NOTATION".equals(type)) + { + enumer = dataBufferToString(); + } + } + + // Read the default value. + requireWhitespace(); + parseDefault(elementName, name, type, enumer); + } /** * Parse the attribute type. @@ -1598,229 +1926,284 @@ loop: * [54] AttType ::= StringType | TokenizedType | EnumeratedType * [55] StringType ::= 'CDATA' * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' - * | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' + * | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' * [57] EnumeratedType ::= NotationType | Enumeration * </pre> */ - private String readAttType () + private String readAttType() throws Exception { - if (tryRead ('(')) { - parseEnumeration (false); - return "ENUMERATION"; - } else { - String typeString = readNmtoken (true); - if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { - if ("NOTATION" == typeString) { - parseNotationType (); - return typeString; - } else if ("CDATA" == typeString - || "ID" == typeString - || "IDREF" == typeString - || "IDREFS" == typeString - || "ENTITY" == typeString - || "ENTITIES" == typeString - || "NMTOKEN" == typeString - || "NMTOKENS" == typeString) - return typeString; - } else { - if ("NOTATION".equals(typeString)) { - parseNotationType (); - return typeString; - } else if ("CDATA".equals(typeString) - || "ID".equals(typeString) - || "IDREF".equals(typeString) - || "IDREFS".equals(typeString) - || "ENTITY".equals(typeString) - || "ENTITIES".equals(typeString) - || "NMTOKEN".equals(typeString) - || "NMTOKENS".equals(typeString)) - return typeString; - } - error ("illegal attribute type", typeString, null); - return null; - } + if (tryRead('(')) + { + parseEnumeration(false); + return "ENUMERATION"; + } + else + { + String typeString = readNmtoken(true); + if (handler.stringInterning) + { + if ("NOTATION" == typeString) + 
{ + parseNotationType(); + return typeString; + } + else if ("CDATA" == typeString + || "ID" == typeString + || "IDREF" == typeString + || "IDREFS" == typeString + || "ENTITY" == typeString + || "ENTITIES" == typeString + || "NMTOKEN" == typeString + || "NMTOKENS" == typeString) + { + return typeString; + } + } + else + { + if ("NOTATION".equals(typeString)) + { + parseNotationType(); + return typeString; + } + else if ("CDATA".equals(typeString) + || "ID".equals(typeString) + || "IDREF".equals(typeString) + || "IDREFS".equals(typeString) + || "ENTITY".equals(typeString) + || "ENTITIES".equals(typeString) + || "NMTOKEN".equals(typeString) + || "NMTOKENS".equals(typeString)) + { + return typeString; + } + } + error("illegal attribute type", typeString, null); + return null; + } } - - /** - * Parse an enumeration. - * <pre> - * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' - * </pre> - * <p>NOTE: the '(' has already been read. - */ - private void parseEnumeration (boolean isNames) + /** + * Parse an enumeration. + * <pre> + * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' + * </pre> + * <p>NOTE: the '(' has already been read. + */ + private void parseEnumeration(boolean isNames) throws Exception - { - dataBufferAppend ('('); - - // Read the first token. - skipWhitespace (); - dataBufferAppend (readNmtoken (isNames)); - // Read the remaining tokens. - skipWhitespace (); - while (!tryRead (')')) { - require ('|'); - dataBufferAppend ('|'); - skipWhitespace (); - dataBufferAppend (readNmtoken (isNames)); - skipWhitespace (); - } - dataBufferAppend (')'); - } - + { + dataBufferAppend('('); + + // Read the first token. + skipWhitespace(); + dataBufferAppend(readNmtoken(isNames)); + // Read the remaining tokens. 
+ skipWhitespace(); + while (!tryRead(')')) + { + require('|'); + dataBufferAppend('|'); + skipWhitespace(); + dataBufferAppend(readNmtoken (isNames)); + skipWhitespace(); + } + dataBufferAppend(')'); + } - /** - * Parse a notation type for an attribute. - * <pre> - * [58] NotationType ::= 'NOTATION' S '(' S? NameNtoks - * (S? '|' S? name)* S? ')' - * </pre> - * <p>NOTE: the 'NOTATION' has already been read - */ - private void parseNotationType () + /** + * Parse a notation type for an attribute. + * <pre> + * [58] NotationType ::= 'NOTATION' S '(' S? NameNtoks + * (S? '|' S? name)* S? ')' + * </pre> + * <p>NOTE: the 'NOTATION' has already been read + */ + private void parseNotationType() throws Exception - { - requireWhitespace (); - require ('('); - - parseEnumeration (true); - } - - - /** - * Parse the default value for an attribute. - * <pre> - * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' - * | (('#FIXED' S)? AttValue) - * </pre> - */ - private void parseDefault ( - String elementName, - String name, - String type, - String enumer - ) throws Exception - { - int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; - String value = null; - int flags = LIT_ATTRIBUTE; - boolean saved = expandPE; - String defaultType = null; - - // LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace - // chars to spaces (doesn't matter when that's done if it doesn't - // interfere with char refs expanding to whitespace). 
- - if (!skippedPE) { - flags |= LIT_ENTITY_REF; - if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { - if ("CDATA" != type) - flags |= LIT_NORMALIZE; - } else { - if (!"CDATA".equals(type)) - flags |= LIT_NORMALIZE; - } - } - - expandPE = false; - if (tryRead ('#')) { - if (tryRead ("FIXED")) { - defaultType = "#FIXED"; - valueType = ATTRIBUTE_DEFAULT_FIXED; - requireWhitespace (); - value = readLiteral (flags); - } else if (tryRead ("REQUIRED")) { - defaultType = "#REQUIRED"; - valueType = ATTRIBUTE_DEFAULT_REQUIRED; - } else if (tryRead ("IMPLIED")) { - defaultType = "#IMPLIED"; - valueType = ATTRIBUTE_DEFAULT_IMPLIED; - } else { - error ("illegal keyword for attribute default value"); - } - } else - value = readLiteral (flags); - expandPE = saved; - setAttribute (elementName, name, type, enumer, value, valueType); - if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { - if ("ENUMERATION" == type) - type = enumer; - else if ("NOTATION" == type) - type = "NOTATION " + enumer; - } else { - if ("ENUMERATION".equals(type)) - type = enumer; - else if ("NOTATION".equals(type)) - type = "NOTATION " + enumer; + { + requireWhitespace(); + require('('); + + parseEnumeration(true); } - if (!skippedPE) handler.getDeclHandler () - .attributeDecl (elementName, name, type, defaultType, value); - } - - /** - * Parse a conditional section. - * <pre> - * [61] conditionalSect ::= includeSect || ignoreSect - * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' - * extSubsetDecl ']]>' - * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' - * ignoreSectContents* ']]>' - * [64] ignoreSectContents ::= Ignore - * ('<![' ignoreSectContents* ']]>' Ignore )* - * [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* ) - * </pre> - * <p> NOTE: the '>![' has already been read. - */ - private void parseConditionalSect (char saved []) + /** + * Parse the default value for an attribute. + * <pre> + * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' + * | (('#FIXED' S)? 
AttValue) + * </pre> + */ + private void parseDefault(String elementName, String name, + String type, String enumer) throws Exception - { - skipWhitespace (); - if (tryRead ("INCLUDE")) { - skipWhitespace (); - require ('['); - // VC: Proper Conditional Section/PE Nesting - if (readBuffer != saved) - handler.verror ("Illegal Conditional Section/PE nesting"); - skipWhitespace (); - while (!tryRead ("]]>")) { - parseMarkupdecl (); - skipWhitespace (); - } - } else if (tryRead ("IGNORE")) { - skipWhitespace (); - require ('['); - // VC: Proper Conditional Section/PE Nesting - if (readBuffer != saved) - handler.verror ("Illegal Conditional Section/PE nesting"); - int nesting = 1; - char c; - expandPE = false; - for (int nest = 1; nest > 0;) { - c = readCh (); - switch (c) { - case '<': - if (tryRead ("![")) { - nest++; - } - case ']': - if (tryRead ("]>")) { - nest--; - } - } - } - expandPE = true; - } else { - error ("conditional section must begin with INCLUDE or IGNORE"); - } - } - - private void parseCharRef () + { + int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; + String value = null; + int flags = LIT_ATTRIBUTE; + boolean saved = expandPE; + String defaultType = null; + + // LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace + // chars to spaces (doesn't matter when that's done if it doesn't + // interfere with char refs expanding to whitespace). 
+ + if (!skippedPE) + { + flags |= LIT_ENTITY_REF; + if (handler.stringInterning) + { + if ("CDATA" != type) + { + flags |= LIT_NORMALIZE; + } + } + else + { + if (!"CDATA".equals(type)) + { + flags |= LIT_NORMALIZE; + } + } + } + + expandPE = false; + if (tryRead('#')) + { + if (tryRead("FIXED")) + { + defaultType = "#FIXED"; + valueType = ATTRIBUTE_DEFAULT_FIXED; + requireWhitespace(); + value = readLiteral(flags); + } + else if (tryRead("REQUIRED")) + { + defaultType = "#REQUIRED"; + valueType = ATTRIBUTE_DEFAULT_REQUIRED; + } + else if (tryRead("IMPLIED")) + { + defaultType = "#IMPLIED"; + valueType = ATTRIBUTE_DEFAULT_IMPLIED; + } + else + { + error("illegal keyword for attribute default value"); + } + } + else + { + value = readLiteral(flags); + } + expandPE = saved; + setAttribute(elementName, name, type, enumer, value, valueType); + if (handler.stringInterning) + { + if ("ENUMERATION" == type) + { + type = enumer; + } + else if ("NOTATION" == type) + { + type = "NOTATION " + enumer; + } + } + else + { + if ("ENUMERATION".equals(type)) + { + type = enumer; + } + else if ("NOTATION".equals(type)) + { + type = "NOTATION " + enumer; + } + } + if (!skippedPE) + { + handler.getDeclHandler().attributeDecl(elementName, name, type, + defaultType, value); + } + } + + /** + * Parse a conditional section. + * <pre> + * [61] conditionalSect ::= includeSect || ignoreSect + * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' + * extSubsetDecl ']]>' + * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' + * ignoreSectContents* ']]>' + * [64] ignoreSectContents ::= Ignore + * ('<![' ignoreSectContents* ']]>' Ignore )* + * [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* ) + * </pre> + * <p> NOTE: the '>![' has already been read. 
+ */ + private void parseConditionalSect(char[] saved) + throws Exception + { + skipWhitespace(); + if (tryRead("INCLUDE")) + { + skipWhitespace(); + require('['); + // VC: Proper Conditional Section/PE Nesting + if (readBuffer != saved) + { + handler.verror("Illegal Conditional Section/PE nesting"); + } + skipWhitespace(); + while (!tryRead("]]>")) + { + parseMarkupdecl(); + skipWhitespace(); + } + } + else if (tryRead("IGNORE")) + { + skipWhitespace(); + require('['); + // VC: Proper Conditional Section/PE Nesting + if (readBuffer != saved) + { + handler.verror("Illegal Conditional Section/PE nesting"); + } + int nesting = 1; + char c; + expandPE = false; + for (int nest = 1; nest > 0; ) + { + c = readCh(); + switch (c) + { + case '<': + if (tryRead("![")) + { + nest++; + } + case ']': + if (tryRead("]>")) + { + nest--; + } + } + } + expandPE = true; + } + else + { + error("conditional section must begin with INCLUDE or IGNORE"); + } + } + + private void parseCharRef() throws SAXException, IOException { - parseCharRef (true /* do flushDataBuffer by default */); + parseCharRef(true /* do flushDataBuffer by default */); } /** @@ -1830,1485 +2213,1717 @@ loop: * </pre> * <p>NOTE: the '&#' has already been read. 
*/ - private void tryReadCharRef () - throws SAXException, IOException + private void tryReadCharRef() + throws SAXException, IOException { - int value = 0; - char c; - - if (tryRead ('x')) { + int value = 0; + char c; + + if (tryRead('x')) + { loop1: - while (true) { - c = readCh (); - int n; - switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - n = c - '0'; - break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - n = (c - 'a') + 10; - break; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - n = (c - 'A') + 10; - break; - case ';': - break loop1; - default: - error ("illegal character in character reference", c, null); - break loop1; - } - value *= 16; - value += n; - } - } else { + while (true) + { + c = readCh(); + if (c == ';') + { + break loop1; + } + else + { + int n = Character.digit(c, 16); + if (n == -1) + { + error("illegal character in character reference", c, null); + break loop1; + } + value *= 16; + value += n; + } + } + } + else + { loop2: - while (true) { - c = readCh (); - switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - value *= 10; - value += c - '0'; - break; - case ';': - break loop2; - default: - error ("illegal character in character reference", c, null); - break loop2; - } - } - } - - // check for character refs being legal XML - if ((value < 0x0020 - && ! 
(value == '\n' || value == '\t' || value == '\r')) - || (value >= 0xD800 && value <= 0xDFFF) - || value == 0xFFFE || value == 0xFFFF - || value > 0x0010ffff) - error ("illegal XML character reference U+" - + Integer.toHexString (value)); - - // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz - // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: - if (value > 0x0010ffff) { - // too big for surrogate - error ("character reference " + value + " is too large for UTF-16", - new Integer (value).toString (), null); - } - + while (true) + { + c = readCh(); + if (c == ';') + { + break loop2; + } + else + { + int n = Character.digit(c, 10); + if (n == -1) + { + error("illegal character in character reference", c, null); + break loop2; + } + value *= 10; + value += n; + } + } + } + + // check for character refs being legal XML + if ((value < 0x0020 + && ! (value == '\n' || value == '\t' || value == '\r')) + || (value >= 0xD800 && value <= 0xDFFF) + || value == 0xFFFE || value == 0xFFFF + || value > 0x0010ffff) + { + error("illegal XML character reference U+" + + Integer.toHexString(value)); + } + + // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz + // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: + if (value > 0x0010ffff) + { + // too big for surrogate + error("character reference " + value + " is too large for UTF-16", + new Integer(value).toString(), null); + } + } - /** - * Read and interpret a character reference. - * <pre> - * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' - * </pre> - * <p>NOTE: the '&#' has already been read. - */ - private void parseCharRef (boolean doFlush) + /** + * Read and interpret a character reference. + * <pre> + * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' + * </pre> + * <p>NOTE: the '&#' has already been read. 
+ */ + private void parseCharRef(boolean doFlush) throws SAXException, IOException - { - int value = 0; - char c; - - if (tryRead ('x')) { + { + int value = 0; + char c; + + if (tryRead('x')) + { loop1: - while (true) { - c = readCh (); - int n; - switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - n = c - '0'; - break; - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - n = (c - 'a') + 10; - break; - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - n = (c - 'A') + 10; - break; - case ';': - break loop1; - default: - error ("illegal character in character reference", c, null); - break loop1; - } - value *= 16; - value += n; - } - } else { + while (true) + { + c = readCh(); + if (c == ';') + { + break loop1; + } + else + { + int n = Character.digit(c, 16); + if (n == -1) + { + error("illegal character in character reference", c, null); + break loop1; + } + value *= 16; + value += n; + } + } + } + else + { loop2: - while (true) { - c = readCh (); - switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - value *= 10; - value += c - '0'; - break; - case ';': - break loop2; - default: - error ("illegal character in character reference", c, null); - break loop2; - } - } - } - - // check for character refs being legal XML - if ((value < 0x0020 - && ! 
(value == '\n' || value == '\t' || value == '\r')) - || (value >= 0xD800 && value <= 0xDFFF) - || value == 0xFFFE || value == 0xFFFF - || value > 0x0010ffff) - error ("illegal XML character reference U+" - + Integer.toHexString (value)); - - // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz - // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: - if (value <= 0x0000ffff) { - // no surrogates needed - dataBufferAppend ((char) value); - } else if (value <= 0x0010ffff) { - value -= 0x10000; - // > 16 bits, surrogate needed - dataBufferAppend ((char) (0xd800 | (value >> 10))); - dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff))); - } else { - // too big for surrogate - error ("character reference " + value + " is too large for UTF-16", - new Integer (value).toString (), null); - } - if (doFlush) dataBufferFlush (); - } - - - /** - * Parse and expand an entity reference. - * <pre> - * [68] EntityRef ::= '&' Name ';' - * </pre> - * <p>NOTE: the '&' has already been read. - * @param externalAllowed External entities are allowed here. - */ - private void parseEntityRef (boolean externalAllowed) + while (true) + { + c = readCh(); + if (c == ';') + { + break loop2; + } + else + { + int n = Character.digit(c, 10); + if (n == -1) + { + error("illegal character in character reference", c, null); + break loop2; + } + value *= 10; + value += c - '0'; + } + } + } + + // check for character refs being legal XML + if ((value < 0x0020 + && ! 
(value == '\n' || value == '\t' || value == '\r')) + || (value >= 0xD800 && value <= 0xDFFF) + || value == 0xFFFE || value == 0xFFFF + || value > 0x0010ffff) + { + error("illegal XML character reference U+" + + Integer.toHexString(value)); + } + + // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz + // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: + if (value <= 0x0000ffff) + { + // no surrogates needed + dataBufferAppend((char) value); + } + else if (value <= 0x0010ffff) + { + value -= 0x10000; + // > 16 bits, surrogate needed + dataBufferAppend((char) (0xd800 | (value >> 10))); + dataBufferAppend((char) (0xdc00 | (value & 0x0003ff))); + } + else + { + // too big for surrogate + error("character reference " + value + " is too large for UTF-16", + new Integer(value).toString(), null); + } + if (doFlush) + { + dataBufferFlush(); + } + } + + /** + * Parse and expand an entity reference. + * <pre> + * [68] EntityRef ::= '&' Name ';' + * </pre> + * <p>NOTE: the '&' has already been read. + * @param externalAllowed External entities are allowed here. + */ + private void parseEntityRef(boolean externalAllowed) throws SAXException, IOException - { - String name; - - name = readNmtoken (true); - require (';'); - switch (getEntityType (name)) { - case ENTITY_UNDECLARED: - // NOTE: XML REC describes amazingly convoluted handling for - // this case. Nothing as meaningful as being a WFness error - // unless the processor might _legitimately_ not have seen a - // declaration ... which is what this implements. - String message; - - message = "reference to undeclared general entity " + name; - if (skippedPE && !docIsStandalone) { - handler.verror (message); - // we don't know this entity, and it might be external... 
- if (externalAllowed) - handler.skippedEntity (name); - } else - error (message); - break; - case ENTITY_INTERNAL: - pushString (name, getEntityValue (name)); - - //workaround for possible input pop before marking - //the buffer reading position - char t = readCh (); - unread (t); - int bufferPosMark = readBufferPos; - - int end = readBufferPos + getEntityValue (name).length(); - for(int k = readBufferPos; k < end; k++){ - t = readCh (); - if (t == '&'){ - t = readCh (); - if (t == '#'){ - //try to match a character ref - tryReadCharRef (); - - //everything has been read - if (readBufferPos >= end) - break; - k = readBufferPos; - continue; - } - else if (Character.isLetter(t)){ - //looks like an entity ref - unread (t); - readNmtoken (true); - require (';'); - - //everything has been read - if (readBufferPos >= end) - break; - k = readBufferPos; - continue; - } - error(" malformed entity reference"); - } - + { + String name; + + name = readNmtoken(true); + require(';'); + switch (getEntityType(name)) + { + case ENTITY_UNDECLARED: + // NOTE: XML REC describes amazingly convoluted handling for + // this case. Nothing as meaningful as being a WFness error + // unless the processor might _legitimately_ not have seen a + // declaration ... which is what this implements. + String message; + + message = "reference to undeclared general entity " + name; + if (skippedPE && !docIsStandalone) + { + handler.verror(message); + // we don't know this entity, and it might be external... 
+ if (externalAllowed) + { + handler.skippedEntity(name); + } + } + else + { + error(message); + } + break; + case ENTITY_INTERNAL: + pushString(name, getEntityValue(name)); + + //workaround for possible input pop before marking + //the buffer reading position + char t = readCh(); + unread(t); + int bufferPosMark = readBufferPos; + + int end = readBufferPos + getEntityValue(name).length(); + for (int k = readBufferPos; k < end; k++) + { + t = readCh(); + if (t == '&') + { + t = readCh(); + if (t == '#') + { + //try to match a character ref + tryReadCharRef(); + + //everything has been read + if (readBufferPos >= end) + { + break; + } + k = readBufferPos; + continue; + } + else if (Character.isLetter(t)) + { + //looks like an entity ref + unread(t); + readNmtoken(true); + require(';'); + + //everything has been read + if (readBufferPos >= end) + { + break; + } + k = readBufferPos; + continue; + } + error(" malformed entity reference"); + } + } - readBufferPos = bufferPosMark; - break; - case ENTITY_TEXT: - if (externalAllowed) { - pushURL (false, name, getEntityIds (name), - null, null, null, true); - } else { - error ("reference to external entity in attribute value.", - name, null); - } - break; - case ENTITY_NDATA: - if (externalAllowed) { - error ("unparsed entity reference in content", name, null); - } else { - error ("reference to external entity in attribute value.", - name, null); - } - break; - default: - throw new RuntimeException (); - } - } - - - /** - * Parse and expand a parameter entity reference. - * <pre> - * [69] PEReference ::= '%' Name ';' - * </pre> - * <p>NOTE: the '%' has already been read. 
- */ - private void parsePEReference () + readBufferPos = bufferPosMark; + break; + case ENTITY_TEXT: + if (externalAllowed) + { + pushURL(false, name, getEntityIds(name), + null, null, null, true); + } + else + { + error("reference to external entity in attribute value.", + name, null); + } + break; + case ENTITY_NDATA: + if (externalAllowed) + { + error("unparsed entity reference in content", name, null); + } + else + { + error("reference to external entity in attribute value.", + name, null); + } + break; + default: + throw new RuntimeException(); + } + } + + /** + * Parse and expand a parameter entity reference. + * <pre> + * [69] PEReference ::= '%' Name ';' + * </pre> + * <p>NOTE: the '%' has already been read. + */ + private void parsePEReference() throws SAXException, IOException - { - String name; - - name = "%" + readNmtoken (true); - require (';'); - switch (getEntityType (name)) { - case ENTITY_UNDECLARED: - // VC: Entity Declared - handler.verror ("reference to undeclared parameter entity " + name); - - // we should disable handling of all subsequent declarations - // unless this is a standalone document (info discarded) - break; - case ENTITY_INTERNAL: - if (inLiteral) - pushString (name, getEntityValue (name)); - else - pushString (name, ' ' + getEntityValue (name) + ' '); - break; - case ENTITY_TEXT: - if (!inLiteral) - pushString (null, " "); - pushURL (true, name, getEntityIds (name), null, null, null, true); - if (!inLiteral) - pushString (null, " "); - break; - } - } - - /** - * Parse an entity declaration. - * <pre> - * [70] EntityDecl ::= GEDecl | PEDecl - * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' - * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' - * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
- * [74] PEDef ::= EntityValue | ExternalID - * [75] ExternalID ::= 'SYSTEM' S SystemLiteral - * | 'PUBLIC' S PubidLiteral S SystemLiteral - * [76] NDataDecl ::= S 'NDATA' S Name - * </pre> - * <p>NOTE: the '<!ENTITY' has already been read. - */ - private void parseEntityDecl () + { + String name; + + name = "%" + readNmtoken(true); + require(';'); + switch (getEntityType(name)) + { + case ENTITY_UNDECLARED: + // VC: Entity Declared + handler.verror("reference to undeclared parameter entity " + name); + + // we should disable handling of all subsequent declarations + // unless this is a standalone document (info discarded) + break; + case ENTITY_INTERNAL: + if (inLiteral) + { + pushString(name, getEntityValue(name)); + } + else + { + pushString(name, ' ' + getEntityValue(name) + ' '); + } + break; + case ENTITY_TEXT: + if (!inLiteral) + { + pushString(null, " "); + } + pushURL(true, name, getEntityIds(name), null, null, null, true); + if (!inLiteral) + { + pushString(null, " "); + } + break; + } + } + + /** + * Parse an entity declaration. + * <pre> + * [70] EntityDecl ::= GEDecl | PEDecl + * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' + * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' + * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) + * [74] PEDef ::= EntityValue | ExternalID + * [75] ExternalID ::= 'SYSTEM' S SystemLiteral + * | 'PUBLIC' S PubidLiteral S SystemLiteral + * [76] NDataDecl ::= S 'NDATA' S Name + * </pre> + * <p>NOTE: the '<!ENTITY' has already been read. + */ + private void parseEntityDecl() throws Exception - { - boolean peFlag = false; - int flags = 0; - - // Check for a parameter entity. - expandPE = false; - requireWhitespace (); - if (tryRead ('%')) { - peFlag = true; - requireWhitespace (); - } - expandPE = true; - - // Read the entity name, and prepend - // '%' if necessary. 
- String name = readNmtoken (true); - //NE08 - if (name.indexOf(':') >= 0) - error ("Illegal character(':') in entity name ", name, null); - if (peFlag) { - name = "%" + name; - } - - // Read the entity value. - requireWhitespace (); - char c = readCh (); - unread (c); - if (c == '"' || c == '\'') { - // Internal entity ... replacement text has expanded refs - // to characters and PEs, but not to general entities - String value = readLiteral (flags); - setInternalEntity (name, value); - } else { - // Read the external IDs - String ids [] = readExternalIds (false, false); - - // Check for NDATA declaration. - boolean white = tryWhitespace (); - if (!peFlag && tryRead ("NDATA")) { - if (!white) - error ("whitespace required before NDATA"); - requireWhitespace (); - String notationName = readNmtoken (true); - if (!skippedPE) { - setExternalEntity (name, ENTITY_NDATA, ids, notationName); - handler.unparsedEntityDecl (name, ids, notationName); - } - } else if (!skippedPE) { - setExternalEntity (name, ENTITY_TEXT, ids, null); - handler.getDeclHandler () - .externalEntityDecl (name, ids [0], - handler.resolveURIs () - // FIXME: ASSUMES not skipped - // "false" forces error on bad URI - ? handler.absolutize (ids [2], ids [1], false) - : ids [1]); - } - } - - // Finish the declaration. - skipWhitespace (); - require ('>'); - } + { + boolean peFlag = false; + int flags = 0; + + // Check for a parameter entity. + expandPE = false; + requireWhitespace(); + if (tryRead('%')) + { + peFlag = true; + requireWhitespace(); + } + expandPE = true; + + // Read the entity name, and prepend + // '%' if necessary. + String name = readNmtoken(true); + //NE08 + if (name.indexOf(':') >= 0) + { + error("Illegal character(':') in entity name ", name, null); + } + if (peFlag) + { + name = "%" + name; + } + // Read the entity value. + requireWhitespace(); + char c = readCh(); + unread (c); + if (c == '"' || c == '\'') + { + // Internal entity ... 
replacement text has expanded refs + // to characters and PEs, but not to general entities + String value = readLiteral(flags); + setInternalEntity(name, value); + } + else + { + // Read the external IDs + ExternalIdentifiers ids = readExternalIds(false, false); + + // Check for NDATA declaration. + boolean white = tryWhitespace(); + if (!peFlag && tryRead("NDATA")) + { + if (!white) + { + error("whitespace required before NDATA"); + } + requireWhitespace(); + String notationName = readNmtoken(true); + if (!skippedPE) + { + setExternalEntity(name, ENTITY_NDATA, ids, notationName); + handler.unparsedEntityDecl(name, ids.publicId, ids.systemId, + ids.baseUri, notationName); + } + } + else if (!skippedPE) + { + setExternalEntity(name, ENTITY_TEXT, ids, null); + handler.getDeclHandler() + .externalEntityDecl(name, ids.publicId, + handler.resolveURIs() + // FIXME: ASSUMES not skipped + // "false" forces error on bad URI + ? handler.absolutize(ids.baseUri, + ids.systemId, + false) + : ids.systemId); + } + } + + // Finish the declaration. + skipWhitespace(); + require('>'); + } - /** - * Parse a notation declaration. - * <pre> - * [82] NotationDecl ::= '<!NOTATION' S Name S - * (ExternalID | PublicID) S? '>' - * [83] PublicID ::= 'PUBLIC' S PubidLiteral - * </pre> - * <P>NOTE: the '<!NOTATION' has already been read. - */ - private void parseNotationDecl () + /** + * Parse a notation declaration. + * <pre> + * [82] NotationDecl ::= '<!NOTATION' S Name S + * (ExternalID | PublicID) S? '>' + * [83] PublicID ::= 'PUBLIC' S PubidLiteral + * </pre> + * <P>NOTE: the '<!NOTATION' has already been read. + */ + private void parseNotationDecl() throws Exception - { - String nname, ids[]; - - - requireWhitespace (); - nname = readNmtoken (true); - //NE08 - if (nname.indexOf(':') >= 0) - error ("Illegal character(':') in notation name ", nname, null); - requireWhitespace (); - - // Read the external identifiers. - ids = readExternalIds (true, false); - - // Register the notation. 
- setNotation (nname, ids); - - skipWhitespace (); - require ('>'); - } + { + String nname; + ExternalIdentifiers ids; + + requireWhitespace(); + nname = readNmtoken(true); + //NE08 + if (nname.indexOf(':') >= 0) + { + error("Illegal character(':') in notation name ", nname, null); + } + requireWhitespace(); + // Read the external identifiers. + ids = readExternalIds(true, false); - /** - * Parse character data. - * <pre> - * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) - * </pre> - */ - private void parseCharData () + // Register the notation. + setNotation(nname, ids); + + skipWhitespace(); + require('>'); + } + + /** + * Parse character data. + * <pre> + * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) + * </pre> + */ + private void parseCharData() throws Exception - { - char c; - int state = 0; - boolean pureWhite = false; - - // assert (dataBufferPos == 0); - - // are we expecting pure whitespace? it might be dirty... - if ((currentElementContent == CONTENT_ELEMENTS) && !isDirtyCurrentElement) - pureWhite = true; + { + char c; + int state = 0; + boolean pureWhite = false; - // always report right out of readBuffer - // to minimize (pointless) buffer copies - while (true) { - int lineAugment = 0; - int columnAugment = 0; - int i; + // assert (dataBufferPos == 0); + + // are we expecting pure whitespace? it might be dirty... + if ((currentElementContent == CONTENT_ELEMENTS) && !isDirtyCurrentElement) + { + pureWhite = true; + } + // always report right out of readBuffer + // to minimize (pointless) buffer copies + while (true) + { + int lineAugment = 0; + int columnAugment = 0; + int i; + loop: - for (i = readBufferPos; i < readBufferLength; i++) { - switch (c = readBuffer [i]) { - case '\n': - lineAugment++; - columnAugment = 0; - // pureWhite unmodified - break; - case '\r': // should not happen!! 
- case '\t': - case ' ': - // pureWhite unmodified - columnAugment++; - break; - case '&': - case '<': - columnAugment++; - // pureWhite unmodified - // CLEAN end of text sequence - state = 1; - break loop; - case ']': - // that's not a whitespace char, and - // can not terminate pure whitespace either - pureWhite = false; - if ((i + 2) < readBufferLength) { - if (readBuffer [i + 1] == ']' - && readBuffer [i + 2] == '>') { - // ERROR end of text sequence - state = 2; - break loop; - } - } else { - // FIXME missing two end-of-buffer cases - } - columnAugment++; - break; - default: - if ((c < 0x0020 || c > 0xFFFD) - || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085) - && xmlVersion == XML_11)) - error ("illegal XML character U+" - + Integer.toHexString (c)); - // that's not a whitespace char - pureWhite = false; - columnAugment++; - } - } - - // report text thus far - if (lineAugment > 0) { - line += lineAugment; - column = columnAugment; - } else { - column += columnAugment; - } - - // report characters/whitspace - int length = i - readBufferPos; - - if (length != 0) { - if (pureWhite) - handler.ignorableWhitespace (readBuffer, - readBufferPos, length); - else - handler.charData (readBuffer, readBufferPos, length); - readBufferPos = i; - } - - if (state != 0) - break; - - // fill next buffer from this entity, or - // pop stack and continue with previous entity - unread (readCh ()); - } - if (!pureWhite) - isDirtyCurrentElement = true; - // finish, maybe with error - if (state != 1) // finish, no error - error ("character data may not contain ']]>'"); - } - - - ////////////////////////////////////////////////////////////////////// - // High-level reading and scanning methods. - ////////////////////////////////////////////////////////////////////// - - /** - * Require whitespace characters. 
- */ - private void requireWhitespace () + for (i = readBufferPos; i < readBufferLength; i++) + { + switch (c = readBuffer[i]) + { + case '\n': + lineAugment++; + columnAugment = 0; + // pureWhite unmodified + break; + case '\r': // should not happen!! + case '\t': + case ' ': + // pureWhite unmodified + columnAugment++; + break; + case '&': + case '<': + columnAugment++; + // pureWhite unmodified + // CLEAN end of text sequence + state = 1; + break loop; + case ']': + // that's not a whitespace char, and + // can not terminate pure whitespace either + pureWhite = false; + if ((i + 2) < readBufferLength) + { + if (readBuffer [i + 1] == ']' + && readBuffer [i + 2] == '>') + { + // ERROR end of text sequence + state = 2; + break loop; + } + } + else + { + // FIXME missing two end-of-buffer cases + } + columnAugment++; + break; + default: + if ((c < 0x0020 || c > 0xFFFD) + || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085) + && xmlVersion == XML_11)) + { + error("illegal XML character U+" + + Integer.toHexString(c)); + } + // that's not a whitespace char + pureWhite = false; + columnAugment++; + } + } + + // report text thus far + if (lineAugment > 0) + { + line += lineAugment; + column = columnAugment; + } + else + { + column += columnAugment; + } + + // report characters/whitspace + int length = i - readBufferPos; + + if (length != 0) + { + if (pureWhite) + { + handler.ignorableWhitespace(readBuffer, + readBufferPos, length); + } + else + { + handler.charData(readBuffer, readBufferPos, length); + } + readBufferPos = i; + } + + if (state != 0) + { + break; + } + + // fill next buffer from this entity, or + // pop stack and continue with previous entity + unread(readCh()); + } + if (!pureWhite) + { + isDirtyCurrentElement = true; + } + // finish, maybe with error + if (state != 1) // finish, no error + { + error("character data may not contain ']]>'"); + } + } + + ////////////////////////////////////////////////////////////////////// + // High-level reading and 
scanning methods. + ////////////////////////////////////////////////////////////////////// + + /** + * Require whitespace characters. + */ + private void requireWhitespace() throws SAXException, IOException - { - char c = readCh (); - if (isWhitespace (c)) { - skipWhitespace (); - } else { - error ("whitespace required", c, null); - } - } - + { + char c = readCh(); + if (isWhitespace(c)) + { + skipWhitespace(); + } + else + { + error("whitespace required", c, null); + } + } - /** - * Skip whitespace characters. - * <pre> - * [3] S ::= (#x20 | #x9 | #xd | #xa)+ - * </pre> - */ - private void skipWhitespace () + /** + * Skip whitespace characters. + * <pre> + * [3] S ::= (#x20 | #x9 | #xd | #xa)+ + * </pre> + */ + private void skipWhitespace() throws SAXException, IOException - { - // Start with a little cheat. Most of - // the time, the white space will fall - // within the current read buffer; if - // not, then fall through. - if (USE_CHEATS) { - int lineAugment = 0; - int columnAugment = 0; - + { + // Start with a little cheat. Most of + // the time, the white space will fall + // within the current read buffer; if + // not, then fall through. + if (USE_CHEATS) + { + int lineAugment = 0; + int columnAugment = 0; + loop: - for (int i = readBufferPos; i < readBufferLength; i++) { - switch (readBuffer [i]) { - case ' ': - case '\t': - case '\r': - columnAugment++; - break; - case '\n': - lineAugment++; - columnAugment = 0; - break; - case '%': - if (expandPE) - break loop; - // else fall through... - default: - readBufferPos = i; - if (lineAugment > 0) { - line += lineAugment; - column = columnAugment; - } else { - column += columnAugment; - } - return; - } - } - } - - // OK, do it the slow way. - char c = readCh (); - while (isWhitespace (c)) { - c = readCh (); - } - unread (c); - } - - - /** - * Read a name or (when parsing an enumeration) name token. 
- * <pre> - * [5] Name ::= (Letter | '_' | ':') (NameChar)* - * [7] Nmtoken ::= (NameChar)+ - * </pre> - */ - private String readNmtoken (boolean isName) + for (int i = readBufferPos; i < readBufferLength; i++) + { + switch (readBuffer[i]) + { + case ' ': + case '\t': + case '\r': + columnAugment++; + break; + case '\n': + lineAugment++; + columnAugment = 0; + break; + case '%': + if (expandPE) + { + break loop; + } + // else fall through... + default: + readBufferPos = i; + if (lineAugment > 0) + { + line += lineAugment; + column = columnAugment; + } + else + { + column += columnAugment; + } + return; + } + } + } + + // OK, do it the slow way. + char c = readCh (); + while (isWhitespace(c)) + { + c = readCh(); + } + unread(c); + } + + /** + * Read a name or (when parsing an enumeration) name token. + * <pre> + * [5] Name ::= (Letter | '_' | ':') (NameChar)* + * [7] Nmtoken ::= (NameChar)+ + * </pre> + */ + private String readNmtoken(boolean isName) throws SAXException, IOException - { - char c; - - if (USE_CHEATS) { -loop: - for (int i = readBufferPos; i < readBufferLength; i++) { - c = readBuffer [i]; - switch (c) { - case '%': - if (expandPE) - break loop; - // else fall through... - - // What may legitimately come AFTER a name/nmtoken? - case '<': case '>': case '&': - case ',': case '|': case '*': case '+': case '?': - case ')': - case '=': - case '\'': case '"': - case '[': - case ' ': case '\t': case '\r': case '\n': - case ';': - case '/': - int start = readBufferPos; - if (i == start) - error ("name expected", readBuffer [i], null); - readBufferPos = i; - return intern (readBuffer, start, i - start); - - default: -// FIXME ... per IBM's OASIS test submission, these: -// ? 
U+06dd -// Combining U+309B - //these switches are kind of ugly but at least we won't - //have to go over the whole lits for each char - if (isName && i == readBufferPos){ - char c2 = (char) (c & 0x00f0); - switch (c & 0xff00){ - //starting with 01 - case 0x0100: - switch (c2){ - case 0x0030: - if (c == 0x0132 || c == 0x0133 || c == 0x013f) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x0040: - if (c == 0x0140 || c == 0x0149) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x00c0: - if (c == 0x01c4 || c == 0x01cc) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x00f0: - if (c == 0x01f1 || c == 0x01f3) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x00b0: - if (c == 0x01f1 || c == 0x01f3) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - default: - if (c == 0x017f) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - } - - break; - //starting with 11 - case 0x1100: - switch (c2){ - case 0x0000: - if (c == 0x1104 || c == 0x1108 || - c == 0x110a || c == 0x110d) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x0030: - if (c == 0x113b || c == 0x113f) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x0040: - if (c == 0x1141 || c == 0x114d - || c == 0x114f ) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x0050: - if (c == 0x1151 || c == 0x1156) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x0060: - if (c == 0x1162 || c == 0x1164 - || c == 0x1166 || c == 0x116b - || c == 0x116f) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - break; - case 0x00b0: - if (c == 0x11b6 || c == 0x11b9 - || c == 0x11bb || c == 0x116f) - error ("Not a name start character, U+" - + 
Integer.toHexString (c)); - break; - default: - if (c == 0x1174 || c == 0x119f - || c == 0x11ac || c == 0x11c3 - || c == 0x11f1) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - } - break; - default: - if (c == 0x0e46 || c == 0x1011 - || c == 0x212f || c == 0x0587 - || c == 0x0230 ) - error ("Not a name start character, U+" - + Integer.toHexString (c)); - } - } - // punt on exact tests from Appendix A; approximate - // them using the Unicode ID start/part rules - if (i == readBufferPos && isName) { - if (!Character.isUnicodeIdentifierStart (c) - && c != ':' && c != '_') - error ("Not a name start character, U+" - + Integer.toHexString (c)); - } else if (!Character.isUnicodeIdentifierPart (c) - && c != '-' && c != ':' && c != '_' && c != '.' - && !isExtender (c)) - error ("Not a name character, U+" - + Integer.toHexString (c)); - } - } - } - - nameBufferPos = 0; - - // Read the first character. + { + char c; + + if (USE_CHEATS) + { loop: - while (true) { - c = readCh (); - switch (c) { - case '%': - case '<': case '>': case '&': - case ',': case '|': case '*': case '+': case '?': - case ')': - case '=': - case '\'': case '"': - case '[': - case ' ': case '\t': case '\n': case '\r': - case ';': - case '/': - unread (c); - if (nameBufferPos == 0) { - error ("name expected"); - } - // punt on exact tests from Appendix A, but approximate them - if (isName - && !Character.isUnicodeIdentifierStart ( - nameBuffer [0]) - && ":_".indexOf (nameBuffer [0]) == -1) - error ("Not a name start character, U+" - + Integer.toHexString (nameBuffer [0])); - String s = intern (nameBuffer, 0, nameBufferPos); - nameBufferPos = 0; - return s; - default: - // punt on exact tests from Appendix A, but approximate them - - if ((nameBufferPos != 0 || !isName) - && !Character.isUnicodeIdentifierPart (c) - && ":-_.".indexOf (c) == -1 - && !isExtender (c)) - error ("Not a name character, U+" - + Integer.toHexString (c)); - if (nameBufferPos >= nameBuffer.length) - 
nameBuffer = - (char[]) extendArray (nameBuffer, - nameBuffer.length, nameBufferPos); - nameBuffer [nameBufferPos++] = c; - } - } - } - - private static boolean isExtender (char c) - { - // [88] Extender ::= ... - return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 - || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 - || (c >= 0x3031 && c <= 0x3035) - || (c >= 0x309d && c <= 0x309e) - || (c >= 0x30fc && c <= 0x30fe); - } + for (int i = readBufferPos; i < readBufferLength; i++) + { + c = readBuffer[i]; + switch (c) + { + case '%': + if (expandPE) + { + break loop; + } + // else fall through... + + // What may legitimately come AFTER a name/nmtoken? + case '<': case '>': case '&': + case ',': case '|': case '*': case '+': case '?': + case ')': + case '=': + case '\'': case '"': + case '[': + case ' ': case '\t': case '\r': case '\n': + case ';': + case '/': + int start = readBufferPos; + if (i == start) + { + error("name expected", readBuffer[i], null); + } + readBufferPos = i; + return intern(readBuffer, start, i - start); + + default: + // FIXME ... per IBM's OASIS test submission, these: + // ? 
U+06dd + // Combining U+309B + //these switches are kind of ugly but at least we won't + //have to go over the whole lits for each char + if (isName && i == readBufferPos) + { + char c2 = (char) (c & 0x00f0); + switch (c & 0xff00) + { + //starting with 01 + case 0x0100: + switch (c2) + { + case 0x0030: + if (c == 0x0132 || c == 0x0133 || c == 0x013f) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x0040: + if (c == 0x0140 || c == 0x0149) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x00c0: + if (c == 0x01c4 || c == 0x01cc) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x00f0: + if (c == 0x01f1 || c == 0x01f3) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x00b0: + if (c == 0x01f1 || c == 0x01f3) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + default: + if (c == 0x017f) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + } + + break; + //starting with 11 + case 0x1100: + switch (c2) + { + case 0x0000: + if (c == 0x1104 || c == 0x1108 || + c == 0x110a || c == 0x110d) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x0030: + if (c == 0x113b || c == 0x113f) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x0040: + if (c == 0x1141 || c == 0x114d + || c == 0x114f ) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x0050: + if (c == 0x1151 || c == 0x1156) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x0060: + if (c == 0x1162 || c == 0x1164 + || c == 0x1166 || c == 0x116b + || c == 0x116f) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + case 0x00b0: + if (c == 0x11b6 || c == 0x11b9 + || c == 
0x11bb || c == 0x116f) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + break; + default: + if (c == 0x1174 || c == 0x119f + || c == 0x11ac || c == 0x11c3 + || c == 0x11f1) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + } + break; + default: + if (c == 0x0e46 || c == 0x1011 + || c == 0x212f || c == 0x0587 + || c == 0x0230 ) + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + } + } + // punt on exact tests from Appendix A; approximate + // them using the Unicode ID start/part rules + if (i == readBufferPos && isName) + { + if (!Character.isUnicodeIdentifierStart(c) + && c != ':' && c != '_') + { + error("Not a name start character, U+" + + Integer.toHexString(c)); + } + } + else if (!Character.isUnicodeIdentifierPart(c) + && c != '-' && c != ':' && c != '_' && c != '.' + && !isExtender(c)) + { + error("Not a name character, U+" + + Integer.toHexString(c)); + } + } + } + } + + nameBufferPos = 0; + // Read the first character. 
+loop: + while (true) + { + c = readCh(); + switch (c) + { + case '%': + case '<': case '>': case '&': + case ',': case '|': case '*': case '+': case '?': + case ')': + case '=': + case '\'': case '"': + case '[': + case ' ': case '\t': case '\n': case '\r': + case ';': + case '/': + unread(c); + if (nameBufferPos == 0) + { + error ("name expected"); + } + // punt on exact tests from Appendix A, but approximate them + if (isName + && !Character.isUnicodeIdentifierStart(nameBuffer[0]) + && ":_".indexOf(nameBuffer[0]) == -1) + { + error("Not a name start character, U+" + + Integer.toHexString(nameBuffer[0])); + } + String s = intern(nameBuffer, 0, nameBufferPos); + nameBufferPos = 0; + return s; + default: + // punt on exact tests from Appendix A, but approximate them + + if ((nameBufferPos != 0 || !isName) + && !Character.isUnicodeIdentifierPart(c) + && ":-_.".indexOf(c) == -1 + && !isExtender(c)) + { + error("Not a name character, U+" + + Integer.toHexString(c)); + } + if (nameBufferPos >= nameBuffer.length) + { + nameBuffer = + (char[]) extendArray(nameBuffer, + nameBuffer.length, nameBufferPos); + } + nameBuffer[nameBufferPos++] = c; + } + } + } + + private static boolean isExtender(char c) + { + // [88] Extender ::= ... + return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 + || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 + || (c >= 0x3031 && c <= 0x3035) + || (c >= 0x309d && c <= 0x309e) + || (c >= 0x30fc && c <= 0x30fe); + } - /** - * Read a literal. With matching single or double quotes as - * delimiters (and not embedded!) this is used to parse: - * <pre> - * [9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ... - * [10] AttValue ::= ... ([^<&] | Reference)* ... - * [11] SystemLiteral ::= ... (URLchar - "'")* ... - * [12] PubidLiteral ::= ... (PubidChar - "'")* ... - * </pre> - * as well as the quoted strings in XML and text declarations - * (for version, encoding, and standalone) which have their - * own constraints. 
- */ - private String readLiteral (int flags) + /** + * Read a literal. With matching single or double quotes as + * delimiters (and not embedded!) this is used to parse: + * <pre> + * [9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ... + * [10] AttValue ::= ... ([^<&] | Reference)* ... + * [11] SystemLiteral ::= ... (URLchar - "'")* ... + * [12] PubidLiteral ::= ... (PubidChar - "'")* ... + * </pre> + * as well as the quoted strings in XML and text declarations + * (for version, encoding, and standalone) which have their + * own constraints. + */ + private String readLiteral(int flags) throws SAXException, IOException - { - char delim, c; - int startLine = line; - boolean saved = expandPE; - boolean savedReport = doReport; - - // Find the first delimiter. - delim = readCh (); - if (delim != '"' && delim != '\'') { - error ("expected '\"' or \"'\"", delim, null); - return null; - } - inLiteral = true; - if ((flags & LIT_DISABLE_PE) != 0) - expandPE = false; - doReport = false; - - // Each level of input source has its own buffer; remember - // ours, so we won't read the ending delimiter from any - // other input source, regardless of entity processing. - char ourBuf [] = readBuffer; - - // Read the literal. - try { - c = readCh (); - boolean ampRead = false; + { + char delim, c; + int startLine = line; + boolean saved = expandPE; + boolean savedReport = doReport; + + // Find the first delimiter. + delim = readCh(); + if (delim != '"' && delim != '\'') + { + error("expected '\"' or \"'\"", delim, null); + return null; + } + inLiteral = true; + if ((flags & LIT_DISABLE_PE) != 0) + { + expandPE = false; + } + doReport = false; + + // Each level of input source has its own buffer; remember + // ours, so we won't read the ending delimiter from any + // other input source, regardless of entity processing. + char[] ourBuf = readBuffer; + + // Read the literal. + try + { + c = readCh(); + boolean ampRead = false; loop: - while (! 
(c == delim && readBuffer == ourBuf)) { - switch (c) { - // attributes and public ids are normalized - // in almost the same ways - case '\n': - case '\r': - if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) - c = ' '; - break; - case '\t': - if ((flags & LIT_ATTRIBUTE) != 0) - c = ' '; - break; - case '&': - c = readCh (); - // Char refs are expanded immediately, except for - // all the cases where it's deferred. - if (c == '#') { - if ((flags & LIT_DISABLE_CREF) != 0) { - dataBufferAppend ('&'); - break; - } - parseCharRef (false /* Do not do flushDataBuffer */); - - // exotic WFness risk: this is an entity literal, - // dataBuffer [dataBufferPos - 1] == '&', and - // following chars are a _partial_ entity/char ref - - // It looks like an entity ref ... - } else { - unread (c); - // Expand it? - if ((flags & LIT_ENTITY_REF) > 0) { - parseEntityRef (false); - if (String.valueOf (readBuffer).equals("&")) - ampRead = true; + while (! (c == delim && readBuffer == ourBuf)) + { + switch (c) + { + // attributes and public ids are normalized + // in almost the same ways + case '\n': + case '\r': + if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) + { + c = ' '; + } + break; + case '\t': + if ((flags & LIT_ATTRIBUTE) != 0) + { + c = ' '; + } + break; + case '&': + c = readCh(); + // Char refs are expanded immediately, except for + // all the cases where it's deferred. + if (c == '#') + { + if ((flags & LIT_DISABLE_CREF) != 0) + { + dataBufferAppend('&'); + break; + } + parseCharRef(false /* Do not do flushDataBuffer */); + + // exotic WFness risk: this is an entity literal, + // dataBuffer [dataBufferPos - 1] == '&', and + // following chars are a _partial_ entity/char ref + + // It looks like an entity ref ... + } + else + { + unread(c); + // Expand it? + if ((flags & LIT_ENTITY_REF) > 0) + { + parseEntityRef(false); + if (String.valueOf(readBuffer).equals("&")) + { + ampRead = true; + } //Is it just data? 
- } else if ((flags & LIT_DISABLE_EREF) != 0) { - dataBufferAppend ('&'); - - // OK, it will be an entity ref -- expanded later. - } else { - String name = readNmtoken (true); - require (';'); - dataBufferAppend ('&'); - dataBufferAppend (name); - dataBufferAppend (';'); - } - } - c = readCh (); - continue loop; - - case '<': - // and why? Perhaps so "&foo;" expands the same - // inside and outside an attribute? - if ((flags & LIT_ATTRIBUTE) != 0) - error ("attribute values may not contain '<'"); - break; - - // We don't worry about case '%' and PE refs, readCh does. - - default: - break; - } - dataBufferAppend (c); - c = readCh (); - } - } catch (EOFException e) { - error ("end of input while looking for delimiter (started on line " - + startLine + ')', null, new Character (delim).toString ()); - } - inLiteral = false; - expandPE = saved; - doReport = savedReport; - - // Normalise whitespace if necessary. - if ((flags & LIT_NORMALIZE) > 0) { - dataBufferNormalize (); - } - - // Return the value. - return dataBufferToString (); - } - - - /** - * Try reading external identifiers. - * A system identifier is not required for notations. - * @param inNotation Are we parsing a notation decl? - * @param isSubset Parsing external subset decl (may be omitted)? - * @return A three-member String array containing the identifiers, - * or nulls. Order: public, system, baseURI. - */ - private String[] readExternalIds (boolean inNotation, boolean isSubset) + } + else if ((flags & LIT_DISABLE_EREF) != 0) + { + dataBufferAppend('&'); + + // OK, it will be an entity ref -- expanded later. + } + else + { + String name = readNmtoken(true); + require(';'); + dataBufferAppend('&'); + dataBufferAppend(name); + dataBufferAppend(';'); + } + } + c = readCh(); + continue loop; + + case '<': + // and why? Perhaps so "&foo;" expands the same + // inside and outside an attribute? 
+ if ((flags & LIT_ATTRIBUTE) != 0) + { + error("attribute values may not contain '<'"); + } + break; + + // We don't worry about case '%' and PE refs, readCh does. + + default: + break; + } + dataBufferAppend(c); + c = readCh(); + } + } + catch (EOFException e) + { + error("end of input while looking for delimiter (started on line " + + startLine + ')', null, new Character(delim).toString()); + } + inLiteral = false; + expandPE = saved; + doReport = savedReport; + + // Normalise whitespace if necessary. + if ((flags & LIT_NORMALIZE) > 0) + { + dataBufferNormalize(); + } + + // Return the value. + return dataBufferToString(); + } + + /** + * Try reading external identifiers. + * A system identifier is not required for notations. + * @param inNotation Are we parsing a notation decl? + * @param isSubset Parsing external subset decl (may be omitted)? + * @return A three-member String array containing the identifiers, + * or nulls. Order: public, system, baseURI. + */ + private ExternalIdentifiers readExternalIds(boolean inNotation, + boolean isSubset) throws Exception - { - char c; - String ids[] = new String [3]; - int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; - - if (tryRead ("PUBLIC")) { - requireWhitespace (); - ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags); - if (inNotation) { - skipWhitespace (); - c = readCh (); - unread (c); - if (c == '"' || c == '\'') { - ids [1] = readLiteral (flags); - } - } else { - requireWhitespace (); - ids [1] = readLiteral (flags); - } - - for (int i = 0; i < ids [0].length (); i++) { - c = ids [0].charAt (i); - if (c >= 'a' && c <= 'z') - continue; - if (c >= 'A' && c <= 'Z') - continue; - if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1) - continue; - error ("illegal PUBLIC id character U+" - + Integer.toHexString (c)); - } - } else if (tryRead ("SYSTEM")) { - requireWhitespace (); - ids [1] = readLiteral (flags); - } else if (!isSubset) - error ("missing SYSTEM or PUBLIC keyword"); - - if 
(ids [1] != null) { - if (ids [1].indexOf ('#') != -1) - handler.verror ("SYSTEM id has a URI fragment: " + ids [1]); - ids [2] = handler.getSystemId (); - if (ids [2] == null) - handler.warn ("No base URI; hope URI is absolute: " - + ids [1]); - } - - return ids; - } - - - /** - * Test if a character is whitespace. - * <pre> - * [3] S ::= (#x20 | #x9 | #xd | #xa)+ - * </pre> - * @param c The character to test. - * @return true if the character is whitespace. - */ - private final boolean isWhitespace (char c) - { - if (c > 0x20) - return false; - if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d) - return true; - return false; // illegal ... - } - - - ////////////////////////////////////////////////////////////////////// - // Utility routines. - ////////////////////////////////////////////////////////////////////// - - - /** - * Add a character to the data buffer. - */ - private void dataBufferAppend (char c) - { - // Expand buffer if necessary. - if (dataBufferPos >= dataBuffer.length) - dataBuffer = - (char[]) extendArray (dataBuffer, - dataBuffer.length, dataBufferPos); - dataBuffer [dataBufferPos++] = c; - } - - - /** - * Add a string to the data buffer. 
- */ - private void dataBufferAppend (String s) - { - dataBufferAppend (s.toCharArray (), 0, s.length ()); - } - + { + char c; + ExternalIdentifiers ids = new ExternalIdentifiers(); + int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; + + if (tryRead("PUBLIC")) + { + requireWhitespace(); + ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags); + if (inNotation) + { + skipWhitespace(); + c = readCh(); + unread(c); + if (c == '"' || c == '\'') + { + ids.systemId = readLiteral(flags); + } + } + else + { + requireWhitespace(); + ids.systemId = readLiteral(flags); + } + + for (int i = 0; i < ids.publicId.length(); i++) + { + c = ids.publicId.charAt(i); + if (c >= 'a' && c <= 'z') + { + continue; + } + if (c >= 'A' && c <= 'Z') + { + continue; + } + if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(c) != -1) + { + continue; + } + error("illegal PUBLIC id character U+" + + Integer.toHexString(c)); + } + } + else if (tryRead("SYSTEM")) + { + requireWhitespace(); + ids.systemId = readLiteral(flags); + } + else if (!isSubset) + { + error("missing SYSTEM or PUBLIC keyword"); + } + + if (ids.systemId != null) + { + if (ids.systemId.indexOf('#') != -1) + { + handler.verror("SYSTEM id has a URI fragment: " + ids.systemId); + } + ids.baseUri = handler.getSystemId(); + if (ids.baseUri == null && uriWarnings) + { + handler.warn("No base URI; hope URI is absolute: " + + ids.systemId); + } + } + + return ids; + } - /** - * Append (part of) a character array to the data buffer. - */ - private void dataBufferAppend (char ch[], int start, int length) - { - dataBuffer = (char[]) - extendArray (dataBuffer, dataBuffer.length, - dataBufferPos + length); + /** + * Test if a character is whitespace. + * <pre> + * [3] S ::= (#x20 | #x9 | #xd | #xa)+ + * </pre> + * @param c The character to test. + * @return true if the character is whitespace. 
+ */ + private final boolean isWhitespace(char c) + { + if (c > 0x20) + { + return false; + } + if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d) + { + return true; + } + return false; // illegal ... + } - System.arraycopy (ch, start, dataBuffer, dataBufferPos, length); - dataBufferPos += length; - } + ////////////////////////////////////////////////////////////////////// + // Utility routines. + ////////////////////////////////////////////////////////////////////// + + /** + * Add a character to the data buffer. + */ + private void dataBufferAppend(char c) + { + // Expand buffer if necessary. + if (dataBufferPos >= dataBuffer.length) + { + dataBuffer = (char[]) extendArray(dataBuffer, + dataBuffer.length, dataBufferPos); + } + dataBuffer[dataBufferPos++] = c; + } + /** + * Add a string to the data buffer. + */ + private void dataBufferAppend(String s) + { + dataBufferAppend(s.toCharArray(), 0, s.length()); + } - /** - * Normalise space characters in the data buffer. - */ - private void dataBufferNormalize () - { - int i = 0; - int j = 0; - int end = dataBufferPos; - - // Skip spaces at the start. - while (j < end && dataBuffer [j] == ' ') { - j++; - } - - // Skip whitespace at the end. - while (end > j && dataBuffer [end - 1] == ' ') { - end --; - } - - // Start copying to the left. - while (j < end) { - - char c = dataBuffer [j++]; - - // Normalise all other spaces to - // a single space. - if (c == ' ') { - while (j < end && dataBuffer [j++] == ' ') - continue; - dataBuffer [i++] = ' '; - dataBuffer [i++] = dataBuffer [j - 1]; - } else { - dataBuffer [i++] = c; - } - } - - // The new length is <= the old one. - dataBufferPos = i; - } + /** + * Append (part of) a character array to the data buffer. 
+ */ + private void dataBufferAppend(char[] ch, int start, int length) + { + dataBuffer = (char[]) extendArray(dataBuffer, dataBuffer.length, + dataBufferPos + length); + + System.arraycopy(ch, start, dataBuffer, dataBufferPos, length); + dataBufferPos += length; + } + /** + * Normalise space characters in the data buffer. + */ + private void dataBufferNormalize() + { + int i = 0; + int j = 0; + int end = dataBufferPos; + + // Skip spaces at the start. + while (j < end && dataBuffer[j] == ' ') + { + j++; + } + + // Skip whitespace at the end. + while (end > j && dataBuffer[end - 1] == ' ') + { + end --; + } - /** - * Convert the data buffer to a string. - */ - private String dataBufferToString () - { - String s = new String (dataBuffer, 0, dataBufferPos); - dataBufferPos = 0; - return s; - } + // Start copying to the left. + while (j < end) + { + + char c = dataBuffer[j++]; + + // Normalise all other spaces to + // a single space. + if (c == ' ') + { + while (j < end && dataBuffer[j++] == ' ') + { + continue; + } + dataBuffer[i++] = ' '; + dataBuffer[i++] = dataBuffer[j - 1]; + } + else + { + dataBuffer[i++] = c; + } + } + + // The new length is <= the old one. + dataBufferPos = i; + } + /** + * Convert the data buffer to a string. + */ + private String dataBufferToString() + { + String s = new String(dataBuffer, 0, dataBufferPos); + dataBufferPos = 0; + return s; + } - /** - * Flush the contents of the data buffer to the handler, as - * appropriate, and reset the buffer for new input. - */ - private void dataBufferFlush () + /** + * Flush the contents of the data buffer to the handler, as + * appropriate, and reset the buffer for new input. 
+ */ + private void dataBufferFlush() throws SAXException - { - if (currentElementContent == CONTENT_ELEMENTS - && dataBufferPos > 0 - && !inCDATA - ) { - // We can't just trust the buffer to be whitespace, there - // are (error) cases when it isn't - for (int i = 0; i < dataBufferPos; i++) { - if (!isWhitespace (dataBuffer [i])) { - handler.charData (dataBuffer, 0, dataBufferPos); - dataBufferPos = 0; - } - } - if (dataBufferPos > 0) { - handler.ignorableWhitespace (dataBuffer, 0, dataBufferPos); - dataBufferPos = 0; - } - } else if (dataBufferPos > 0) { - handler.charData (dataBuffer, 0, dataBufferPos); - dataBufferPos = 0; - } - } - + { + if (currentElementContent == CONTENT_ELEMENTS + && dataBufferPos > 0 + && !inCDATA) + { + // We can't just trust the buffer to be whitespace, there + // are (error) cases when it isn't + for (int i = 0; i < dataBufferPos; i++) + { + if (!isWhitespace(dataBuffer[i])) + { + handler.charData(dataBuffer, 0, dataBufferPos); + dataBufferPos = 0; + } + } + if (dataBufferPos > 0) + { + handler.ignorableWhitespace(dataBuffer, 0, dataBufferPos); + dataBufferPos = 0; + } + } + else if (dataBufferPos > 0) + { + handler.charData(dataBuffer, 0, dataBufferPos); + dataBufferPos = 0; + } + } - /** - * Require a string to appear, or throw an exception. - * <p><em>Precondition:</em> Entity expansion is not required. - * <p><em>Precondition:</em> data buffer has no characters that - * will get sent to the application. - */ - private void require (String delim) + /** + * Require a string to appear, or throw an exception. + * <p><em>Precondition:</em> Entity expansion is not required. + * <p><em>Precondition:</em> data buffer has no characters that + * will get sent to the application. 
+ */ + private void require(String delim) throws SAXException, IOException - { - int length = delim.length (); - char ch []; - - if (length < dataBuffer.length) { - ch = dataBuffer; - delim.getChars (0, length, ch, 0); - } else - ch = delim.toCharArray (); - - if (USE_CHEATS - && length <= (readBufferLength - readBufferPos)) { - int offset = readBufferPos; - - for (int i = 0; i < length; i++, offset++) - if (ch [i] != readBuffer [offset]) - error ("required string", null, delim); - readBufferPos = offset; - - } else { - for (int i = 0; i < length; i++) - require (ch [i]); - } - } - + { + int length = delim.length(); + char[] ch; + + if (length < dataBuffer.length) + { + ch = dataBuffer; + delim.getChars(0, length, ch, 0); + } + else + { + ch = delim.toCharArray(); + } + + if (USE_CHEATS && length <= (readBufferLength - readBufferPos)) + { + int offset = readBufferPos; + + for (int i = 0; i < length; i++, offset++) + { + if (ch[i] != readBuffer[offset]) + { + error ("required string", null, delim); + } + } + readBufferPos = offset; + + } + else + { + for (int i = 0; i < length; i++) + { + require(ch[i]); + } + } + } - /** - * Require a character to appear, or throw an exception. - */ - private void require (char delim) + /** + * Require a character to appear, or throw an exception. + */ + private void require(char delim) throws SAXException, IOException - { - char c = readCh (); - - if (c != delim) { - error ("required character", c, new Character (delim).toString ()); - } - } - - - /** - * Create an interned string from a character array. - * Ælfred uses this method to create an interned version - * of all names and name tokens, so that it can test equality - * with <code>==</code> instead of <code>String.equals ()</code>. - * - * <p>This is much more efficient than constructing a non-interned - * string first, and then interning it. - * - * @param ch an array of characters for building the string. - * @param start the starting position in the array. 
- * @param length the number of characters to place in the string. - * @return an interned string. - * @see #intern (String) - * @see java.lang.String#intern - */ - public String intern (char ch[], int start, int length) - { - int index = 0; - int hash = 0; - Object bucket []; - - // Generate a hash code. This is a widely used string hash, - // often attributed to Brian Kernighan. - for (int i = start; i < start + length; i++) - hash = 31 * hash + ch [i]; - hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH; - - // Get the bucket -- consists of {array,String} pairs - if ((bucket = symbolTable [hash]) == null) { - // first string in this bucket - bucket = new Object [8]; - - // Search for a matching tuple, and - // return the string if we find one. - } else { - while (index < bucket.length) { - char chFound [] = (char []) bucket [index]; - - // Stop when we hit an empty entry. - if (chFound == null) - break; - - // If they're the same length, check for a match. - if (chFound.length == length) { - for (int i = 0; i < chFound.length; i++) { - // continue search on failure - if (ch [start + i] != chFound [i]) { - break; - } else if (i == length - 1) { - // That's it, we have a match! - return (String) bucket [index + 1]; - } - } - } - index += 2; - } - // Not found -- we'll have to add it. - - // Do we have to grow the bucket? - bucket = (Object []) extendArray (bucket, bucket.length, index); - } - symbolTable [hash] = bucket; - - // OK, add it to the end of the bucket -- "local" interning. - // Intern "globally" to let applications share interning benefits. - // That is, "!=" and "==" work on our strings, not just equals(). - String s = new String (ch, start, length).intern (); - bucket [index] = s.toCharArray (); - bucket [index + 1] = s; - return s; - } - - /** - * Ensure the capacity of an array, allocating a new one if - * necessary. Usually extends only for name hash collisions. 
- */ - private Object extendArray (Object array, int currentSize, int requiredSize) - { - if (requiredSize < currentSize) { - return array; - } else { - Object newArray = null; - int newSize = currentSize * 2; - - if (newSize <= requiredSize) - newSize = requiredSize + 1; - - if (array instanceof char[]) - newArray = new char [newSize]; - else if (array instanceof Object[]) - newArray = new Object [newSize]; - else - throw new RuntimeException (); - - System.arraycopy (array, 0, newArray, 0, currentSize); - return newArray; - } - } - - - ////////////////////////////////////////////////////////////////////// - // XML query routines. - ////////////////////////////////////////////////////////////////////// - - - boolean isStandalone () { return docIsStandalone; } - - - // - // Elements - // - - private int getContentType (Object element [], int defaultType) - { - int retval; - - if (element == null) - return defaultType; - retval = ((Integer) element [0]).intValue (); - if (retval == CONTENT_UNDECLARED) - retval = defaultType; - return retval; - } - - - /** - * Look up the content type of an element. - * @param name The element type name. - * @return An integer constant representing the content type. - * @see #CONTENT_UNDECLARED - * @see #CONTENT_ANY - * @see #CONTENT_EMPTY - * @see #CONTENT_MIXED - * @see #CONTENT_ELEMENTS - */ - public int getElementContentType (String name) - { - Object element [] = (Object []) elementInfo.get (name); - return getContentType (element, CONTENT_UNDECLARED); - } - - - /** - * Register an element. - * Array format: - * [0] element type name - * [1] content model (mixed, elements only) - * [2] attribute hash table - */ - private void setElement ( - String name, - int contentType, - String contentModel, - Hashtable attributes - ) throws SAXException - { - if (skippedPE) - return; - - Object element [] = (Object []) elementInfo.get (name); - - // first <!ELEMENT ...> or <!ATTLIST ...> for this type? 
- if (element == null) { - element = new Object [3]; - element [0] = new Integer (contentType); - element [1] = contentModel; - element [2] = attributes; - elementInfo.put (name, element); - return; - } - - // <!ELEMENT ...> declaration? - if (contentType != CONTENT_UNDECLARED) { - // ... following an associated <!ATTLIST ...> - if (((Integer) element [0]).intValue () == CONTENT_UNDECLARED) { - element [0] = new Integer (contentType); - element [1] = contentModel; - } else - // VC: Unique Element Type Declaration - handler.verror ("multiple declarations for element type: " - + name); - } - - // first <!ATTLIST ...>, before <!ELEMENT ...> ? - else if (attributes != null) - element [2] = attributes; - } - - - /** - * Look up the attribute hash table for an element. - * The hash table is the second item in the element array. - */ - private Hashtable getElementAttributes (String name) - { - Object element[] = (Object[]) elementInfo.get (name); - if (element == null) - return null; - else - return (Hashtable) element [2]; - } - - - - // - // Attributes - // + { + char c = readCh(); + + if (c != delim) + { + error("required character", c, new Character(delim).toString()); + } + } + + /** + * Create an interned string from a character array. + * Ælfred uses this method to create an interned version + * of all names and name tokens, so that it can test equality + * with <code>==</code> instead of <code>String.equals ()</code>. + * + * <p>This is much more efficient than constructing a non-interned + * string first, and then interning it. + * + * @param ch an array of characters for building the string. + * @param start the starting position in the array. + * @param length the number of characters to place in the string. + * @return an interned string. + * @see #intern (String) + * @see java.lang.String#intern + */ + public String intern(char[] ch, int start, int length) + { + int index = 0; + int hash = 0; + Object[] bucket; + + // Generate a hash code. 
This is a widely used string hash, + // often attributed to Brian Kernighan. + for (int i = start; i < start + length; i++) + { + hash = 31 * hash + ch[i]; + } + hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH; + + // Get the bucket -- consists of {array,String} pairs + if ((bucket = symbolTable[hash]) == null) + { + // first string in this bucket + bucket = new Object[8]; + + // Search for a matching tuple, and + // return the string if we find one. + } + else + { + while (index < bucket.length) + { + char[] chFound = (char[]) bucket[index]; + + // Stop when we hit an empty entry. + if (chFound == null) + { + break; + } + + // If they're the same length, check for a match. + if (chFound.length == length) + { + for (int i = 0; i < chFound.length; i++) + { + // continue search on failure + if (ch[start + i] != chFound[i]) + { + break; + } + else if (i == length - 1) + { + // That's it, we have a match! + return (String) bucket[index + 1]; + } + } + } + index += 2; + } + // Not found -- we'll have to add it. + + // Do we have to grow the bucket? + bucket = (Object[]) extendArray(bucket, bucket.length, index); + } + symbolTable[hash] = bucket; + + // OK, add it to the end of the bucket -- "local" interning. + // Intern "globally" to let applications share interning benefits. + // That is, "!=" and "==" work on our strings, not just equals(). + String s = new String(ch, start, length).intern(); + bucket[index] = s.toCharArray(); + bucket[index + 1] = s; + return s; + } - /** - * Get the declared attributes for an element type. - * @param elname The name of the element type. - * @return An Enumeration of all the attributes declared for - * a specific element type. The results will be valid only - * after the DTD (if any) has been parsed. 
- * @see #getAttributeType - * @see #getAttributeEnumeration - * @see #getAttributeDefaultValueType - * @see #getAttributeDefaultValue - * @see #getAttributeExpandedValue - */ - private Enumeration declaredAttributes (Object element []) - { - Hashtable attlist; + /** + * Ensure the capacity of an array, allocating a new one if + * necessary. Usually extends only for name hash collisions. + */ + private Object extendArray(Object array, int currentSize, int requiredSize) + { + if (requiredSize < currentSize) + { + return array; + } + else + { + Object newArray = null; + int newSize = currentSize * 2; + + if (newSize <= requiredSize) + { + newSize = requiredSize + 1; + } + + if (array instanceof char[]) + { + newArray = new char[newSize]; + } + else if (array instanceof Object[]) + { + newArray = new Object[newSize]; + } + else + { + throw new RuntimeException(); + } + + System.arraycopy(array, 0, newArray, 0, currentSize); + return newArray; + } + } - if (element == null) - return null; - if ((attlist = (Hashtable) element [2]) == null) - return null; - return attlist.keys (); - } + ////////////////////////////////////////////////////////////////////// + // XML query routines. + ////////////////////////////////////////////////////////////////////// + + boolean isStandalone() + { + return docIsStandalone; + } + + // + // Elements + // + + private int getContentType(ElementDecl element, int defaultType) + { + int retval; + + if (element == null) + { + return defaultType; + } + retval = element.contentType; + if (retval == CONTENT_UNDECLARED) + { + retval = defaultType; + } + return retval; + } - /** - * Get the declared attributes for an element type. - * @param elname The name of the element type. - * @return An Enumeration of all the attributes declared for - * a specific element type. The results will be valid only - * after the DTD (if any) has been parsed. 
- * @see #getAttributeType - * @see #getAttributeEnumeration - * @see #getAttributeDefaultValueType - * @see #getAttributeDefaultValue - * @see #getAttributeExpandedValue - */ - public Enumeration declaredAttributes (String elname) - { - return declaredAttributes ((Object []) elementInfo.get (elname)); - } + /** + * Look up the content type of an element. + * @param name The element type name. + * @return An integer constant representing the content type. + * @see #CONTENT_UNDECLARED + * @see #CONTENT_ANY + * @see #CONTENT_EMPTY + * @see #CONTENT_MIXED + * @see #CONTENT_ELEMENTS + */ + public int getElementContentType(String name) + { + ElementDecl element = (ElementDecl) elementInfo.get(name); + return getContentType(element, CONTENT_UNDECLARED); + } + + /** + * Register an element. + * Array format: + * [0] element type name + * [1] content model (mixed, elements only) + * [2] attribute hash table + */ + private void setElement(String name, int contentType, + String contentModel, HashMap attributes) + throws SAXException + { + if (skippedPE) + { + return; + } + ElementDecl element = (ElementDecl) elementInfo.get(name); + + // first <!ELEMENT ...> or <!ATTLIST ...> for this type? + if (element == null) + { + element = new ElementDecl(); + element.contentType = contentType; + element.contentModel = contentModel; + element.attributes = attributes; + elementInfo.put(name, element); + return; + } + + // <!ELEMENT ...> declaration? + if (contentType != CONTENT_UNDECLARED) + { + // ... following an associated <!ATTLIST ...> + if (element.contentType == CONTENT_UNDECLARED) + { + element.contentType = contentType; + element.contentModel = contentModel; + } + else + { + // VC: Unique Element Type Declaration + handler.verror("multiple declarations for element type: " + + name); + } + } + + // first <!ATTLIST ...>, before <!ELEMENT ...> ? 
+ else if (attributes != null) + { + element.attributes = attributes; + } + } + + /** + * Look up the attribute hash table for an element. + * The hash table is the second item in the element array. + */ + private HashMap getElementAttributes(String name) + { + ElementDecl element = (ElementDecl) elementInfo.get(name); + return (element == null) ? null : element.attributes; + } - /** - * Retrieve the declared type of an attribute. - * @param name The name of the associated element. - * @param aname The name of the attribute. - * @return An interend string denoting the type, or null - * indicating an undeclared attribute. - */ - public String getAttributeType (String name, String aname) - { - Object attribute[] = getAttribute (name, aname); - if (attribute == null) { - return null; - } else { - return (String) attribute [0]; - } - } + // + // Attributes + // + + /** + * Get the declared attributes for an element type. + * @param elname The name of the element type. + * @return An iterator over all the attributes declared for + * a specific element type. The results will be valid only + * after the DTD (if any) has been parsed. + * @see #getAttributeType + * @see #getAttributeEnumeration + * @see #getAttributeDefaultValueType + * @see #getAttributeDefaultValue + * @see #getAttributeExpandedValue + */ + private Iterator declaredAttributes(ElementDecl element) + { + HashMap attlist; + + if (element == null) + { + return null; + } + if ((attlist = element.attributes) == null) + { + return null; + } + return attlist.keySet().iterator(); + } + /** + * Get the declared attributes for an element type. + * @param elname The name of the element type. + * @return An iterator over all the attributes declared for + * a specific element type. The results will be valid only + * after the DTD (if any) has been parsed. 
+ * @see #getAttributeType + * @see #getAttributeEnumeration + * @see #getAttributeDefaultValueType + * @see #getAttributeDefaultValue + * @see #getAttributeExpandedValue + */ + public Iterator declaredAttributes(String elname) + { + return declaredAttributes((ElementDecl) elementInfo.get(elname)); + } - /** - * Retrieve the allowed values for an enumerated attribute type. - * @param name The name of the associated element. - * @param aname The name of the attribute. - * @return A string containing the token list. - */ - public String getAttributeEnumeration (String name, String aname) - { - Object attribute[] = getAttribute (name, aname); - if (attribute == null) { - return null; - } else { - // assert: attribute [0] is "ENUMERATION" or "NOTATION" - return (String) attribute [3]; - } - } + /** + * Retrieve the declared type of an attribute. + * @param name The name of the associated element. + * @param aname The name of the attribute. + * @return An interend string denoting the type, or null + * indicating an undeclared attribute. + */ + public String getAttributeType(String name, String aname) + { + AttributeDecl attribute = getAttribute(name, aname); + return (attribute == null) ? null : attribute.type; + } + /** + * Retrieve the allowed values for an enumerated attribute type. + * @param name The name of the associated element. + * @param aname The name of the attribute. + * @return A string containing the token list. + */ + public String getAttributeEnumeration(String name, String aname) + { + AttributeDecl attribute = getAttribute(name, aname); + // assert: attribute.enumeration is "ENUMERATION" or "NOTATION" + return (attribute == null) ? null : attribute.enumeration; + } - /** - * Retrieve the default value of a declared attribute. - * @param name The name of the associated element. - * @param aname The name of the attribute. - * @return The default value, or null if the attribute was - * #IMPLIED or simply undeclared and unspecified. 
- * @see #getAttributeExpandedValue - */ - public String getAttributeDefaultValue (String name, String aname) - { - Object attribute[] = getAttribute (name, aname); - if (attribute == null) { - return null; - } else { - return (String) attribute [1]; - } - } + /** + * Retrieve the default value of a declared attribute. + * @param name The name of the associated element. + * @param aname The name of the attribute. + * @return The default value, or null if the attribute was + * #IMPLIED or simply undeclared and unspecified. + * @see #getAttributeExpandedValue + */ + public String getAttributeDefaultValue(String name, String aname) + { + AttributeDecl attribute = getAttribute(name, aname); + return (attribute == null) ? null : attribute.value; + } /* @@ -3325,1789 +3940,1896 @@ loop: * @param name The name of the associated element. * @param aname The name of the attribute. * @return The expanded default value, or null if the attribute was - * #IMPLIED or simply undeclared + * #IMPLIED or simply undeclared * @see #getAttributeDefaultValue public String getAttributeExpandedValue (String name, String aname) throws Exception { - Object attribute[] = getAttribute (name, aname); - - if (attribute == null) { - return null; - } else if (attribute [4] == null && attribute [1] != null) { - // we MUST use the same buf for both quotes else the literal - // can't be properly terminated - char buf [] = new char [1]; - int flags = LIT_ENTITY_REF | LIT_ATTRIBUTE; - String type = getAttributeType (name, aname); - - if (type != "CDATA" && type != null) - flags |= LIT_NORMALIZE; - buf [0] = '"'; - pushCharArray (null, buf, 0, 1); - pushString (null, (String) attribute [1]); - pushCharArray (null, buf, 0, 1); - attribute [4] = readLiteral (flags); - } - return (String) attribute [4]; + AttributeDecl attribute = getAttribute (name, aname); + + if (attribute == null) { + return null; + } else if (attribute.defaultValue == null && attribute.value != null) { + // we MUST use the same buf 
for both quotes else the literal + // can't be properly terminated + char buf [] = new char [1]; + int flags = LIT_ENTITY_REF | LIT_ATTRIBUTE; + String type = getAttributeType (name, aname); + + if (type != "CDATA" && type != null) + flags |= LIT_NORMALIZE; + buf [0] = '"'; + pushCharArray (null, buf, 0, 1); + pushString (null, attribute.value); + pushCharArray (null, buf, 0, 1); + attribute.defaultValue = readLiteral (flags); + } + return attribute.defaultValue; } */ - /** - * Retrieve the default value mode of a declared attribute. - * @see #ATTRIBUTE_DEFAULT_SPECIFIED - * @see #ATTRIBUTE_DEFAULT_IMPLIED - * @see #ATTRIBUTE_DEFAULT_REQUIRED - * @see #ATTRIBUTE_DEFAULT_FIXED - */ - public int getAttributeDefaultValueType (String name, String aname) - { - Object attribute[] = getAttribute (name, aname); - if (attribute == null) { - return ATTRIBUTE_DEFAULT_UNDECLARED; - } else { - return ((Integer) attribute [2]).intValue (); - } - } - - - /** - * Register an attribute declaration for later retrieval. - * Format: - * - String type - * - String default value - * - int value type - * - enumeration - * - processed default value - */ - private void setAttribute (String elName, String name, String type, - String enumeration, - String value, int valueType) + /** + * Retrieve the default value mode of a declared attribute. + * @see #ATTRIBUTE_DEFAULT_SPECIFIED + * @see #ATTRIBUTE_DEFAULT_IMPLIED + * @see #ATTRIBUTE_DEFAULT_REQUIRED + * @see #ATTRIBUTE_DEFAULT_FIXED + */ + public int getAttributeDefaultValueType(String name, String aname) + { + AttributeDecl attribute = getAttribute(name, aname); + return (attribute == null) ? ATTRIBUTE_DEFAULT_UNDECLARED : + attribute.valueType; + } + + /** + * Register an attribute declaration for later retrieval. 
+ * Format: + * - String type + * - String default value + * - int value type + * - enumeration + * - processed default value + */ + private void setAttribute(String elName, String name, String type, + String enumeration, String value, int valueType) throws Exception - { - Hashtable attlist; - - if (skippedPE) - return; - - // Create a new hashtable if necessary. - attlist = getElementAttributes (elName); - if (attlist == null) - attlist = new Hashtable (); - - // ignore multiple attribute declarations! - if (attlist.get (name) != null) { - // warn ... - return; - } else { - Object attribute [] = new Object [5]; - attribute [0] = type; - attribute [1] = value; - attribute [2] = new Integer (valueType); - attribute [3] = enumeration; - attribute [4] = null; - attlist.put (name, attribute); - - // save; but don't overwrite any existing <!ELEMENT ...> - setElement (elName, CONTENT_UNDECLARED, null, attlist); - } - } - - - /** - * Retrieve the array representing an attribute declaration. - */ - private Object[] getAttribute (String elName, String name) - { - Hashtable attlist; - - attlist = getElementAttributes (elName); - if (attlist == null) - return null; - return (Object[]) attlist.get (name); - } - + { + HashMap attlist; + + if (skippedPE) + { + return; + } + + // Create a new hashtable if necessary. + attlist = getElementAttributes(elName); + if (attlist == null) + { + attlist = new HashMap(); + } + + // ignore multiple attribute declarations! + if (attlist.get(name) != null) + { + // warn ... + return; + } + else + { + AttributeDecl attribute = new AttributeDecl(); + attribute.type = type; + attribute.value = value; + attribute.valueType = valueType; + attribute.enumeration = enumeration; + attlist.put(name, attribute); + + // save; but don't overwrite any existing <!ELEMENT ...> + setElement(elName, CONTENT_UNDECLARED, null, attlist); + } + } - // - // Entities - // + /** + * Retrieve the attribute declaration for the given element name and name. 
+ */ + private AttributeDecl getAttribute(String elName, String name) + { + HashMap attlist = getElementAttributes(elName); + return (attlist == null) ? null : (AttributeDecl) attlist.get(name); + } - /** - * Find the type of an entity. - * @returns An integer constant representing the entity type. - * @see #ENTITY_UNDECLARED - * @see #ENTITY_INTERNAL - * @see #ENTITY_NDATA - * @see #ENTITY_TEXT - */ - public int getEntityType (String ename) - { - Object entity[] = (Object[]) entityInfo.get (ename); - if (entity == null) { - return ENTITY_UNDECLARED; - } else { - return ((Integer) entity [0]).intValue (); - } - } + // + // Entities + // + + /** + * Find the type of an entity. + * @returns An integer constant representing the entity type. + * @see #ENTITY_UNDECLARED + * @see #ENTITY_INTERNAL + * @see #ENTITY_NDATA + * @see #ENTITY_TEXT + */ + public int getEntityType(String ename) + { + EntityInfo entity = (EntityInfo) entityInfo.get(ename); + return (entity == null) ? ENTITY_UNDECLARED : entity.type; + } + /** + * Return an external entity's identifiers. + * @param ename The name of the external entity. + * @return The entity's public identifier, system identifier, and base URI. + * Null if the entity was not declared as an external entity. + * @see #getEntityType + */ + public ExternalIdentifiers getEntityIds(String ename) + { + EntityInfo entity = (EntityInfo) entityInfo.get(ename); + return (entity == null) ? null : entity.ids; + } - /** - * Return an external entity's identifier array. - * @param ename The name of the external entity. - * @return Three element array containing (in order) the entity's - * public identifier, system identifier, and base URI. Null if - * the entity was not declared as an external entity. 
- * @see #getEntityType - */ - public String [] getEntityIds (String ename) - { - Object entity[] = (Object[]) entityInfo.get (ename); - if (entity == null) { - return null; - } else { - return (String []) entity [1]; - } - } + /** + * Return an internal entity's replacement text. + * @param ename The name of the internal entity. + * @return The entity's replacement text, or null if + * the entity was not declared as an internal entity. + * @see #getEntityType + */ + public String getEntityValue(String ename) + { + EntityInfo entity = (EntityInfo) entityInfo.get(ename); + return (entity == null) ? null : entity.value; + } + /** + * Register an entity declaration for later retrieval. + */ + private void setInternalEntity(String eName, String value) + throws SAXException + { + if (skippedPE) + { + return; + } - /** - * Return an internal entity's replacement text. - * @param ename The name of the internal entity. - * @return The entity's replacement text, or null if - * the entity was not declared as an internal entity. - * @see #getEntityType - */ - public String getEntityValue (String ename) - { - Object entity[] = (Object[]) entityInfo.get (ename); - if (entity == null) { - return null; - } else { - return (String) entity [3]; - } - } + if (entityInfo.get(eName) == null) + { + EntityInfo entity = new EntityInfo(); + entity.type = ENTITY_INTERNAL; + entity.value = value; + entityInfo.put(eName, entity); + } + if (handler.stringInterning) + { + if ("lt" == eName || "gt" == eName || "quot" == eName + || "apos" == eName || "amp" == eName) + { + return; + } + } + else + { + if ("lt".equals(eName) || "gt".equals(eName) || "quot".equals(eName) + || "apos".equals(eName) || "amp".equals(eName)) + { + return; + } + } + handler.getDeclHandler().internalEntityDecl(eName, value); + } + /** + * Register an external entity declaration for later retrieval. 
+ */ + private void setExternalEntity(String eName, int eClass, + ExternalIdentifiers ids, String nName) + { + if (entityInfo.get(eName) == null) + { + EntityInfo entity = new EntityInfo(); + entity.type = eClass; + entity.ids = ids; + entity.notationName = nName; + entityInfo.put(eName, entity); + } + } - /** - * Register an entity declaration for later retrieval. - */ - private void setInternalEntity (String eName, String value) + // + // Notations. + // + + /** + * Report a notation declaration, checking for duplicates. + */ + private void setNotation(String nname, ExternalIdentifiers ids) throws SAXException - { - if (skippedPE) - return; - - if (entityInfo.get (eName) == null) { - Object entity[] = new Object [5]; - entity [0] = new Integer (ENTITY_INTERNAL); -// FIXME: shrink!! [2] useless - entity [3] = value; - entityInfo.put (eName, entity); - } - if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { - if ("lt" == eName || "gt" == eName || "quot" == eName - || "apos" == eName || "amp" == eName) - return; - } else { - if ("lt".equals(eName) || "gt".equals(eName) || "quot".equals(eName) - || "apos".equals(eName) || "amp".equals(eName)) - return; + { + if (skippedPE) + { + return; + } + + handler.notationDecl(nname, ids.publicId, ids.systemId, ids.baseUri); + if (notationInfo.get(nname) == null) + { + notationInfo.put(nname, nname); + } + else + { + // VC: Unique Notation Name + handler.verror("Duplicate notation name decl: " + nname); + } + } + + // + // Location. + // + + /** + * Return the current line number. + */ + public int getLineNumber() + { + return line; } - handler.getDeclHandler () - .internalEntityDecl (eName, value); - } + /** + * Return the current column number. + */ + public int getColumnNumber() + { + return column; + } - /** - * Register an external entity declaration for later retrieval. 
- */ - private void setExternalEntity (String eName, int eClass, - String ids [], String nName) - { - if (entityInfo.get (eName) == null) { - Object entity[] = new Object [5]; - entity [0] = new Integer (eClass); - entity [1] = ids; -// FIXME: shrink!! [2] no longer used, [4] irrelevant given [0] - entity [4] = nName; - entityInfo.put (eName, entity); - } - } + ////////////////////////////////////////////////////////////////////// + // High-level I/O. + ////////////////////////////////////////////////////////////////////// + + /** + * Read a single character from the readBuffer. + * <p>The readDataChunk () method maintains the buffer. + * <p>If we hit the end of an entity, try to pop the stack and + * keep going. + * <p> (This approach doesn't really enforce XML's rules about + * entity boundaries, but this is not currently a validating + * parser). + * <p>This routine also attempts to keep track of the current + * position in external entities, but it's not entirely accurate. + * @return The next available input character. + * @see #unread (char) + * @see #readDataChunk + * @see #readBuffer + * @see #line + * @return The next character from the current input source. + */ + private char readCh() + throws SAXException, IOException + { + // As long as there's nothing in the + // read buffer, try reading more data + // (for an external entity) or popping + // the entity stack (for either). + while (readBufferPos >= readBufferLength) + { + switch (sourceType) + { + case INPUT_READER: + case INPUT_STREAM: + readDataChunk(); + while (readBufferLength < 1) + { + popInput(); + if (readBufferLength < 1) + { + readDataChunk(); + } + } + break; + + default: + + popInput(); + break; + } + } + + char c = readBuffer[readBufferPos++]; + + if (c == '\n') + { + line++; + column = 0; + } + else + { + if (c == '<') + { + /* the most common return to parseContent () ... 
NOP */ + } + else if (((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD) + || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085) + && xmlVersion == XML_11)) + { + error("illegal XML character U+" + Integer.toHexString(c)); + } + // If we're in the DTD and in a context where PEs get expanded, + // do so ... 1/14/2000 errata identify those contexts. There + // are also spots in the internal subset where PE refs are fatal + // errors, hence yet another flag. + else if (c == '%' && expandPE) + { + if (peIsError) + { + error("PE reference within decl in internal subset."); + } + parsePEReference(); + return readCh(); + } + column++; + } - // - // Notations. - // + return c; + } - /** - * Report a notation declaration, checking for duplicates. - */ - private void setNotation (String nname, String ids []) + /** + * Push a single character back onto the current input stream. + * <p>This method usually pushes the character back onto + * the readBuffer. + * <p>I don't think that this would ever be called with + * readBufferPos = 0, because the methods always reads a character + * before unreading it, but just in case, I've added a boundary + * condition. + * @param c The character to push back. + * @see #readCh + * @see #unread (char[]) + * @see #readBuffer + */ + private void unread(char c) throws SAXException - { - if (skippedPE) - return; - - handler.notationDecl (nname, ids); - if (notationInfo.get (nname) == null) - notationInfo.put (nname, nname); - else - // VC: Unique Notation Name - handler.verror ("Duplicate notation name decl: " + nname); - } - - - // - // Location. - // - - - /** - * Return the current line number. - */ - public int getLineNumber () - { - return line; - } - - - /** - * Return the current column number. - */ - public int getColumnNumber () - { - return column; - } + { + // Normal condition. 
+ if (c == '\n') + { + line--; + column = -1; + } + if (readBufferPos > 0) + { + readBuffer[--readBufferPos] = c; + } + else + { + pushString(null, new Character(c).toString()); + } + } + /** + * Push a char array back onto the current input stream. + * <p>NOTE: you must <em>never</em> push back characters that you + * haven't actually read: use pushString () instead. + * @see #readCh + * @see #unread (char) + * @see #readBuffer + * @see #pushString + */ + private void unread(char[] ch, int length) + throws SAXException + { + for (int i = 0; i < length; i++) + { + if (ch[i] == '\n') + { + line--; + column = -1; + } + } + if (length < readBufferPos) + { + readBufferPos -= length; + } + else + { + pushCharArray(null, ch, 0, length); + } + } - ////////////////////////////////////////////////////////////////////// - // High-level I/O. - ////////////////////////////////////////////////////////////////////// - - - /** - * Read a single character from the readBuffer. - * <p>The readDataChunk () method maintains the buffer. - * <p>If we hit the end of an entity, try to pop the stack and - * keep going. - * <p> (This approach doesn't really enforce XML's rules about - * entity boundaries, but this is not currently a validating - * parser). - * <p>This routine also attempts to keep track of the current - * position in external entities, but it's not entirely accurate. - * @return The next available input character. - * @see #unread (char) - * @see #readDataChunk - * @see #readBuffer - * @see #line - * @return The next character from the current input source. - */ - private char readCh () + /** + * Push, or skip, a new external input source. + * The source will be some kind of parsed entity, such as a PE + * (including the external DTD subset) or content for the body. + * + * @param url The java.net.URL object for the entity. 
+ * @see SAXDriver#resolveEntity + * @see #pushString + * @see #sourceType + * @see #pushInput + * @see #detectEncoding + * @see #sourceType + * @see #readBuffer + */ + private void pushURL(boolean isPE, + String ename, + ExternalIdentifiers ids, + Reader reader, + InputStream stream, + String encoding, + boolean doResolve) throws SAXException, IOException - { - // As long as there's nothing in the - // read buffer, try reading more data - // (for an external entity) or popping - // the entity stack (for either). - while (readBufferPos >= readBufferLength) { - switch (sourceType) { - case INPUT_READER: - case INPUT_STREAM: - readDataChunk (); - while (readBufferLength < 1) { - popInput (); - if (readBufferLength < 1) { - readDataChunk (); - } - } - break; - - default: - - popInput (); - break; - } - } - - char c = readBuffer [readBufferPos++]; - - if (c == '\n') { - line++; - column = 0; - } else { - if (c == '<') { - /* the most common return to parseContent () ... NOP */ - } else if (((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD) - || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085) - && xmlVersion == XML_11)) - error ("illegal XML character U+" - + Integer.toHexString (c)); - - // If we're in the DTD and in a context where PEs get expanded, - // do so ... 1/14/2000 errata identify those contexts. There - // are also spots in the internal subset where PE refs are fatal - // errors, hence yet another flag. - else if (c == '%' && expandPE) { - if (peIsError) - error ("PE reference within decl in internal subset."); - parsePEReference (); - return readCh (); - } - column++; - } - - return c; - } + { + boolean ignoreEncoding; + String systemId; + InputSource source; + if (!isPE) + { + dataBufferFlush(); + } - /** - * Push a single character back onto the current input stream. - * <p>This method usually pushes the character back onto - * the readBuffer. 
- * <p>I don't think that this would ever be called with - * readBufferPos = 0, because the methods always reads a character - * before unreading it, but just in case, I've added a boundary - * condition. - * @param c The character to push back. - * @see #readCh - * @see #unread (char[]) - * @see #readBuffer - */ - private void unread (char c) - throws SAXException - { - // Normal condition. - if (c == '\n') { - line--; - column = -1; - } - if (readBufferPos > 0) { - readBuffer [--readBufferPos] = c; - } else { - pushString (null, new Character (c).toString ()); - } - } + scratch.setPublicId(ids.publicId); + scratch.setSystemId(ids.systemId); + + // See if we should skip or substitute the entity. + // If we're not skipping, resolving reports startEntity() + // and updates the (handler's) stack of URIs. + if (doResolve) + { + // assert (stream == null && reader == null && encoding == null) + source = handler.resolveEntity(isPE, ename, scratch, ids.baseUri); + if (source == null) + { + handler.warn("skipping entity: " + ename); + handler.skippedEntity(ename); + if (isPE) + { + skippedPE = true; + } + return; + } + // we might be using alternate IDs/encoding + systemId = source.getSystemId(); + // The following warning and setting systemId was deleted bcause + // the application has the option of not setting systemId + // provided that it has set the characte/byte stream. 
+ /* + if (systemId == null) { + handler.warn ("missing system ID, using " + ids.systemId); + systemId = ids.systemId; + } + */ + } + else + { + // "[document]", or "[dtd]" via getExternalSubset() + scratch.setCharacterStream(reader); + scratch.setByteStream(stream); + scratch.setEncoding(encoding); + source = scratch; + systemId = ids.systemId; + if (handler.stringInterning) + { + handler.startExternalEntity(ename, systemId, + "[document]" == ename); + } + else + { + handler.startExternalEntity(ename, systemId, + "[document]".equals(ename)); + } + } - /** - * Push a char array back onto the current input stream. - * <p>NOTE: you must <em>never</em> push back characters that you - * haven't actually read: use pushString () instead. - * @see #readCh - * @see #unread (char) - * @see #readBuffer - * @see #pushString - */ - private void unread (char ch[], int length) - throws SAXException - { - for (int i = 0; i < length; i++) { - if (ch [i] == '\n') { - line--; - column = -1; - } - } - if (length < readBufferPos) { - readBufferPos -= length; - } else { - pushCharArray (null, ch, 0, length); - } - } + // we may have been given I/O streams directly + if (source.getCharacterStream() != null) + { + if (source.getByteStream() != null) + error("InputSource has two streams!"); + reader = source.getCharacterStream(); + } + else if (source.getByteStream() != null) + { + encoding = source.getEncoding(); + if (encoding == null) + { + stream = source.getByteStream(); + } + else + { + try + { + reader = new InputStreamReader(source.getByteStream(), + encoding); + } + catch (IOException e) + { + stream = source.getByteStream(); + } + } + } + else if (systemId == null) + { + error("InputSource has no URI!"); + } + scratch.setCharacterStream(null); + scratch.setByteStream(null); + scratch.setEncoding(null); + + // Push the existing status. + pushInput(ename); + + // Create a new read buffer. 
+ // (Note the four-character margin) + readBuffer = new char[READ_BUFFER_MAX + 4]; + readBufferPos = 0; + readBufferLength = 0; + readBufferOverflow = -1; + is = null; + line = 1; + column = 0; + currentByteCount = 0; + + // If there's an explicit character stream, just + // ignore encoding declarations. + if (reader != null) + { + sourceType = INPUT_READER; + this.reader = reader; + tryEncodingDecl(true); + return; + } + + // Else we handle the conversion, and need to ensure + // it's done right. + sourceType = INPUT_STREAM; + if (stream != null) + { + is = stream; + } + else + { + // We have to open our own stream to the URL. + URL url = new URL(systemId); + + externalEntity = url.openConnection(); + externalEntity.connect(); + is = externalEntity.getInputStream(); + } + + // If we get to here, there must be + // an InputStream available. + if (!is.markSupported()) + { + is = new BufferedInputStream(is); + } + // Get any external encoding label. + if (encoding == null && externalEntity != null) + { + // External labels can be untrustworthy; filesystems in + // particular often have the wrong default for content + // that wasn't locally originated. Those we autodetect. + if (!"file".equals(externalEntity.getURL().getProtocol())) + { + int temp; + + // application/xml;charset=something;otherAttr=... + // ... with many variants on 'something' + encoding = externalEntity.getContentType(); + + // MHK code (fix for Saxon 5.5.1/007): + // protect against encoding==null + if (encoding == null) + { + temp = -1; + } + else + { + temp = encoding.indexOf("charset"); + } + + // RFC 2376 sez MIME text defaults to ASCII, but since the + // JDK will create a MIME type out of thin air, we always + // autodetect when there's no explicit charset attribute. 
+ if (temp < 0) + { + encoding = null; // autodetect + } + else + { + // only this one attribute + if ((temp = encoding.indexOf(';')) > 0) + { + encoding = encoding.substring(0, temp); + } + + if ((temp = encoding.indexOf('=', temp + 7)) > 0) + { + encoding = encoding.substring(temp + 1); + + // attributes can have comment fields (RFC 822) + if ((temp = encoding.indexOf('(')) > 0) + { + encoding = encoding.substring(0, temp); + } + // ... and values may be quoted + if ((temp = encoding.indexOf('"')) > 0) + { + encoding = + encoding.substring(temp + 1, + encoding.indexOf('"', temp + 2)); + } + encoding.trim(); + } + else + { + handler.warn("ignoring illegal MIME attribute: " + + encoding); + encoding = null; + } + } + } + } + + // if we got an external encoding label, use it ... + if (encoding != null) + { + this.encoding = ENCODING_EXTERNAL; + setupDecoding(encoding); + ignoreEncoding = true; + + // ... else autodetect from first bytes. + } + else + { + detectEncoding(); + ignoreEncoding = false; + } - /** - * Push, or skip, a new external input source. - * The source will be some kind of parsed entity, such as a PE - * (including the external DTD subset) or content for the body. - * - * @param url The java.net.URL object for the entity. - * @see SAXDriver#resolveEntity - * @see #pushString - * @see #sourceType - * @see #pushInput - * @see #detectEncoding - * @see #sourceType - * @see #readBuffer - */ - private void pushURL ( - boolean isPE, - String ename, - String ids [], // public, system, baseURI - Reader reader, - InputStream stream, - String encoding, - boolean doResolve - ) throws SAXException, IOException - { - boolean ignoreEncoding; - String systemId; - InputSource source; - - if (!isPE) - dataBufferFlush (); - - scratch.setPublicId (ids [0]); - scratch.setSystemId (ids [1]); - - // See if we should skip or substitute the entity. - // If we're not skipping, resolving reports startEntity() - // and updates the (handler's) stack of URIs. 
- if (doResolve) { - // assert (stream == null && reader == null && encoding == null) - source = handler.resolveEntity (isPE, ename, scratch, ids [2]); - if (source == null) { - handler.warn ("skipping entity: " + ename); - handler.skippedEntity (ename); - if (isPE) - skippedPE = true; - return; - } - - // we might be using alternate IDs/encoding - systemId = source.getSystemId (); - // The following warning and setting systemId was deleted bcause - // the application has the option of not setting systemId - // provided that it has set the characte/byte stream. - /* - if (systemId == null) { - handler.warn ("missing system ID, using " + ids [1]); - systemId = ids [1]; - } - */ - } else { - // "[document]", or "[dtd]" via getExternalSubset() - scratch.setCharacterStream (reader); - scratch.setByteStream (stream); - scratch.setEncoding (encoding); - source = scratch; - systemId = ids [1]; - if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { - handler.startExternalEntity (ename, systemId, - "[document]" == ename); - } else { - handler.startExternalEntity (ename, systemId, - "[document]".equals(ename)); - } - } - - // we may have been given I/O streams directly - if (source.getCharacterStream () != null) { - if (source.getByteStream () != null) - error ("InputSource has two streams!"); - reader = source.getCharacterStream (); - } else if (source.getByteStream () != null) { - encoding = source.getEncoding (); - if (encoding == null) - stream = source.getByteStream (); - else try { - reader = new InputStreamReader ( - source.getByteStream (), - encoding); - } catch (IOException e) { - stream = source.getByteStream (); - } - } else if (systemId == null) - error ("InputSource has no URI!"); - scratch.setCharacterStream (null); - scratch.setByteStream (null); - scratch.setEncoding (null); - - // Push the existing status. - pushInput (ename); - - // Create a new read buffer. 
- // (Note the four-character margin) - readBuffer = new char [READ_BUFFER_MAX + 4]; - readBufferPos = 0; - readBufferLength = 0; - readBufferOverflow = -1; - is = null; - line = 1; - column = 0; - currentByteCount = 0; - - // If there's an explicit character stream, just - // ignore encoding declarations. - if (reader != null) { - sourceType = INPUT_READER; - this.reader = reader; - tryEncodingDecl (true); - return; - } - - // Else we handle the conversion, and need to ensure - // it's done right. - sourceType = INPUT_STREAM; - if (stream != null) { - is = stream; - } else { - // We have to open our own stream to the URL. - URL url = new URL (systemId); - - externalEntity = url.openConnection (); - externalEntity.connect (); - is = externalEntity.getInputStream (); - } - - // If we get to here, there must be - // an InputStream available. - if (!is.markSupported ()) { - is = new BufferedInputStream (is); - } - - // Get any external encoding label. - if (encoding == null && externalEntity != null) { - // External labels can be untrustworthy; filesystems in - // particular often have the wrong default for content - // that wasn't locally originated. Those we autodetect. - if (!"file".equals (externalEntity.getURL ().getProtocol ())) { - int temp; - - // application/xml;charset=something;otherAttr=... - // ... with many variants on 'something' - encoding = externalEntity.getContentType (); - - // MHK code (fix for Saxon 5.5.1/007): - // protect against encoding==null - if (encoding==null) { - temp = -1; - } else { - temp = encoding.indexOf ("charset"); - } - - // RFC 2376 sez MIME text defaults to ASCII, but since the - // JDK will create a MIME type out of thin air, we always - // autodetect when there's no explicit charset attribute. 
- if (temp < 0) - encoding = null; // autodetect - else { - // only this one attribute - if ((temp = encoding.indexOf (';')) > 0) - encoding = encoding.substring (0, temp); - - if ((temp = encoding.indexOf ('=', temp + 7)) > 0) { - encoding = encoding.substring (temp + 1); - - // attributes can have comment fields (RFC 822) - if ((temp = encoding.indexOf ('(')) > 0) - encoding = encoding.substring (0, temp); - // ... and values may be quoted - if ((temp = encoding.indexOf ('"')) > 0) - encoding = encoding.substring (temp + 1, - encoding.indexOf ('"', temp + 2)); - encoding.trim (); - } else { - handler.warn ("ignoring illegal MIME attribute: " - + encoding); - encoding = null; - } - } - } - } - - // if we got an external encoding label, use it ... - if (encoding != null) { - this.encoding = ENCODING_EXTERNAL; - setupDecoding (encoding); - ignoreEncoding = true; - - // ... else autodetect from first bytes. - } else { - detectEncoding (); - ignoreEncoding = false; - } - - // Read any XML or text declaration. - // If we autodetected, it may tell us the "real" encoding. - try { - tryEncodingDecl (ignoreEncoding); - } catch (UnsupportedEncodingException x) { - encoding = x.getMessage (); - - // if we don't handle the declared encoding, - // try letting a JVM InputStreamReader do it - try { - if (sourceType != INPUT_STREAM) - throw x; - - is.reset (); - readBufferPos = 0; - readBufferLength = 0; - readBufferOverflow = -1; - line = 1; - currentByteCount = column = 0; - - sourceType = INPUT_READER; - this.reader = new InputStreamReader (is, encoding); - is = null; - - tryEncodingDecl (true); - - } catch (IOException e) { - error ("unsupported text encoding", - encoding, - null); - } - } - } + // Read any XML or text declaration. + // If we autodetected, it may tell us the "real" encoding. 
+ try + { + tryEncodingDecl(ignoreEncoding); + } + catch (UnsupportedEncodingException x) + { + encoding = x.getMessage(); + // if we don't handle the declared encoding, + // try letting a JVM InputStreamReader do it + try + { + if (sourceType != INPUT_STREAM) + { + throw x; + } + + is.reset(); + readBufferPos = 0; + readBufferLength = 0; + readBufferOverflow = -1; + line = 1; + currentByteCount = column = 0; + + sourceType = INPUT_READER; + this.reader = new InputStreamReader(is, encoding); + is = null; + + tryEncodingDecl(true); + + } + catch (IOException e) + { + error("unsupported text encoding", + encoding, + null); + } + } + } - /** - * Check for an encoding declaration. This is the second part of the - * XML encoding autodetection algorithm, relying on detectEncoding to - * get to the point that this part can read any encoding declaration - * in the document (using only US-ASCII characters). - * - * <p> Because this part starts to fill parser buffers with this data, - * it's tricky to setup a reader so that Java's built-in decoders can be - * used for the character encodings that aren't built in to this parser - * (such as EUC-JP, KOI8-R, Big5, etc). - * - * @return any encoding in the declaration, uppercased; or null - * @see detectEncoding - */ - private String tryEncodingDecl (boolean ignoreEncoding) + /** + * Check for an encoding declaration. This is the second part of the + * XML encoding autodetection algorithm, relying on detectEncoding to + * get to the point that this part can read any encoding declaration + * in the document (using only US-ASCII characters). + * + * <p> Because this part starts to fill parser buffers with this data, + * it's tricky to setup a reader so that Java's built-in decoders can be + * used for the character encodings that aren't built in to this parser + * (such as EUC-JP, KOI8-R, Big5, etc). 
+ * + * @return any encoding in the declaration, uppercased; or null + * @see detectEncoding + */ + private String tryEncodingDecl(boolean ignoreEncoding) throws SAXException, IOException - { - // Read the XML/text declaration. - if (tryRead ("<?xml")) { - if (tryWhitespace ()) { - if (inputStack.size () > 0) { - return parseTextDecl (ignoreEncoding); - } else { - return parseXMLDecl (ignoreEncoding); - } - } else { - // <?xml-stylesheet ...?> or similar - unread ('l'); - unread ('m'); - unread ('x'); - unread ('?'); - unread ('<'); - } - } - return null; - } - + { + // Read the XML/text declaration. + if (tryRead("<?xml")) + { + if (tryWhitespace()) + { + if (inputStack.size() > 0) + { + return parseTextDecl(ignoreEncoding); + } + else + { + return parseXMLDecl(ignoreEncoding); + } + } + else + { + // <?xml-stylesheet ...?> or similar + unread('l'); + unread('m'); + unread('x'); + unread('?'); + unread('<'); + } + } + return null; + } - /** - * Attempt to detect the encoding of an entity. - * <p>The trick here (as suggested in the XML standard) is that - * any entity not in UTF-8, or in UCS-2 with a byte-order mark, - * <b>must</b> begin with an XML declaration or an encoding - * declaration; we simply have to look for "<?xml" in various - * encodings. - * <p>This method has no way to distinguish among 8-bit encodings. - * Instead, it sets up for UTF-8, then (possibly) revises its assumption - * later in setupDecoding (). Any ASCII-derived 8-bit encoding - * should work, but most will be rejected later by setupDecoding (). - * @see #tryEncoding (byte[], byte, byte, byte, byte) - * @see #tryEncoding (byte[], byte, byte) - * @see #setupDecoding - */ - private void detectEncoding () + /** + * Attempt to detect the encoding of an entity. 
+ * <p>The trick here (as suggested in the XML standard) is that + * any entity not in UTF-8, or in UCS-2 with a byte-order mark, + * <b>must</b> begin with an XML declaration or an encoding + * declaration; we simply have to look for "<?xml" in various + * encodings. + * <p>This method has no way to distinguish among 8-bit encodings. + * Instead, it sets up for UTF-8, then (possibly) revises its assumption + * later in setupDecoding (). Any ASCII-derived 8-bit encoding + * should work, but most will be rejected later by setupDecoding (). + * @see #tryEncoding (byte[], byte, byte, byte, byte) + * @see #tryEncoding (byte[], byte, byte) + * @see #setupDecoding + */ + private void detectEncoding() throws SAXException, IOException - { - byte signature[] = new byte [4]; - - // Read the first four bytes for - // autodetection. - is.mark (4); - is.read (signature); - is.reset (); - - // - // FIRST: four byte encodings (who uses these?) - // - if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, - (byte) 0x00, (byte) 0x3c)) { - // UCS-4 must begin with "<?xml" - // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234) - // "UTF-32BE" - encoding = ENCODING_UCS_4_1234; - - } else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, - (byte) 0x00, (byte) 0x00)) { - // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321) - // "UTF-32LE" - encoding = ENCODING_UCS_4_4321; - - } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, - (byte) 0x3c, (byte) 0x00)) { - // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143) - encoding = ENCODING_UCS_4_2143; - - } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, - (byte) 0x00, (byte) 0x00)) { - // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3421) - encoding = ENCODING_UCS_4_3412; - - // 00 00 fe ff UCS_4_1234 (with BOM) - // ff fe 00 00 UCS_4_4321 (with BOM) - } - - // - // SECOND: two byte encodings - // note ... 
with 1/14/2000 errata the XML spec identifies some - // more "broken UTF-16" autodetection cases, with no XML decl, - // which we don't handle here (that's legal too). - // - else if (tryEncoding (signature, (byte) 0xfe, (byte) 0xff)) { - // UCS-2 with a byte-order marker. (UTF-16) - // 0xfe 0xff: UCS-2, big-endian (12) - encoding = ENCODING_UCS_2_12; - is.read (); is.read (); - - } else if (tryEncoding (signature, (byte) 0xff, (byte) 0xfe)) { - // UCS-2 with a byte-order marker. (UTF-16) - // 0xff 0xfe: UCS-2, little-endian (21) - encoding = ENCODING_UCS_2_21; - is.read (); is.read (); - - } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, - (byte) 0x00, (byte) 0x3f)) { - // UTF-16BE (otherwise, malformed UTF-16) - // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark - encoding = ENCODING_UCS_2_12; - error ("no byte-order mark for UCS-2 entity"); - - } else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, - (byte) 0x3f, (byte) 0x00)) { - // UTF-16LE (otherwise, malformed UTF-16) - // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark - encoding = ENCODING_UCS_2_21; - error ("no byte-order mark for UCS-2 entity"); - } - - // - // THIRD: ASCII-derived encodings, fixed and variable lengths - // - else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x3f, - (byte) 0x78, (byte) 0x6d)) { - // ASCII derived - // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup (read ENCODING) - encoding = ENCODING_UTF_8; - prefetchASCIIEncodingDecl (); - - } else if (signature [0] == (byte) 0xef - && signature [1] == (byte) 0xbb - && signature [2] == (byte) 0xbf) { - // 0xef 0xbb 0xbf: UTF-8 BOM (not part of document text) - // this un-needed notion slipped into XML 2nd ed through a - // "non-normative" erratum; now required by MSFT and UDDI, - // and E22 made it normative. - encoding = ENCODING_UTF_8; - is.read (); is.read (); is.read (); - - } else { - // 4c 6f a7 94 ... we don't understand EBCDIC flavors - // ... 
but we COULD at least kick in some fixed code page - - // (default) UTF-8 without encoding/XML declaration - encoding = ENCODING_UTF_8; - } - } - + { + byte[] signature = new byte[4]; - /** - * Check for a four-byte signature. - * <p>Utility routine for detectEncoding (). - * <p>Always looks for some part of "<?XML" in a specific encoding. - * @param sig The first four bytes read. - * @param b1 The first byte of the signature - * @param b2 The second byte of the signature - * @param b3 The third byte of the signature - * @param b4 The fourth byte of the signature - * @see #detectEncoding - */ - private static boolean tryEncoding ( - byte sig[], byte b1, byte b2, byte b3, byte b4) - { - return (sig [0] == b1 && sig [1] == b2 - && sig [2] == b3 && sig [3] == b4); - } + // Read the first four bytes for + // autodetection. + is.mark(4); + is.read(signature); + is.reset(); + // + // FIRST: four byte encodings (who uses these?) + // + if (tryEncoding(signature, (byte) 0x00, (byte) 0x00, + (byte) 0x00, (byte) 0x3c)) + { + // UCS-4 must begin with "<?xml" + // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234) + // "UTF-32BE" + encoding = ENCODING_UCS_4_1234; + } + else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00, + (byte) 0x00, (byte) 0x00)) + { + // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321) + // "UTF-32LE" + encoding = ENCODING_UCS_4_4321; + } + else if (tryEncoding(signature, (byte) 0x00, (byte) 0x00, + (byte) 0x3c, (byte) 0x00)) + { + // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143) + encoding = ENCODING_UCS_4_2143; + } + else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c, + (byte) 0x00, (byte) 0x00)) + { + // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3421) + encoding = ENCODING_UCS_4_3412; + + // 00 00 fe ff UCS_4_1234 (with BOM) + // ff fe 00 00 UCS_4_4321 (with BOM) + } - /** - * Check for a two-byte signature. - * <p>Looks for a UCS-2 byte-order mark. - * <p>Utility routine for detectEncoding (). - * @param sig The first four bytes read. 
- * @param b1 The first byte of the signature - * @param b2 The second byte of the signature - * @see #detectEncoding - */ - private static boolean tryEncoding (byte sig[], byte b1, byte b2) - { - return ((sig [0] == b1) && (sig [1] == b2)); - } + // + // SECOND: two byte encodings + // note ... with 1/14/2000 errata the XML spec identifies some + // more "broken UTF-16" autodetection cases, with no XML decl, + // which we don't handle here (that's legal too). + // + else if (tryEncoding(signature, (byte) 0xfe, (byte) 0xff)) + { + // UCS-2 with a byte-order marker. (UTF-16) + // 0xfe 0xff: UCS-2, big-endian (12) + encoding = ENCODING_UCS_2_12; + is.read(); is.read(); + } + else if (tryEncoding(signature, (byte) 0xff, (byte) 0xfe)) + { + // UCS-2 with a byte-order marker. (UTF-16) + // 0xff 0xfe: UCS-2, little-endian (21) + encoding = ENCODING_UCS_2_21; + is.read(); is.read(); + } + else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c, + (byte) 0x00, (byte) 0x3f)) + { + // UTF-16BE (otherwise, malformed UTF-16) + // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark + encoding = ENCODING_UCS_2_12; + error("no byte-order mark for UCS-2 entity"); + } + else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00, + (byte) 0x3f, (byte) 0x00)) + { + // UTF-16LE (otherwise, malformed UTF-16) + // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark + encoding = ENCODING_UCS_2_21; + error("no byte-order mark for UCS-2 entity"); + } + // + // THIRD: ASCII-derived encodings, fixed and variable lengths + // + else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x3f, + (byte) 0x78, (byte) 0x6d)) + { + // ASCII derived + // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup (read ENCODING) + encoding = ENCODING_UTF_8; + prefetchASCIIEncodingDecl(); + } + else if (signature[0] == (byte) 0xef + && signature[1] == (byte) 0xbb + && signature[2] == (byte) 0xbf) + { + // 0xef 0xbb 0xbf: UTF-8 BOM (not part of document text) + // this un-needed notion slipped into XML 
2nd ed through a + // "non-normative" erratum; now required by MSFT and UDDI, + // and E22 made it normative. + encoding = ENCODING_UTF_8; + is.read(); is.read(); is.read(); + } + else + { + // 4c 6f a7 94 ... we don't understand EBCDIC flavors + // ... but we COULD at least kick in some fixed code page + + // (default) UTF-8 without encoding/XML declaration + encoding = ENCODING_UTF_8; + } + } - /** - * This method pushes a string back onto input. - * <p>It is useful either as the expansion of an internal entity, - * or for backtracking during the parse. - * <p>Call pushCharArray () to do the actual work. - * @param s The string to push back onto input. - * @see #pushCharArray - */ - private void pushString (String ename, String s) - throws SAXException - { - char ch[] = s.toCharArray (); - pushCharArray (ename, ch, 0, ch.length); - } + /** + * Check for a four-byte signature. + * <p>Utility routine for detectEncoding (). + * <p>Always looks for some part of "<?XML" in a specific encoding. + * @param sig The first four bytes read. + * @param b1 The first byte of the signature + * @param b2 The second byte of the signature + * @param b3 The third byte of the signature + * @param b4 The fourth byte of the signature + * @see #detectEncoding + */ + private static boolean tryEncoding(byte[] sig, byte b1, byte b2, + byte b3, byte b4) + { + return (sig[0] == b1 && sig[1] == b2 + && sig[2] == b3 && sig[3] == b4); + } + /** + * Check for a two-byte signature. + * <p>Looks for a UCS-2 byte-order mark. + * <p>Utility routine for detectEncoding (). + * @param sig The first four bytes read. + * @param b1 The first byte of the signature + * @param b2 The second byte of the signature + * @see #detectEncoding + */ + private static boolean tryEncoding(byte[] sig, byte b1, byte b2) + { + return ((sig[0] == b1) && (sig[1] == b2)); + } - /** - * Push a new internal input source. 
- * <p>This method is useful for expanding an internal entity, - * or for unreading a string of characters. It creates a new - * readBuffer containing the characters in the array, instead - * of characters converted from an input byte stream. - * @param ch The char array to push. - * @see #pushString - * @see #pushURL - * @see #readBuffer - * @see #sourceType - * @see #pushInput - */ - private void pushCharArray (String ename, char ch[], int start, int length) + /** + * This method pushes a string back onto input. + * <p>It is useful either as the expansion of an internal entity, + * or for backtracking during the parse. + * <p>Call pushCharArray () to do the actual work. + * @param s The string to push back onto input. + * @see #pushCharArray + */ + private void pushString(String ename, String s) throws SAXException - { - // Push the existing status - pushInput (ename); - if (ename != null && doReport) { - dataBufferFlush (); - handler.startInternalEntity (ename); - } - sourceType = INPUT_INTERNAL; - readBuffer = ch; - readBufferPos = start; - readBufferLength = length; - readBufferOverflow = -1; - } - + { + char[] ch = s.toCharArray(); + pushCharArray(ename, ch, 0, ch.length); + } - /** - * Save the current input source onto the stack. - * <p>This method saves all of the global variables associated with - * the current input source, so that they can be restored when a new - * input source has finished. It also tests for entity recursion. - * <p>The method saves the following global variables onto a stack - * using a fixed-length array: - * <ol> - * <li>sourceType - * <li>externalEntity - * <li>readBuffer - * <li>readBufferPos - * <li>readBufferLength - * <li>line - * <li>encoding - * </ol> - * @param ename The name of the entity (if any) causing the new input. 
- * @see #popInput - * @see #sourceType - * @see #externalEntity - * @see #readBuffer - * @see #readBufferPos - * @see #readBufferLength - * @see #line - * @see #encoding - */ - private void pushInput (String ename) + /** + * Push a new internal input source. + * <p>This method is useful for expanding an internal entity, + * or for unreading a string of characters. It creates a new + * readBuffer containing the characters in the array, instead + * of characters converted from an input byte stream. + * @param ch The char array to push. + * @see #pushString + * @see #pushURL + * @see #readBuffer + * @see #sourceType + * @see #pushInput + */ + private void pushCharArray(String ename, char[] ch, int start, int length) throws SAXException - { - // Check for entity recursion. - if (ename != null) { - Enumeration entities = entityStack.elements (); - while (entities.hasMoreElements ()) { - String e = (String) entities.nextElement (); - if (e != null && e == ename) { - error ("recursive reference to entity", ename, null); - } - } - } - entityStack.push (ename); - - // Don't bother if there is no current input. - if (sourceType == INPUT_NONE) { - return; - } - - // Set up a snapshot of the current - // input source. - Object input[] = new Object [12]; - - input [0] = new Integer (sourceType); - input [1] = externalEntity; - input [2] = readBuffer; - input [3] = new Integer (readBufferPos); - input [4] = new Integer (readBufferLength); - input [5] = new Integer (line); - input [6] = new Integer (encoding); - input [7] = new Integer (readBufferOverflow); - input [8] = is; - input [9] = new Integer (currentByteCount); - input [10] = new Integer (column); - input [11] = reader; - - // Push it onto the stack. 
- inputStack.push (input); - } + { + // Push the existing status + pushInput(ename); + if (ename != null && doReport) + { + dataBufferFlush(); + handler.startInternalEntity(ename); + } + sourceType = INPUT_INTERNAL; + readBuffer = ch; + readBufferPos = start; + readBufferLength = length; + readBufferOverflow = -1; + } + /** + * Save the current input source onto the stack. + * <p>This method saves all of the global variables associated with + * the current input source, so that they can be restored when a new + * input source has finished. It also tests for entity recursion. + * <p>The method saves the following global variables onto a stack + * using a fixed-length array: + * <ol> + * <li>sourceType + * <li>externalEntity + * <li>readBuffer + * <li>readBufferPos + * <li>readBufferLength + * <li>line + * <li>encoding + * </ol> + * @param ename The name of the entity (if any) causing the new input. + * @see #popInput + * @see #sourceType + * @see #externalEntity + * @see #readBuffer + * @see #readBufferPos + * @see #readBufferLength + * @see #line + * @see #encoding + */ + private void pushInput(String ename) + throws SAXException + { + // Check for entity recursion. + if (ename != null) + { + Iterator entities = entityStack.iterator(); + while (entities.hasNext()) + { + String e = (String) entities.next(); + if (e != null && e == ename) + { + error("recursive reference to entity", ename, null); + } + } + } + entityStack.addLast(ename); + + // Don't bother if there is no current input. + if (sourceType == INPUT_NONE) + { + return; + } + + // Set up a snapshot of the current + // input source. 
+ Input input = new Input(); + + input.sourceType = sourceType; + input.externalEntity = externalEntity; + input.readBuffer = readBuffer; + input.readBufferPos = readBufferPos; + input.readBufferLength = readBufferLength; + input.line = line; + input.encoding = encoding; + input.readBufferOverflow = readBufferOverflow; + input.is = is; + input.currentByteCount = currentByteCount; + input.column = column; + input.reader = reader; + + // Push it onto the stack. + inputStack.addLast(input); + } - /** - * Restore a previous input source. - * <p>This method restores all of the global variables associated with - * the current input source. - * @exception java.io.EOFException - * If there are no more entries on the input stack. - * @see #pushInput - * @see #sourceType - * @see #externalEntity - * @see #readBuffer - * @see #readBufferPos - * @see #readBufferLength - * @see #line - * @see #encoding - */ - private void popInput () + /** + * Restore a previous input source. + * <p>This method restores all of the global variables associated with + * the current input source. + * @exception java.io.EOFException + * If there are no more entries on the input stack. + * @see #pushInput + * @see #sourceType + * @see #externalEntity + * @see #readBuffer + * @see #readBufferPos + * @see #readBufferLength + * @see #line + * @see #encoding + */ + private void popInput() throws SAXException, IOException - { - String ename = (String) entityStack.pop (); - - if (ename != null && doReport) - dataBufferFlush (); - switch (sourceType) { - case INPUT_STREAM: - handler.endExternalEntity (ename); - is.close (); - break; - case INPUT_READER: - handler.endExternalEntity (ename); - reader.close (); - break; - case INPUT_INTERNAL: - if (ename != null && doReport) - handler.endInternalEntity (ename); - break; - } - - // Throw an EOFException if there - // is nothing else to pop. 
- if (inputStack.isEmpty ()) { - throw new EOFException ("no more input"); - } - - Object input [] = (Object[]) inputStack.pop (); - - sourceType = ((Integer) input [0]).intValue (); - externalEntity = (URLConnection) input [1]; - readBuffer = (char[]) input [2]; - readBufferPos = ((Integer) input [3]).intValue (); - readBufferLength = ((Integer) input [4]).intValue (); - line = ((Integer) input [5]).intValue (); - encoding = ((Integer) input [6]).intValue (); - readBufferOverflow = ((Integer) input [7]).intValue (); - is = (InputStream) input [8]; - currentByteCount = ((Integer) input [9]).intValue (); - column = ((Integer) input [10]).intValue (); - reader = (Reader) input [11]; - } - + { + String ename = (String) entityStack.removeLast(); - /** - * Return true if we can read the expected character. - * <p>Note that the character will be removed from the input stream - * on success, but will be put back on failure. Do not attempt to - * read the character again if the method succeeds. - * @param delim The character that should appear next. For a - * insensitive match, you must supply this in upper-case. - * @return true if the character was successfully read, or false if - * it was not. - * @see #tryRead (String) - */ - private boolean tryRead (char delim) - throws SAXException, IOException - { - char c; - - // Read the character - c = readCh (); - - // Test for a match, and push the character - // back if the match fails. - if (c == delim) { - return true; - } else { - unread (c); - return false; - } - } + if (ename != null && doReport) + { + dataBufferFlush(); + } + switch (sourceType) + { + case INPUT_STREAM: + handler.endExternalEntity(ename); + is.close(); + break; + case INPUT_READER: + handler.endExternalEntity(ename); + reader.close(); + break; + case INPUT_INTERNAL: + if (ename != null && doReport) + { + handler.endInternalEntity(ename); + } + break; + } + // Throw an EOFException if there + // is nothing else to pop. 
+ if (inputStack.isEmpty()) + { + throw new EOFException("no more input"); + } - /** - * Return true if we can read the expected string. - * <p>This is simply a convenience method. - * <p>Note that the string will be removed from the input stream - * on success, but will be put back on failure. Do not attempt to - * read the string again if the method succeeds. - * <p>This method will push back a character rather than an - * array whenever possible (probably the majority of cases). - * @param delim The string that should appear next. - * @return true if the string was successfully read, or false if - * it was not. - * @see #tryRead (char) - */ - private boolean tryRead (String delim) + Input input = (Input) inputStack.removeLast(); + + sourceType = input.sourceType; + externalEntity = input.externalEntity; + readBuffer = input.readBuffer; + readBufferPos = input.readBufferPos; + readBufferLength = input.readBufferLength; + line = input.line; + encoding = input.encoding; + readBufferOverflow = input.readBufferOverflow; + is = input.is; + currentByteCount = input.currentByteCount; + column = input.column; + reader = input.reader; + } + + /** + * Return true if we can read the expected character. + * <p>Note that the character will be removed from the input stream + * on success, but will be put back on failure. Do not attempt to + * read the character again if the method succeeds. + * @param delim The character that should appear next. For a + * insensitive match, you must supply this in upper-case. + * @return true if the character was successfully read, or false if + * it was not. + * @see #tryRead (String) + */ + private boolean tryRead(char delim) throws SAXException, IOException - { - return tryRead (delim.toCharArray ()); - } + { + char c; + + // Read the character + c = readCh(); + + // Test for a match, and push the character + // back if the match fails. 
+ if (c == delim) + { + return true; + } + else + { + unread(c); + return false; + } + } - private boolean tryRead (char ch []) + /** + * Return true if we can read the expected string. + * <p>This is simply a convenience method. + * <p>Note that the string will be removed from the input stream + * on success, but will be put back on failure. Do not attempt to + * read the string again if the method succeeds. + * <p>This method will push back a character rather than an + * array whenever possible (probably the majority of cases). + * @param delim The string that should appear next. + * @return true if the string was successfully read, or false if + * it was not. + * @see #tryRead (char) + */ + private boolean tryRead(String delim) throws SAXException, IOException - { - char c; - - // Compare the input, character- - // by character. - - for (int i = 0; i < ch.length; i++) { - c = readCh (); - if (c != ch [i]) { - unread (c); - if (i != 0) { - unread (ch, i); - } - return false; - } - } - return true; - } - - + { + return tryRead(delim.toCharArray()); + } - /** - * Return true if we can read some whitespace. - * <p>This is simply a convenience method. - * <p>This method will push back a character rather than an - * array whenever possible (probably the majority of cases). - * @return true if whitespace was found. - */ - private boolean tryWhitespace () + private boolean tryRead(char[] ch) throws SAXException, IOException - { - char c; - c = readCh (); - if (isWhitespace (c)) { - skipWhitespace (); - return true; - } else { - unread (c); - return false; - } - } + { + char c; + // Compare the input, character- + // by character. + + for (int i = 0; i < ch.length; i++) + { + c = readCh(); + if (c != ch[i]) + { + unread(c); + if (i != 0) + { + unread(ch, i); + } + return false; + } + } + return true; + } - /** - * Read all data until we find the specified string. - * This is useful for scanning CDATA sections and PIs. 
- * <p>This is inefficient right now, since it calls tryRead () - * for every character. - * @param delim The string delimiter - * @see #tryRead (String, boolean) - * @see #readCh - */ - private void parseUntil (String delim) + /** + * Return true if we can read some whitespace. + * <p>This is simply a convenience method. + * <p>This method will push back a character rather than an + * array whenever possible (probably the majority of cases). + * @return true if whitespace was found. + */ + private boolean tryWhitespace() throws SAXException, IOException - { - parseUntil (delim.toCharArray ()); - } - - private void parseUntil (char delim []) + { + char c; + c = readCh(); + if (isWhitespace(c)) + { + skipWhitespace(); + return true; + } + else + { + unread(c); + return false; + } + } + + /** + * Read all data until we find the specified string. + * This is useful for scanning CDATA sections and PIs. + * <p>This is inefficient right now, since it calls tryRead () + * for every character. + * @param delim The string delimiter + * @see #tryRead (String, boolean) + * @see #readCh + */ + private void parseUntil(String delim) throws SAXException, IOException - { - char c; - int startLine = line; - - try { - while (!tryRead (delim)) { - c = readCh (); - dataBufferAppend (c); - } - } catch (EOFException e) { - error ("end of input while looking for delimiter " - + "(started on line " + startLine - + ')', null, new String (delim)); - } - } - - - ////////////////////////////////////////////////////////////////////// - // Low-level I/O. 
- ////////////////////////////////////////////////////////////////////// + { + parseUntil(delim.toCharArray()); + } + private void parseUntil(char[] delim) + throws SAXException, IOException + { + char c; + int startLine = line; + + try + { + while (!tryRead(delim)) + { + c = readCh(); + dataBufferAppend(c); + } + } + catch (EOFException e) + { + error("end of input while looking for delimiter " + + "(started on line " + startLine + + ')', null, new String(delim)); + } + } - /** - * Prefetch US-ASCII XML/text decl from input stream into read buffer. - * Doesn't buffer more than absolutely needed, so that when an encoding - * decl says we need to create an InputStreamReader, we can discard our - * buffer and reset(). Caller knows the first chars of the decl exist - * in the input stream. - */ - private void prefetchASCIIEncodingDecl () + ////////////////////////////////////////////////////////////////////// + // Low-level I/O. + ////////////////////////////////////////////////////////////////////// + + /** + * Prefetch US-ASCII XML/text decl from input stream into read buffer. + * Doesn't buffer more than absolutely needed, so that when an encoding + * decl says we need to create an InputStreamReader, we can discard our + * buffer and reset(). Caller knows the first chars of the decl exist + * in the input stream. 
+ */ + private void prefetchASCIIEncodingDecl() throws SAXException, IOException - { - int ch; - readBufferPos = readBufferLength = 0; - - is.mark (readBuffer.length); - while (true) { - ch = is.read (); - readBuffer [readBufferLength++] = (char) ch; - switch (ch) { - case (int) '>': - return; - case -1: - error ("file ends before end of XML or encoding declaration.", - null, "?>"); - } - if (readBuffer.length == readBufferLength) - error ("unfinished XML or encoding declaration"); - } - } + { + int ch; + readBufferPos = readBufferLength = 0; + + is.mark(readBuffer.length); + while (true) + { + ch = is.read(); + readBuffer[readBufferLength++] = (char) ch; + switch (ch) + { + case (int) '>': + return; + case -1: + error("file ends before end of XML or encoding declaration.", + null, "?>"); + } + if (readBuffer.length == readBufferLength) + { + error("unfinished XML or encoding declaration"); + } + } + } - /** - * Read a chunk of data from an external input source. - * <p>This is simply a front-end that fills the rawReadBuffer - * with bytes, then calls the appropriate encoding handler. - * @see #encoding - * @see #rawReadBuffer - * @see #readBuffer - * @see #filterCR - * @see #copyUtf8ReadBuffer - * @see #copyIso8859_1ReadBuffer - * @see #copyUcs_2ReadBuffer - * @see #copyUcs_4ReadBuffer - */ - private void readDataChunk () + /** + * Read a chunk of data from an external input source. + * <p>This is simply a front-end that fills the rawReadBuffer + * with bytes, then calls the appropriate encoding handler. 
+ * @see #encoding + * @see #rawReadBuffer + * @see #readBuffer + * @see #filterCR + * @see #copyUtf8ReadBuffer + * @see #copyIso8859_1ReadBuffer + * @see #copyUcs_2ReadBuffer + * @see #copyUcs_4ReadBuffer + */ + private void readDataChunk() throws SAXException, IOException - { - int count; - - // See if we have any overflow (filterCR sets for CR at end) - if (readBufferOverflow > -1) { - readBuffer [0] = (char) readBufferOverflow; - readBufferOverflow = -1; - readBufferPos = 1; - sawCR = true; - } else { - readBufferPos = 0; - sawCR = false; - } - - // input from a character stream. - if (sourceType == INPUT_READER) { - count = reader.read (readBuffer, - readBufferPos, READ_BUFFER_MAX - readBufferPos); - if (count < 0) - readBufferLength = readBufferPos; - else - readBufferLength = readBufferPos + count; - if (readBufferLength > 0) - filterCR (count >= 0); - sawCR = false; - return; - } - - // Read as many bytes as possible into the raw buffer. - count = is.read (rawReadBuffer, 0, READ_BUFFER_MAX); - - // Dispatch to an encoding-specific reader method to populate - // the readBuffer. In most parser speed profiles, these routines - // show up at the top of the CPU usage chart. 
- if (count > 0) { - switch (encoding) { - // one byte builtins - case ENCODING_ASCII: - copyIso8859_1ReadBuffer (count, (char) 0x0080); - break; - case ENCODING_UTF_8: - copyUtf8ReadBuffer (count); - break; - case ENCODING_ISO_8859_1: - copyIso8859_1ReadBuffer (count, (char) 0); - break; - - // two byte builtins - case ENCODING_UCS_2_12: - copyUcs2ReadBuffer (count, 8, 0); - break; - case ENCODING_UCS_2_21: - copyUcs2ReadBuffer (count, 0, 8); - break; - - // four byte builtins - case ENCODING_UCS_4_1234: - copyUcs4ReadBuffer (count, 24, 16, 8, 0); - break; - case ENCODING_UCS_4_4321: - copyUcs4ReadBuffer (count, 0, 8, 16, 24); - break; - case ENCODING_UCS_4_2143: - copyUcs4ReadBuffer (count, 16, 24, 0, 8); - break; - case ENCODING_UCS_4_3412: - copyUcs4ReadBuffer (count, 8, 0, 24, 16); - break; - } - } else - readBufferLength = readBufferPos; - - readBufferPos = 0; - - // Filter out all carriage returns if we've seen any - // (including any saved from a previous read) - if (sawCR) { - filterCR (count >= 0); - sawCR = false; - - // must actively report EOF, lest some CRs get lost. - if (readBufferLength == 0 && count >= 0) - readDataChunk (); - } - - if (count > 0) - currentByteCount += count; - } + { + int count; + + // See if we have any overflow (filterCR sets for CR at end) + if (readBufferOverflow > -1) + { + readBuffer[0] = (char) readBufferOverflow; + readBufferOverflow = -1; + readBufferPos = 1; + sawCR = true; + } + else + { + readBufferPos = 0; + sawCR = false; + } + // input from a character stream. + if (sourceType == INPUT_READER) + { + count = reader.read(readBuffer, + readBufferPos, READ_BUFFER_MAX - readBufferPos); + if (count < 0) + { + readBufferLength = readBufferPos; + } + else + { + readBufferLength = readBufferPos + count; + } + if (readBufferLength > 0) + { + filterCR(count >= 0); + } + sawCR = false; + return; + } + + // Read as many bytes as possible into the raw buffer. 
+ count = is.read(rawReadBuffer, 0, READ_BUFFER_MAX); + + // Dispatch to an encoding-specific reader method to populate + // the readBuffer. In most parser speed profiles, these routines + // show up at the top of the CPU usage chart. + if (count > 0) + { + switch (encoding) + { + // one byte builtins + case ENCODING_ASCII: + copyIso8859_1ReadBuffer(count, (char) 0x0080); + break; + case ENCODING_UTF_8: + copyUtf8ReadBuffer(count); + break; + case ENCODING_ISO_8859_1: + copyIso8859_1ReadBuffer(count, (char) 0); + break; - /** - * Filter carriage returns in the read buffer. - * CRLF becomes LF; CR becomes LF. - * @param moreData true iff more data might come from the same source - * @see #readDataChunk - * @see #readBuffer - * @see #readBufferOverflow - */ - private void filterCR (boolean moreData) - { - int i, j; + // two byte builtins + case ENCODING_UCS_2_12: + copyUcs2ReadBuffer(count, 8, 0); + break; + case ENCODING_UCS_2_21: + copyUcs2ReadBuffer(count, 0, 8); + break; + + // four byte builtins + case ENCODING_UCS_4_1234: + copyUcs4ReadBuffer(count, 24, 16, 8, 0); + break; + case ENCODING_UCS_4_4321: + copyUcs4ReadBuffer(count, 0, 8, 16, 24); + break; + case ENCODING_UCS_4_2143: + copyUcs4ReadBuffer(count, 16, 24, 0, 8); + break; + case ENCODING_UCS_4_3412: + copyUcs4ReadBuffer(count, 8, 0, 24, 16); + break; + } + } + else + { + readBufferLength = readBufferPos; + } - readBufferOverflow = -1; + readBufferPos = 0; + + // Filter out all carriage returns if we've seen any + // (including any saved from a previous read) + if (sawCR) + { + filterCR(count >= 0); + sawCR = false; + + // must actively report EOF, lest some CRs get lost. + if (readBufferLength == 0 && count >= 0) + { + readDataChunk(); + } + } + + if (count > 0) + { + currentByteCount += count; + } + } + + /** + * Filter carriage returns in the read buffer. + * CRLF becomes LF; CR becomes LF. 
+ * @param moreData true iff more data might come from the same source + * @see #readDataChunk + * @see #readBuffer + * @see #readBufferOverflow + */ + private void filterCR(boolean moreData) + { + int i, j; + readBufferOverflow = -1; + loop: - for (i = j = readBufferPos; j < readBufferLength; i++, j++) { - switch (readBuffer [j]) { - case '\r': - if (j == readBufferLength - 1) { - if (moreData) { - readBufferOverflow = '\r'; - readBufferLength--; - } else // CR at end of buffer - readBuffer [i++] = '\n'; - break loop; - } else if (readBuffer [j + 1] == '\n') { - j++; - } - readBuffer [i] = '\n'; - break; - - case '\n': - default: - readBuffer [i] = readBuffer [j]; - break; - } - } - readBufferLength = i; - } + for (i = j = readBufferPos; j < readBufferLength; i++, j++) + { + switch (readBuffer[j]) + { + case '\r': + if (j == readBufferLength - 1) + { + if (moreData) + { + readBufferOverflow = '\r'; + readBufferLength--; + } + else // CR at end of buffer + { + readBuffer[i++] = '\n'; + } + break loop; + } + else if (readBuffer[j + 1] == '\n') + { + j++; + } + readBuffer[i] = '\n'; + break; - /** - * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters. - * <p>When readDataChunk () calls this method, the raw bytes are in - * rawReadBuffer, and the final characters will appear in - * readBuffer. - * <p>Note that as of Unicode 3.1, good practice became a requirement, - * so that each Unicode character has exactly one UTF-8 representation. - * @param count The number of bytes to convert. - * @see #readDataChunk - * @see #rawReadBuffer - * @see #readBuffer - * @see #getNextUtf8Byte - */ - private void copyUtf8ReadBuffer (int count) + case '\n': + default: + readBuffer[i] = readBuffer[j]; + break; + } + } + readBufferLength = i; + } + + /** + * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters. + * <p>When readDataChunk () calls this method, the raw bytes are in + * rawReadBuffer, and the final characters will appear in + * readBuffer. 
+ * <p>Note that as of Unicode 3.1, good practice became a requirement, + * so that each Unicode character has exactly one UTF-8 representation. + * @param count The number of bytes to convert. + * @see #readDataChunk + * @see #rawReadBuffer + * @see #readBuffer + * @see #getNextUtf8Byte + */ + private void copyUtf8ReadBuffer(int count) throws SAXException, IOException - { - int i = 0; - int j = readBufferPos; - int b1; - char c = 0; - - /* - // check once, so the runtime won't (if it's smart enough) - if (count < 0 || count > rawReadBuffer.length) - throw new ArrayIndexOutOfBoundsException (Integer.toString (count)); - */ - - while (i < count) { - b1 = rawReadBuffer [i++]; - - // Determine whether we are dealing - // with a one-, two-, three-, or four- - // byte sequence. - if (b1 < 0) { - if ((b1 & 0xe0) == 0xc0) { - // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx - c = (char) (((b1 & 0x1f) << 6) - | getNextUtf8Byte (i++, count)); - if (c < 0x0080) - encodingError ("Illegal two byte UTF-8 sequence", - c, 0); - //Sec 2.11 - // [1] the two-character sequence #xD #xA - // [2] the two-character sequence #xD #x85 - if ((c == 0x0085 || c == 0x000a) && sawCR) - continue; - - // Sec 2.11 - // [3] the single character #x85 - - if(c == 0x0085 && xmlVersion == XML_11) - readBuffer[j++] = '\r'; - } else if ((b1 & 0xf0) == 0xe0) { - // 3-byte sequence: - // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx - // most CJKV characters - c = (char) (((b1 & 0x0f) << 12) | - (getNextUtf8Byte (i++, count) << 6) | - getNextUtf8Byte (i++, count)); - //sec 2.11 - //[4] the single character #x2028 - if(c == 0x2028 && xmlVersion == XML_11){ - readBuffer[j++] = '\r'; - sawCR = true; - continue; - } - if (c < 0x0800 || (c >= 0xd800 && c <= 0xdfff)) - encodingError ("Illegal three byte UTF-8 sequence", - c, 0); - } else if ((b1 & 0xf8) == 0xf0) { - // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx - // = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx - // (uuuuu = wwww + 1) - // 
"Surrogate Pairs" ... from the "Astral Planes" - // Unicode 3.1 assigned the first characters there - int iso646 = b1 & 07; - iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); - iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); - iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); - - if (iso646 <= 0xffff) { - encodingError ("Illegal four byte UTF-8 sequence", - iso646, 0); - } else { - if (iso646 > 0x0010ffff) - encodingError ( - "UTF-8 value out of range for Unicode", - iso646, 0); - iso646 -= 0x010000; - readBuffer [j++] = (char) (0xd800 | (iso646 >> 10)); - readBuffer [j++] = (char) (0xdc00 | (iso646 & 0x03ff)); - continue; - } - } else { - // The five and six byte encodings aren't supported; - // they exceed the Unicode (and XML) range. - encodingError ( - "unsupported five or six byte UTF-8 sequence", - 0xff & b1, i); - // NOTREACHED - c = 0; - } - } else { - // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx - // (US-ASCII character, "common" case, one branch to here) - c = (char) b1; - } - readBuffer [j++] = c; - if (c == '\r') - sawCR = true; - } - // How many characters have we read? - readBufferLength = j; - } + { + int i = 0; + int j = readBufferPos; + int b1; + char c = 0; + + /* + // check once, so the runtime won't (if it's smart enough) + if (count < 0 || count > rawReadBuffer.length) + throw new ArrayIndexOutOfBoundsException (Integer.toString (count)); + */ + while (i < count) + { + b1 = rawReadBuffer[i++]; - /** - * Return the next byte value in a UTF-8 sequence. - * If it is not possible to get a byte from the current - * entity, throw an exception. - * @param pos The current position in the rawReadBuffer. - * @param count The number of bytes in the rawReadBuffer - * @return The significant six bits of a non-initial byte in - * a UTF-8 sequence. - * @exception EOFException If the sequence is incomplete. 
- */ - private int getNextUtf8Byte (int pos, int count) + // Determine whether we are dealing + // with a one-, two-, three-, or four- + // byte sequence. + if (b1 < 0) + { + if ((b1 & 0xe0) == 0xc0) + { + // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx + c = (char) (((b1 & 0x1f) << 6) + | getNextUtf8Byte(i++, count)); + if (c < 0x0080) + { + encodingError("Illegal two byte UTF-8 sequence", + c, 0); + } + + //Sec 2.11 + // [1] the two-character sequence #xD #xA + // [2] the two-character sequence #xD #x85 + if ((c == 0x0085 || c == 0x000a) && sawCR) + { + continue; + } + + // Sec 2.11 + // [3] the single character #x85 + + if (c == 0x0085 && xmlVersion == XML_11) + { + readBuffer[j++] = '\r'; + } + } + else if ((b1 & 0xf0) == 0xe0) + { + // 3-byte sequence: + // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx + // most CJKV characters + c = (char) (((b1 & 0x0f) << 12) | + (getNextUtf8Byte(i++, count) << 6) | + getNextUtf8Byte(i++, count)); + //sec 2.11 + //[4] the single character #x2028 + if (c == 0x2028 && xmlVersion == XML_11) + { + readBuffer[j++] = '\r'; + sawCR = true; + continue; + } + if (c < 0x0800 || (c >= 0xd800 && c <= 0xdfff)) + { + encodingError("Illegal three byte UTF-8 sequence", + c, 0); + } + } + else if ((b1 & 0xf8) == 0xf0) + { + // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx + // = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx + // (uuuuu = wwww + 1) + // "Surrogate Pairs" ... 
from the "Astral Planes" + // Unicode 3.1 assigned the first characters there + int iso646 = b1 & 07; + iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count); + iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count); + iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count); + + if (iso646 <= 0xffff) + { + encodingError("Illegal four byte UTF-8 sequence", + iso646, 0); + } + else + { + if (iso646 > 0x0010ffff) + { + encodingError("UTF-8 value out of range for Unicode", + iso646, 0); + } + iso646 -= 0x010000; + readBuffer[j++] = (char) (0xd800 | (iso646 >> 10)); + readBuffer[j++] = (char) (0xdc00 | (iso646 & 0x03ff)); + continue; + } + } + else + { + // The five and six byte encodings aren't supported; + // they exceed the Unicode (and XML) range. + encodingError("unsupported five or six byte UTF-8 sequence", + 0xff & b1, i); + // NOTREACHED + c = 0; + } + } + else + { + // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx + // (US-ASCII character, "common" case, one branch to here) + c = (char) b1; + } + readBuffer[j++] = c; + if (c == '\r') + { + sawCR = true; + } + } + // How many characters have we read? + readBufferLength = j; + } + + /** + * Return the next byte value in a UTF-8 sequence. + * If it is not possible to get a byte from the current + * entity, throw an exception. + * @param pos The current position in the rawReadBuffer. + * @param count The number of bytes in the rawReadBuffer + * @return The significant six bits of a non-initial byte in + * a UTF-8 sequence. + * @exception EOFException If the sequence is incomplete. + */ + private int getNextUtf8Byte(int pos, int count) throws SAXException, IOException - { - int val; - - // Take a character from the buffer - // or from the actual input stream. - if (pos < count) { - val = rawReadBuffer [pos]; - } else { - val = is.read (); - if (val == -1) { - encodingError ("unfinished multi-byte UTF-8 sequence at EOF", - -1, pos); - } - } - - // Check for the correct bits at the start. 
- if ((val & 0xc0) != 0x80) { - encodingError ("bad continuation of multi-byte UTF-8 sequence", - val, pos + 1); - } - - // Return the significant bits. - return (val & 0x3f); - } + { + int val; + + // Take a character from the buffer + // or from the actual input stream. + if (pos < count) + { + val = rawReadBuffer[pos]; + } + else + { + val = is.read(); + if (val == -1) + { + encodingError("unfinished multi-byte UTF-8 sequence at EOF", + -1, pos); + } + } + // Check for the correct bits at the start. + if ((val & 0xc0) != 0x80) + { + encodingError("bad continuation of multi-byte UTF-8 sequence", + val, pos + 1); + } - /** - * Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into - * UTF-16 characters. - * - * <p>When readDataChunk () calls this method, the raw bytes are in - * rawReadBuffer, and the final characters will appear in - * readBuffer. - * - * @param count The number of bytes to convert. - * @param mask For ASCII conversion, 0x7f; else, 0xff. - * @see #readDataChunk - * @see #rawReadBuffer - * @see #readBuffer - */ - private void copyIso8859_1ReadBuffer (int count, char mask) - throws IOException - { - int i, j; - for (i = 0, j = readBufferPos; i < count; i++, j++) { - char c = (char) (rawReadBuffer [i] & 0xff); - if ((c & mask) != 0) - throw new CharConversionException ("non-ASCII character U+" - + Integer.toHexString (c)); - if (c == 0x0085 && xmlVersion == XML_11) - c = '\r'; - readBuffer [j] = c; - if (c == '\r') { - sawCR = true; - } - } - readBufferLength = j; - } + // Return the significant bits. + return (val & 0x3f); + } + /** + * Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into + * UTF-16 characters. + * + * <p>When readDataChunk () calls this method, the raw bytes are in + * rawReadBuffer, and the final characters will appear in + * readBuffer. + * + * @param count The number of bytes to convert. + * @param mask For ASCII conversion, 0x7f; else, 0xff. 
+ * @see #readDataChunk + * @see #rawReadBuffer + * @see #readBuffer + */ + private void copyIso8859_1ReadBuffer(int count, char mask) + throws IOException + { + int i, j; + for (i = 0, j = readBufferPos; i < count; i++, j++) + { + char c = (char) (rawReadBuffer[i] & 0xff); + if ((c & mask) != 0) + { + throw new CharConversionException("non-ASCII character U+" + + Integer.toHexString(c)); + } + if (c == 0x0085 && xmlVersion == XML_11) + { + c = '\r'; + } + readBuffer[j] = c; + if (c == '\r') + { + sawCR = true; + } + } + readBufferLength = j; + } - /** - * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters - * (as used in Java string manipulation). - * - * <p>When readDataChunk () calls this method, the raw bytes are in - * rawReadBuffer, and the final characters will appear in - * readBuffer. - * @param count The number of bytes to convert. - * @param shift1 The number of bits to shift byte 1. - * @param shift2 The number of bits to shift byte 2 - * @see #readDataChunk - * @see #rawReadBuffer - * @see #readBuffer - */ - private void copyUcs2ReadBuffer (int count, int shift1, int shift2) + /** + * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters + * (as used in Java string manipulation). + * + * <p>When readDataChunk () calls this method, the raw bytes are in + * rawReadBuffer, and the final characters will appear in + * readBuffer. + * @param count The number of bytes to convert. + * @param shift1 The number of bits to shift byte 1. 
+ * @param shift2 The number of bits to shift byte 2 + * @see #readDataChunk + * @see #rawReadBuffer + * @see #readBuffer + */ + private void copyUcs2ReadBuffer(int count, int shift1, int shift2) throws SAXException - { - int j = readBufferPos; - - if (count > 0 && (count % 2) != 0) { - encodingError ("odd number of bytes in UCS-2 encoding", -1, count); - } - // The loops are faster with less internal brancing; hence two - if (shift1 == 0) { // "UTF-16-LE" - for (int i = 0; i < count; i += 2) { - char c = (char) (rawReadBuffer [i + 1] << 8); - c |= 0xff & rawReadBuffer [i]; - readBuffer [j++] = c; - if (c == '\r') - sawCR = true; - } - } else { // "UTF-16-BE" - for (int i = 0; i < count; i += 2) { - char c = (char) (rawReadBuffer [i] << 8); - c |= 0xff & rawReadBuffer [i + 1]; - readBuffer [j++] = c; - if (c == '\r') - sawCR = true; - } - } - readBufferLength = j; - } - + { + int j = readBufferPos; + + if (count > 0 && (count % 2) != 0) + { + encodingError("odd number of bytes in UCS-2 encoding", -1, count); + } + // The loops are faster with less internal brancing; hence two + if (shift1 == 0) + { // "UTF-16-LE" + for (int i = 0; i < count; i += 2) + { + char c = (char) (rawReadBuffer[i + 1] << 8); + c |= 0xff & rawReadBuffer[i]; + readBuffer[j++] = c; + if (c == '\r') + { + sawCR = true; + } + } + } + else + { // "UTF-16-BE" + for (int i = 0; i < count; i += 2) + { + char c = (char) (rawReadBuffer[i] << 8); + c |= 0xff & rawReadBuffer[i + 1]; + readBuffer[j++] = c; + if (c == '\r') + { + sawCR = true; + } + } + } + readBufferLength = j; + } - /** - * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters. - * - * <p>When readDataChunk () calls this method, the raw bytes are in - * rawReadBuffer, and the final characters will appear in - * readBuffer. - * <p>Java has Unicode chars, and this routine uses surrogate pairs - * for ISO-10646 values between 0x00010000 and 0x000fffff. 
An - * exception is thrown if the ISO-10646 character has no Unicode - * representation. - * - * @param count The number of bytes to convert. - * @param shift1 The number of bits to shift byte 1. - * @param shift2 The number of bits to shift byte 2 - * @param shift3 The number of bits to shift byte 2 - * @param shift4 The number of bits to shift byte 2 - * @see #readDataChunk - * @see #rawReadBuffer - * @see #readBuffer - */ - private void copyUcs4ReadBuffer (int count, int shift1, int shift2, - int shift3, int shift4) + /** + * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters. + * + * <p>When readDataChunk () calls this method, the raw bytes are in + * rawReadBuffer, and the final characters will appear in + * readBuffer. + * <p>Java has Unicode chars, and this routine uses surrogate pairs + * for ISO-10646 values between 0x00010000 and 0x000fffff. An + * exception is thrown if the ISO-10646 character has no Unicode + * representation. + * + * @param count The number of bytes to convert. + * @param shift1 The number of bits to shift byte 1. 
+ * @param shift2 The number of bits to shift byte 2 + * @param shift3 The number of bits to shift byte 2 + * @param shift4 The number of bits to shift byte 2 + * @see #readDataChunk + * @see #rawReadBuffer + * @see #readBuffer + */ + private void copyUcs4ReadBuffer(int count, int shift1, int shift2, + int shift3, int shift4) throws SAXException - { - int j = readBufferPos; - - if (count > 0 && (count % 4) != 0) { - encodingError ( - "number of bytes in UCS-4 encoding not divisible by 4", - -1, count); - } - for (int i = 0; i < count; i += 4) { - int value = (((rawReadBuffer [i] & 0xff) << shift1) | - ((rawReadBuffer [i + 1] & 0xff) << shift2) | - ((rawReadBuffer [i + 2] & 0xff) << shift3) | - ((rawReadBuffer [i + 3] & 0xff) << shift4)); - if (value < 0x0000ffff) { - readBuffer [j++] = (char) value; - if (value == (int) '\r') { - sawCR = true; - } - } else if (value < 0x0010ffff) { - value -= 0x010000; - readBuffer [j++] = (char) (0xd8 | ((value >> 10) & 0x03ff)); - readBuffer [j++] = (char) (0xdc | (value & 0x03ff)); - } else { - encodingError ("UCS-4 value out of range for Unicode", - value, i); - } - } - readBufferLength = j; - } - + { + int j = readBufferPos; + + if (count > 0 && (count % 4) != 0) + { + encodingError("number of bytes in UCS-4 encoding " + + "not divisible by 4", + -1, count); + } + for (int i = 0; i < count; i += 4) + { + int value = (((rawReadBuffer [i] & 0xff) << shift1) | + ((rawReadBuffer [i + 1] & 0xff) << shift2) | + ((rawReadBuffer [i + 2] & 0xff) << shift3) | + ((rawReadBuffer [i + 3] & 0xff) << shift4)); + if (value < 0x0000ffff) + { + readBuffer [j++] = (char) value; + if (value == (int) '\r') + { + sawCR = true; + } + } + else if (value < 0x0010ffff) + { + value -= 0x010000; + readBuffer[j++] = (char) (0xd8 | ((value >> 10) & 0x03ff)); + readBuffer[j++] = (char) (0xdc | (value & 0x03ff)); + } + else + { + encodingError("UCS-4 value out of range for Unicode", + value, i); + } + } + readBufferLength = j; + } - /** - * Report a 
character encoding error. - */ - private void encodingError (String message, int value, int offset) + /** + * Report a character encoding error. + */ + private void encodingError(String message, int value, int offset) throws SAXException - { - if (value != -1) - message = message + " (character code: 0x" + - Integer.toHexString (value) + ')'; - error (message); - } + { + if (value != -1) + { + message = message + " (character code: 0x" + + Integer.toHexString(value) + ')'; + error(message); + } + } + + ////////////////////////////////////////////////////////////////////// + // Local Variables. + ////////////////////////////////////////////////////////////////////// + + /** + * Re-initialize the variables for each parse. + */ + private void initializeVariables() + { + // First line + line = 1; + column = 0; + + // Set up the buffers for data and names + dataBufferPos = 0; + dataBuffer = new char[DATA_BUFFER_INITIAL]; + nameBufferPos = 0; + nameBuffer = new char[NAME_BUFFER_INITIAL]; + + // Set up the DTD hash tables + elementInfo = new HashMap(); + entityInfo = new HashMap(); + notationInfo = new HashMap(); + skippedPE = false; + + // Set up the variables for the current + // element context. + currentElement = null; + currentElementContent = CONTENT_UNDECLARED; + + // Set up the input variables + sourceType = INPUT_NONE; + inputStack = new LinkedList(); + entityStack = new LinkedList(); + externalEntity = null; + tagAttributePos = 0; + tagAttributes = new String[100]; + rawReadBuffer = new byte[READ_BUFFER_MAX]; + readBufferOverflow = -1; + + scratch = new InputSource(); + + inLiteral = false; + expandPE = false; + peIsError = false; + + doReport = false; + + inCDATA = false; + + symbolTable = new Object[SYMBOL_TABLE_LENGTH][]; + } + static class ExternalIdentifiers + { - ////////////////////////////////////////////////////////////////////// - // Local Variables. 
- ////////////////////////////////////////////////////////////////////// + String publicId; + String systemId; + String baseUri; - /** - * Re-initialize the variables for each parse. - */ - private void initializeVariables () + ExternalIdentifiers() { - // First line - line = 1; - column = 0; - - // Set up the buffers for data and names - dataBufferPos = 0; - dataBuffer = new char [DATA_BUFFER_INITIAL]; - nameBufferPos = 0; - nameBuffer = new char [NAME_BUFFER_INITIAL]; - - // Set up the DTD hash tables - elementInfo = new Hashtable (); - entityInfo = new Hashtable (); - notationInfo = new Hashtable (); - skippedPE = false; - - // Set up the variables for the current - // element context. - currentElement = null; - currentElementContent = CONTENT_UNDECLARED; - - // Set up the input variables - sourceType = INPUT_NONE; - inputStack = new Stack (); - entityStack = new Stack (); - externalEntity = null; - tagAttributePos = 0; - tagAttributes = new String [100]; - rawReadBuffer = new byte [READ_BUFFER_MAX]; - readBufferOverflow = -1; - - scratch = new InputSource (); - - inLiteral = false; - expandPE = false; - peIsError = false; - - doReport = false; - - inCDATA = false; - - symbolTable = new Object [SYMBOL_TABLE_LENGTH][]; } + ExternalIdentifiers(String publicId, String systemId, String baseUri) + { + this.publicId = publicId; + this.systemId = systemId; + this.baseUri = baseUri; + } + + } - // - // The current XML handler interface. - // - private SAXDriver handler; - - // - // I/O information. 
- // - private Reader reader; // current reader - private InputStream is; // current input stream - private int line; // current line number - private int column; // current column number - private int sourceType; // type of input source - private Stack inputStack; // stack of input soruces - private URLConnection externalEntity; // current external entity - private int encoding; // current character encoding - private int currentByteCount; // bytes read from current source - private InputSource scratch; // temporary - - // - // Buffers for decoded but unparsed character input. - // - private char readBuffer []; - private int readBufferPos; - private int readBufferLength; - private int readBufferOverflow; // overflow from last data chunk. - - - // - // Buffer for undecoded raw byte input. - // - private final static int READ_BUFFER_MAX = 16384; - private byte rawReadBuffer []; - - - // - // Buffer for attribute values, char refs, DTD stuff. - // - private static int DATA_BUFFER_INITIAL = 4096; - private char dataBuffer []; - private int dataBufferPos; - - // - // Buffer for parsed names. - // - private static int NAME_BUFFER_INITIAL = 1024; - private char nameBuffer []; - private int nameBufferPos; - - // - // Save any standalone flag - // - private boolean docIsStandalone; - - // - // Hashtables for DTD information on elements, entities, and notations. - // Populated until we start ignoring decls (because of skipping a PE) - // - private Hashtable elementInfo; - private Hashtable entityInfo; - private Hashtable notationInfo; - private boolean skippedPE; - - - // - // Element type currently in force. - // - private String currentElement; - private int currentElementContent; - - // - // Stack of entity names, to detect recursion. - // - private Stack entityStack; - - // - // PE expansion is enabled in most chunks of the DTD, not all. - // When it's enabled, literals are treated differently. 
- // - private boolean inLiteral; - private boolean expandPE; - private boolean peIsError; - - // - // can't report entity expansion inside two constructs: - // - attribute expansions (internal entities only) - // - markup declarations (parameter entities only) - // - private boolean doReport; - - // - // Symbol table, for caching interned names. - // - // These show up wherever XML names or nmtokens are used: naming elements, - // attributes, PIs, notations, entities, and enumerated attribute values. - // - // NOTE: This hashtable doesn't grow. The default size is intended to be - // rather large for most documents. Example: one snapshot of the DocBook - // XML 4.1 DTD used only about 350 such names. As a rule, only pathological - // documents (ones that don't reuse names) should ever see much collision. - // - // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing. - // "2039" keeps the hash table size at about two memory pages on typical - // 32 bit hardware. - // - private final static int SYMBOL_TABLE_LENGTH = 2039; + static class EntityInfo + { - private Object symbolTable [][]; + int type; + ExternalIdentifiers ids; + String value; + String notationName; + + } - // - // Hash table of attributes found in current start tag. - // - private String tagAttributes []; - private int tagAttributePos; + static class AttributeDecl + { + + String type; + String value; + int valueType; + String enumeration; + String defaultValue; - // - // Utility flag: have we noticed a CR while reading the last - // data chunk? If so, we will have to go back and normalise - // CR or CR/LF line ends. - // - private boolean sawCR; + } - // - // Utility flag: are we in CDATA? If so, whitespace isn't ignorable. - // - private boolean inCDATA; + static class ElementDecl + { - // - // Xml version. 
- // - private static final int XML_10 = 0; - private static final int XML_11 = 1; - private int xmlVersion = XML_10; + int contentType; + String contentModel; + HashMap attributes; + + } + + static class Input + { + + int sourceType; + URLConnection externalEntity; + char[] readBuffer; + int readBufferPos; + int readBufferLength; + int line; + int encoding; + int readBufferOverflow; + InputStream is; + int currentByteCount; + int column; + Reader reader; + + } + } + diff --git a/gnu/xml/aelfred2/XmlReader.java b/gnu/xml/aelfred2/XmlReader.java index 96c9c723f..dacf8b103 100644 --- a/gnu/xml/aelfred2/XmlReader.java +++ b/gnu/xml/aelfred2/XmlReader.java @@ -70,246 +70,305 @@ import gnu.xml.pipeline.ValidationConsumer; * * @author David Brownell */ -public final class XmlReader implements XMLReader +public final class XmlReader + implements XMLReader { - private SAXDriver aelfred2 = new SAXDriver (); - private EventFilter filter = new EventFilter (); - private boolean isValidating; - private boolean active; - - /** Constructs a SAX Parser. */ - public XmlReader () - { } - - /** - * Constructs a SAX Parser, optionally treating validity errors - * as if they were fatal errors. - */ - public XmlReader (boolean invalidIsFatal) - { - if (invalidIsFatal) - setErrorHandler (new DefaultHandler2 () { - public void error (SAXParseException e) - throws SAXException - { throw e; } - }); - } - - /** - * <b>SAX2</b>: Returns the object used to report the logical - * content of an XML document. - */ - public ContentHandler getContentHandler () - { return filter.getContentHandler (); } - - /** - * <b>SAX2</b>: Assigns the object used to report the logical - * content of an XML document. 
- * @exception IllegalStateException if called mid-parse - */ - public void setContentHandler (ContentHandler handler) + static class FatalErrorHandler + extends DefaultHandler2 + { + + public void error(SAXParseException e) + throws SAXException { - if (active) - throw new IllegalStateException ("already parsing"); - filter.setContentHandler (handler); + throw e; } - - /** - * <b>SAX2</b>: Returns the object used to process declarations related - * to notations and unparsed entities. - */ - public DTDHandler getDTDHandler () - { return filter.getDTDHandler (); } - - /** - * <b>SAX1</b> Assigns DTD handler - * @exception IllegalStateException if called mid-parse - */ - public void setDTDHandler (DTDHandler handler) - { - if (active) - throw new IllegalStateException ("already parsing"); - filter.setDTDHandler (handler); - } - - /** - * <b>SAX2</b>: Returns the object used when resolving external - * entities during parsing (both general and parameter entities). - */ - public EntityResolver getEntityResolver () - { return aelfred2.getEntityResolver (); } - - /** <b>SAX1</b> Assigns parser's entity resolver */ - public void setEntityResolver (EntityResolver handler) - { aelfred2.setEntityResolver (handler); } - - /** - * <b>SAX2</b>: Returns the object used to receive callbacks for XML - * errors of all levels (fatal, nonfatal, warning); this is never null; - */ - public ErrorHandler getErrorHandler () - { return aelfred2.getErrorHandler (); } - - /** - * <b>SAX1</b> Assigns error handler - * @exception IllegalStateException if called mid-parse - */ - public void setErrorHandler (ErrorHandler handler) - { - if (active) - throw new IllegalStateException ("already parsing"); - aelfred2.setErrorHandler (handler); - } - - /** - * <b>SAX2</b>: Assigns the specified property. 
- * @exception IllegalStateException if called mid-parse - */ - public void setProperty (String propertyId, Object value) + + } + + private SAXDriver aelfred2 = new SAXDriver(); + private EventFilter filter = new EventFilter(); + private boolean isValidating; + private boolean active; + + /** + * Constructs a SAX Parser. + */ + public XmlReader() + { + } + + /** + * Constructs a SAX Parser, optionally treating validity errors + * as if they were fatal errors. + */ + public XmlReader(boolean invalidIsFatal) + { + if (invalidIsFatal) + { + setErrorHandler(new FatalErrorHandler()); + } + } + + /** + * <b>SAX2</b>: Returns the object used to report the logical + * content of an XML document. + */ + public ContentHandler getContentHandler() + { + return filter.getContentHandler(); + } + + /** + * <b>SAX2</b>: Assigns the object used to report the logical + * content of an XML document. + * @exception IllegalStateException if called mid-parse + */ + public void setContentHandler(ContentHandler handler) + { + if (active) + { + throw new IllegalStateException("already parsing"); + } + filter.setContentHandler(handler); + } + + /** + * <b>SAX2</b>: Returns the object used to process declarations related + * to notations and unparsed entities. + */ + public DTDHandler getDTDHandler() + { + return filter.getDTDHandler(); + } + + /** + * <b>SAX1</b> Assigns DTD handler + * @exception IllegalStateException if called mid-parse + */ + public void setDTDHandler(DTDHandler handler) + { + if (active) + { + throw new IllegalStateException("already parsing"); + } + filter.setDTDHandler(handler); + } + + /** + * <b>SAX2</b>: Returns the object used when resolving external + * entities during parsing (both general and parameter entities). 
+ */ + public EntityResolver getEntityResolver() + { + return aelfred2.getEntityResolver(); + } + + /** + * <b>SAX1</b> Assigns parser's entity resolver + */ + public void setEntityResolver(EntityResolver handler) + { + aelfred2.setEntityResolver(handler); + } + + /** + * <b>SAX2</b>: Returns the object used to receive callbacks for XML + * errors of all levels (fatal, nonfatal, warning); this is never null; + */ + public ErrorHandler getErrorHandler() + { + return aelfred2.getErrorHandler(); + } + + /** + * <b>SAX1</b> Assigns error handler + * @exception IllegalStateException if called mid-parse + */ + public void setErrorHandler(ErrorHandler handler) + { + if (active) + { + throw new IllegalStateException("already parsing"); + } + aelfred2.setErrorHandler(handler); + } + + /** + * <b>SAX2</b>: Assigns the specified property. + * @exception IllegalStateException if called mid-parse + */ + public void setProperty(String propertyId, Object value) throws SAXNotRecognizedException, SAXNotSupportedException - { - if (active) - throw new IllegalStateException ("already parsing"); - if (getProperty (propertyId) != value) - filter.setProperty (propertyId, value); - } - - /** - * <b>SAX2</b>: Returns the specified property. - */ - public Object getProperty (String propertyId) + { + if (active) + { + throw new IllegalStateException("already parsing"); + } + if (getProperty(propertyId) != value) + { + filter.setProperty(propertyId, value); + } + } + + /** + * <b>SAX2</b>: Returns the specified property. 
+ */ + public Object getProperty(String propertyId) throws SAXNotRecognizedException - { - if ((SAXDriver.PROPERTY + "declaration-handler") - .equals (propertyId) - || (SAXDriver.PROPERTY + "lexical-handler") - .equals (propertyId)) - return filter.getProperty (propertyId); - throw new SAXNotRecognizedException (propertyId); - } - - private void forceValidating () + { + if ((SAXDriver.PROPERTY + "declaration-handler").equals(propertyId) + || (SAXDriver.PROPERTY + "lexical-handler").equals(propertyId)) + { + return filter.getProperty(propertyId); + } + throw new SAXNotRecognizedException(propertyId); + } + + private void forceValidating() throws SAXNotRecognizedException, SAXNotSupportedException - { - aelfred2.setFeature ( - SAXDriver.FEATURE + "namespace-prefixes", - true); - aelfred2.setFeature ( - SAXDriver.FEATURE + "external-general-entities", - true); - aelfred2.setFeature ( - SAXDriver.FEATURE + "external-parameter-entities", - true); - } - - /** - * <b>SAX2</b>: Sets the state of features supported in this parser. - * Note that this parser requires reporting of namespace prefixes when - * validating. - */ - public void setFeature (String featureId, boolean state) + { + aelfred2.setFeature(SAXDriver.FEATURE + "namespace-prefixes", + true); + aelfred2.setFeature(SAXDriver.FEATURE + "external-general-entities", + true); + aelfred2.setFeature(SAXDriver.FEATURE + "external-parameter-entities", + true); + } + + /** + * <b>SAX2</b>: Sets the state of features supported in this parser. + * Note that this parser requires reporting of namespace prefixes when + * validating. 
+ */ + public void setFeature(String featureId, boolean state) throws SAXNotRecognizedException, SAXNotSupportedException - { - boolean value = getFeature (featureId); - - if (state == value) - return; - - if ((SAXDriver.FEATURE + "validation").equals (featureId)) { - if (active) - throw new SAXNotSupportedException ("already parsing"); - if (state) - forceValidating (); - isValidating = state; - } else - aelfred2.setFeature (featureId, state); - } - - /** - * <b>SAX2</b>: Tells whether this parser supports the specified feature. - * At this time, this directly parallels the underlying SAXDriver, - * except that validation is optionally supported. - * - * @see SAXDriver - */ - public boolean getFeature (String featureId) + { + boolean value = getFeature(featureId); + + if (state == value) + { + return; + } + + if ((SAXDriver.FEATURE + "validation").equals(featureId)) + { + if (active) + { + throw new SAXNotSupportedException("already parsing"); + } + if (state) + { + forceValidating(); + } + isValidating = state; + } + else + { + aelfred2.setFeature(featureId, state); + } + } + + /** + * <b>SAX2</b>: Tells whether this parser supports the specified feature. + * At this time, this directly parallels the underlying SAXDriver, + * except that validation is optionally supported. + * + * @see SAXDriver + */ + public boolean getFeature(String featureId) throws SAXNotRecognizedException, SAXNotSupportedException - { - if ((SAXDriver.FEATURE + "validation").equals (featureId)) - return isValidating; - - return aelfred2.getFeature (featureId); - } - - /** - * <b>SAX1</b>: Sets the locale used for diagnostics; currently, - * only locales using the English language are supported. 
- * @param locale The locale for which diagnostics will be generated - */ - public void setLocale (Locale locale) + { + if ((SAXDriver.FEATURE + "validation").equals(featureId)) + { + return isValidating; + } + + return aelfred2.getFeature(featureId); + } + + /** + * <b>SAX1</b>: Sets the locale used for diagnostics; currently, + * only locales using the English language are supported. + * @param locale The locale for which diagnostics will be generated + */ + public void setLocale(Locale locale) throws SAXException - { aelfred2.setLocale (locale); } + { + aelfred2.setLocale(locale); + } - /** - * <b>SAX1</b>: Preferred API to parse an XML document, using a - * system identifier (URI). + /** + * <b>SAX1</b>: Preferred API to parse an XML document, using a + * system identifier (URI). */ - public void parse (String systemId) + public void parse(String systemId) throws SAXException, IOException - { - parse (new InputSource (systemId)); - } - - /** - * <b>SAX1</b>: Underlying API to parse an XML document, used - * directly when no URI is available. When this is invoked, - * and the parser is set to validate, some features will be - * automatically reset to appropriate values: for reporting - * namespace prefixes, and incorporating external entities. - * - * @param source The XML input source. - * - * @exception IllegalStateException if called mid-parse - * @exception SAXException The handlers may throw any SAXException, - * and the parser normally throws SAXParseException objects. - * @exception IOException IOExceptions are normally through through - * the parser if there are problems reading the source document. - */ - public void parse (InputSource source) + { + parse(new InputSource(systemId)); + } + + /** + * <b>SAX1</b>: Underlying API to parse an XML document, used + * directly when no URI is available. 
When this is invoked, + * and the parser is set to validate, some features will be + * automatically reset to appropriate values: for reporting + * namespace prefixes, and incorporating external entities. + * + * @param source The XML input source. + * + * @exception IllegalStateException if called mid-parse + * @exception SAXException The handlers may throw any SAXException, + * and the parser normally throws SAXParseException objects. + * @exception IOException IOExceptions are normally through through + * the parser if there are problems reading the source document. + */ + public void parse(InputSource source) throws SAXException, IOException - { - EventFilter next; - boolean nsdecls; - - synchronized (aelfred2) { - if (active) - throw new IllegalStateException ("already parsing"); - active = true; - } - - // set up the output pipeline - if (isValidating) { - forceValidating (); - next = new ValidationConsumer (filter); - } else - next = filter; - - // connect pipeline and error handler - // don't let _this_ call to bind() affect xmlns* attributes - nsdecls = aelfred2.getFeature ( - SAXDriver.FEATURE + "namespace-prefixes"); - EventFilter.bind (aelfred2, next); - if (!nsdecls) - aelfred2.setFeature ( - SAXDriver.FEATURE + "namespace-prefixes", - false); - - // parse, clean up - try { - aelfred2.parse (source); - } finally { - active = false; - } - } + { + EventFilter next; + boolean nsdecls; + + synchronized (aelfred2) + { + if (active) + { + throw new IllegalStateException("already parsing"); + } + active = true; + } + + // set up the output pipeline + if (isValidating) + { + forceValidating(); + next = new ValidationConsumer(filter); + } + else + { + next = filter; + } + + // connect pipeline and error handler + // don't let _this_ call to bind() affect xmlns* attributes + nsdecls = aelfred2.getFeature(SAXDriver.FEATURE + "namespace-prefixes"); + EventFilter.bind(aelfred2, next); + if (!nsdecls) + { + aelfred2.setFeature(SAXDriver.FEATURE + 
"namespace-prefixes", + false); + } + + // parse, clean up + try + { + aelfred2.parse(source); + } + finally + { + active = false; + } + } + } + |