/* XCat.java -- Copyright (C) 2001, 2015 Free Software Foundation, Inc. This file is part of GNU Classpath. GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Classpath; see the file COPYING. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Linking this library statically or dynamically with other modules is making a combined work based on this library. Thus, the terms and conditions of the GNU General Public License cover the whole combination. As a special exception, the copyright holders of this library give you permission to link this library with independent modules to produce an executable, regardless of the license terms of these independent modules, and to copy and distribute the resulting executable under terms of your choice, provided that you also meet, for each linked independent module, the terms and conditions of the license of that module. An independent module is a module which is not derived from or based on this library. If you modify this library, you may extend this exception to your version of the library, but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. 
*/ package gnu.xml.util; import gnu.java.lang.CPStringBuilder; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.net.URL; import java.util.Enumeration; import java.util.Hashtable; import java.util.StringTokenizer; import java.util.Stack; import java.util.Vector; import org.xml.sax.Attributes; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.SAXNotRecognizedException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.ext.DefaultHandler2; import org.xml.sax.ext.EntityResolver2; import org.xml.sax.helpers.XMLReaderFactory; /** * Packages OASIS XML Catalogs, * primarily for entity resolution by parsers. * That specification defines an XML syntax for mappings between * identifiers declared in DTDs (particularly PUBLIC identifiers) and * locations. SAX has always supported such mappings, but conventions for * an XML file syntax to maintain them have previously been lacking. * *
This has three main operational modes. The primary intended mode is * to create a resolver, then preload it with one or more site-standard * catalogs before using it with one or more SAX parsers:
* XCat catalog = new XCat (); * catalog.setErrorHandler (diagnosticErrorHandler); * catalog.loadCatalog ("file:/local/catalogs/catalog.cat"); * catalog.loadCatalog ("http://shared/catalog.cat"); * ... * catalog.disableLoading (); * parser1.setEntityResolver (catalog); * parser2.setEntityResolver (catalog); * ...* *
A second mode is to arrange that your application uses instances of * this class as its entity resolver, and automatically loads catalogs * referenced by <?oasis-xml-catalog...?> processing * instructions found before the DTD in documents it parses. * It would then discard the resolver after each parse. * *
A third mode applies catalogs in contexts other than entity * resolution for parsers. * The {@link #resolveURI resolveURI()} method supports resolving URIs * stored in XML application data, rather than inside DTDs. * Catalogs would be loaded as shown above, and the catalog could * be used concurrently for parser entity resolution and for * application URI resolution. *
* *Errors in catalogs implicitly loaded (during resolution) are ignored * beyond being reported through any ErrorHandler assigned using * {@link #setErrorHandler setErrorHandler()}. SAX exceptions * thrown from such a handler won't abort resolution, although throwing a * RuntimeException or Error will normally abort both * resolution and parsing. Useful diagnostic information is available to * any ErrorHandler used to report problems, or from any exception * thrown from an explicit {@link #loadCatalog loadCatalog()} invocation. * Applications can use that information as troubleshooting aids. * *
While this class requires SAX2 Extensions 1.1 classes in * its class path, basic functionality does not require using a SAX2 * parser that supports the extended entity resolution functionality. * See the original SAX1 * {@link #resolveEntity(java.lang.String,java.lang.String) resolveEntity()} * method for a list of restrictions which apply when it is used with * older SAX parsers. * * @see EntityResolver2 * * @author David Brownell */ public class XCat implements EntityResolver2 { private Catalog catalogs []; private boolean usingPublic = true; private boolean loadingPermitted = true; private boolean unified = true; private String parserClass; private ErrorHandler errorHandler; // private EntityResolver next; // chain to next if we fail... // // NOTE: This is a straightforward implementation, and if // there are lots of "nextCatalog" or "delegate*" entries // in use, two tweaks would be worth considering: // // - Centralize some sort of cache (key by URI) for individual // resolvers. That'd avoid multiple copies of a given catalog. // // - Have resolution track what catalogs (+modes) have been // searched. This would support loop detection. // /** * Initializes without preloading a catalog. * This API is convenient when you may want to arrange that catalogs * are automatically loaded when explicitly referenced in documents, * using the oasis-xml-catalog processing instruction. * In such cases you won't usually be able to preload catalogs. */ public XCat () { } /** * Initializes, and preloads a catalog using the default SAX parser. * This API is convenient when you operate with one or more standard * catalogs. * *
<p>This just delegates to {@link #loadCatalog loadCatalog()};
 * see it for exception information.
 *
 * @param uri absolute URI for the catalog file.
 */
public XCat (String uri)
throws SAXException, IOException
{
    // Preloading is nothing more than an ordinary explicit load.
    this.loadCatalog (uri);
}

/**
 * Loads an OASIS XML Catalog.
 * It is appended to the list of currently active catalogs, or
 * reloaded if a catalog with the same URI was already loaded.
 * Callers have control over what parser is used, how catalog parsing
 * errors are reported, and whether URIs will be resolved consistently.
 *
 *
The OASIS specification says that errors detected when loading * catalogs "must recover by ignoring the catalog entry file that * failed, and proceeding." In this API, that action can be the * responsibility of applications, when they explicitly load any * catalog using this method. * *
Note that catalogs referenced by this one will not be loaded * at this time. Catalogs referenced through nextCatalog * or delegate* elements are normally loaded only if needed. * * @see #setErrorHandler * @see #setParserClass * @see #setUnified * * @param uri absolute URI for the catalog file. * * @exception IOException As thrown by the parser, typically to * indicate problems reading data from that URI. * @exception SAXException As thrown by the parser, typically to * indicate problems parsing data from that URI. It may also * be thrown if the parser doesn't support necessary handlers. * @exception IllegalStateException When attempting to load a * catalog after loading has been {@link #disableLoading disabled}, * such as after any entity or URI lookup has been performed. */ public synchronized void loadCatalog (String uri) throws SAXException, IOException { Catalog catalog; int index = -1; if (!loadingPermitted) throw new IllegalStateException (); uri = normalizeURI (uri); if (catalogs != null) { // maybe just reload for (index = 0; index < catalogs.length; index++) if (uri.equals (catalogs [index].catalogURI)) break; } catalog = loadCatalog (parserClass, errorHandler, uri, unified); // add to list of catalogs if (catalogs == null) { index = 0; catalogs = new Catalog [1]; } else if (index == catalogs.length) { Catalog tmp []; tmp = new Catalog [index + 1]; System.arraycopy (catalogs, 0, tmp, 0, index); catalogs = tmp; } catalogs [index] = catalog; } /** * "New Style" external entity resolution for parsers. * Calls to this method prevent explicit loading of additional catalogs * using {@link #loadCatalog loadCatalog()}. * *
This supports the full core catalog functionality for locating * (and relocating) parsed entities that have been declared in a * document's DTD. * * @param name Entity name, such as "dudley", "%nell", or "[dtd]". * @param publicId Either a normalized public ID, or null. * @param baseURI Absolute base URI associated with systemId. * @param systemId URI found in entity declaration (may be * relative to baseURI). * * @return Input source for accessing the external entity, or null * if no mapping was found. The input source may have opened * the stream, and will have a fully resolved URI. * * @see #getExternalSubset */ public InputSource resolveEntity ( String name, // UNUSED ... systemId is always non-null String publicId, String baseURI, // UNUSED ... it just lets sysId be relative String systemId ) throws SAXException, IOException { if (loadingPermitted) disableLoading (); try { // steps as found in OASIS XML catalog spec 7.1.2 // steps 1, 8 involve looping over the list of catalogs for (int i = 0; i < catalogs.length; i++) { InputSource retval; retval = catalogs [i].resolve (usingPublic, publicId, systemId); if (retval != null) return retval; } } catch (DoneDelegation x) { // done! } // step 9 involves returning "no match" return null; } /** * "New Style" parser callback to add an external subset. * For documents that don't include an external subset, this may * return one according to doctype catalog entries. * (This functionality is not a core part of the OASIS XML Catalog * specification, though it's presented in an appendix.) * If no such entry is defined, this returns null to indicate that * this document will not be modified to include such a subset. * Calls to this method prevent explicit loading of additional catalogs * using {@link #loadCatalog loadCatalog()}. * *
Warning: That catalog functionality can be dangerous. * It can provide definitions of general entities, and thereby mask * certain well formedess errors. * * @param name Name of the document element, either as declared in * a DOCTYPE declaration or as observed in the text. * @param baseURI Document's base URI (absolute). * * @return Input source for accessing the external subset, or null * if no mapping was found. The input source may have opened * the stream, and will have a fully resolved URI. */ public InputSource getExternalSubset (String name, String baseURI) throws SAXException, IOException { if (loadingPermitted) disableLoading (); try { for (int i = 0; i < catalogs.length; i++) { InputSource retval = catalogs [i].getExternalSubset (name); if (retval != null) return retval; } } catch (DoneDelegation x) { // done! } return null; } /** * "Old Style" external entity resolution for parsers. * This API provides only core functionality. * Calls to this method prevent explicit loading of additional catalogs * using {@link #loadCatalog loadCatalog()}. * *
The functional limitations of this interface include:
Applications can tell whether this limited functionality will be * used: if the feature flag associated with the {@link EntityResolver2} * interface is not true, the limitations apply. Applications * can't usually know whether a given document and catalog will trigger * those limitations. The issue can only be bypassed by operational * procedures such as not using catalogs or documents which involve * those features. * * @param publicId Either a normalized public ID, or null * @param systemId Always an absolute URI. * * @return Input source for accessing the external entity, or null * if no mapping was found. The input source may have opened * the stream, and will have a fully resolved URI. */ final public InputSource resolveEntity (String publicId, String systemId) throws SAXException, IOException { return resolveEntity (null, publicId, null, systemId); } /** * Resolves a URI reference that's not defined to the DTD. * This is intended for use with URIs found in document text, such as * xml-stylesheet processing instructions and in attribute * values, where they are not recognized as URIs by XML parsers. * Calls to this method prevent explicit loading of additional catalogs * using {@link #loadCatalog loadCatalog()}. * *
This functionality is supported by the OASIS XML Catalog * specification, but will never be invoked by an XML parser. * It corresponds closely to functionality for mapping system * identifiers for entities declared in DTDs; closely enough that * this implementation's default behavior is that they be * identical, to minimize potential confusion. * *
This method could be useful when implementing the * {@link javax.xml.transform.URIResolver} interface, wrapping the * input source in a {@link javax.xml.transform.sax.SAXSource}. * * @see #isUnified * @see #setUnified * * @param baseURI The relevant base URI as specified by the XML Base * specification. This recognizes xml:base attributes * as overriding the actual (physical) base URI. * @param uri Either an absolute URI, or one relative to baseURI * * @return Input source for accessing the mapped URI, or null * if no mapping was found. The input source may have opened * the stream, and will have a fully resolved URI. */ public InputSource resolveURI (String baseURI, String uri) throws SAXException, IOException { if (loadingPermitted) disableLoading (); // NOTE: baseURI isn't used here, but caller MUST have it, // and heuristics _might_ use it in the future ... plus, // it's symmetric with resolveEntity (). // steps 1, 6 involve looping try { for (int i = 0; i < catalogs.length; i++) { InputSource tmp = catalogs [i].resolveURI (uri); if (tmp != null) return tmp; } } catch (DoneDelegation x) { // done } // step 7 reports no match return null; } /** * Records that catalog loading is no longer permitted. * Loading is automatically disabled when lookups are performed, * and should be manually disabled when startDTD() (or * any other DTD declaration callback) is invoked, or at the latest * when the document root element is seen. */ public synchronized void disableLoading () { // NOTE: this method and loadCatalog() are synchronized // so that it's impossible to load (top level) catalogs // after lookups start. Likewise, deferred loading is also // synchronized (for "next" and delegated catalogs) to // ensure that parsers can share resolvers. loadingPermitted = false; } /** * Returns the error handler used to report catalog errors. * Null is returned if the parser's default error handling * will be used. 
*
 * @see #setErrorHandler
 */
public ErrorHandler getErrorHandler ()
{
    // may be null, when no handler was assigned
    return this.errorHandler;
}

/**
 * Assigns the error handler used to report catalog errors.
 * These errors may come either from the SAX2 parser or
 * from the catalog parsing code driven by the parser.
 *
 *
If you're sharing the resolver between parsers, don't * change this once lookups have begun. * * @see #getErrorHandler * * @param parser The error handler, or null saying to use the default * (no diagnostics, and only fatal errors terminate loading). */ public void setErrorHandler (ErrorHandler handler) { errorHandler = handler; } /** * Returns the name of the SAX2 parser class used to parse catalogs. * Null is returned if the system default is used. * @see #setParserClass */ public String getParserClass () { return parserClass; } /** * Names the SAX2 parser class used to parse catalogs. * *
If you're sharing the resolver between parsers, don't change * this once lookups have begun. * *
Note that in order to properly support the xml:base * attribute and relative URI resolution, the SAX parser used to parse * the catalog must provide a {@link Locator} and support the optional * declaration and lexical handlers. * * @see #getParserClass * * @param parser The parser class name, or null saying to use the * system default SAX2 parser. */ public void setParserClass (String parser) { parserClass = parser; } /** * Returns true (the default) if all methods resolve * a given URI in the same way. * Returns false if calls resolving URIs as entities (such as * {@link #resolveEntity resolveEntity()}) use different catalog entries * than those resolving them as URIs ({@link #resolveURI resolveURI()}), * which will generally produce different results. * *
The OASIS XML Catalog specification defines two related schemes * to map URIs "as URIs" or "as system IDs". * URIs use uri, rewriteURI, and delegateURI * elements. System IDs do the same things with systemId, * rewriteSystemId, and delegateSystemId. * It's confusing and error prone to maintain two parallel copies of * such data. Accordingly, this class makes that behavior optional. * The unified interpretation of URI mappings is preferred, * since it prevents surprises where one URI gets mapped to different * contents depending on whether the reference happens to have come * from a DTD (or not). * * @see #setUnified */ public boolean isUnified () { return unified; } /** * Assigns the value of the flag returned by {@link #isUnified}. * Set it to false to be strictly conformant with the OASIS XML Catalog * specification. Set it to true to make all mappings for a given URI * give the same result, regardless of the reason for the mapping. * *
<p>Don't change this once you've loaded the first catalog.
 *
 * @param value new flag setting
 */
public void setUnified (boolean value)
{
    // stored as given; it is handed to catalog loads that follow
    this.unified = value;
}

/**
 * Returns true (the default) if a catalog's public identifier
 * mappings will be used.
 * When false is returned, such mappings are ignored except when
 * system IDs are discarded, such as for
 * entities using the urn:publicid: URI scheme in their
 * system identifiers.  (See RFC 3151 for information about that
 * URI scheme.  Using it in system identifiers may not work well
 * with many SAX parsers unless the resolve-dtd-uris
 * feature flag is set to false.)
 * @see #setUsingPublic
 */
public boolean isUsingPublic ()
{
    return this.usingPublic;
}

/**
 * Specifies which catalog search mode is used.
 * By default, public identifier mappings are able to override system
 * identifiers when both are available.
 * Applications may choose to ignore public
 * identifier mappings in such cases, so that system identifiers
 * declared in DTDs will only be overridden by an explicit catalog
 * match for that system ID.
 *
 *
If you're sharing the resolver between parsers, don't
* change this once lookups have begun.
* @see #isUsingPublic
*
* @param value true to always use public identifier mappings,
* false to only use them for system ids using the urn:publicid:
* URI scheme.
*/
public void setUsingPublic (boolean value)
{
    // stored as given; the entity lookup passes it to each catalog
    this.usingPublic = value;
}
/**
 * Parses one catalog file and returns the catalog built from it.
 * A fresh SAX2 parser is created for each call, and a new Loader is
 * registered as its content, declaration and lexical handler.
 *
 * @param parserClass name of the SAX2 parser class to instantiate,
 *  or null to use the XMLReaderFactory default.
 * @param eh handler given to the parser and to the Loader, or null
 *  to leave the parser's own error handling in place.
 * @param uri URI of the catalog; passed to the parser and recorded
 *  in the returned catalog.
 * @param unified flag passed through to the Loader.
 *
 * @return the catalog held by the Loader once parsing has finished.
 *
 * @exception IOException as thrown by the parser.
 * @exception SAXException as thrown by the parser, or when it
 *  rejects the declaration or lexical handler property.
 */
private static Catalog loadCatalog (
    String parserClass,
    ErrorHandler eh,
    String uri,
    boolean unified
) throws SAXException, IOException
{
    final XMLReader reader = (parserClass != null)
        ? XMLReaderFactory.createXMLReader (parserClass)
        : XMLReaderFactory.createXMLReader ();

    if (eh != null)
        reader.setErrorHandler (eh);

    // resolve-dtd-entities is at default value (unrecognized == true)

    // A parser that doesn't recognize the string-interning feature
    // is treated as one that doesn't intern.
    boolean interning;
    try {
        interning = reader.getFeature (
            "http://xml.org/sax/features/string-interning");
    } catch (SAXNotRecognizedException e) {
        interning = false;
    }

    final Loader handler = new Loader (interning, eh, unified);

    // Record how and from where this catalog was loaded.
    final Catalog pending = handler.cat;
    pending.parserClass = parserClass;
    pending.catalogURI = uri;

    reader.setContentHandler (handler);
    reader.setProperty (
        "http://xml.org/sax/properties/declaration-handler",
        handler);
    reader.setProperty (
        "http://xml.org/sax/properties/lexical-handler",
        handler);

    reader.parse (uri);

    // Re-read the field after parsing, exactly as the result is defined.
    return handler.cat;
}
/**
 * Performs one or both of the normalizations for public identifiers.
 *
 * <p>An identifier starting with "urn:publicid:" is first unwrapped:
 * '+' becomes a space, ':' becomes "//", ';' becomes "::", and each
 * "%XX" escape (two ASCII hex digits) becomes the single character
 * with that code.  A '%' that is not followed by two hex digits is
 * copied through unchanged.  Unwrapped identifiers always get the
 * whitespace normalization as well.
 *
 * <p>Whitespace normalization drops leading and trailing runs of
 * space, CR and LF, and collapses interior runs to a single space.
 *
 * @param full true to force whitespace normalization even when the
 *  identifier is not a "urn:publicid:" URN.
 * @param publicId the identifier to normalize; must not be null.
 *
 * @return the normalized identifier.  This is null when whitespace
 *  normalization was applied and no non-whitespace text remained.
 */
private static String normalizePublicId (boolean full, String publicId)
{
    if (publicId.startsWith ("urn:publicid:")) {
        final int length = publicId.length ();
        StringBuilder buf = new StringBuilder (length);

        // 13 == "urn:publicid:".length ()
        for (int i = 13; i < length; i++) {
            char c = publicId.charAt (i);

            switch (c) {
            case '+':
                buf.append (' ');
                break;
            case ':':
                buf.append ("//");
                break;
            case ';':
                buf.append ("::");
                break;
            case '%': {
                // decode %XX; only ASCII hex digits are accepted
                int hi = -1;
                int lo = -1;

                if (i + 2 < length) {
                    char h = publicId.charAt (i + 1);
                    char l = publicId.charAt (i + 2);

                    if (h < 0x80 && l < 0x80) {
                        hi = Character.digit (h, 16);
                        lo = Character.digit (l, 16);
                    }
                }
                if (hi >= 0 && lo >= 0) {
                    // The decoded character is appended as-is, so an
                    // escaped '+', ':' or ';' is not mapped again.
                    buf.append ((char) ((hi << 4) | lo));
                    i += 2;
                } else {
                    // malformed escape: keep the '%' literally
                    buf.append (c);
                }
                break;
            }
            default:
                buf.append (c);
                break;
            }
        }
        publicId = buf.toString ();
        full = true;
    }

    // SAX parsers do everything except that URN mapping, but
    // we can't trust other sources to normalize correctly
    if (full) {
        StringTokenizer tokens = new StringTokenizer (publicId, " \r\n");
        StringBuilder joined = null;

        while (tokens.hasMoreTokens ()) {
            if (joined == null)
                joined = new StringBuilder (tokens.nextToken ());
            else
                joined.append (' ').append (tokens.nextToken ());
        }
        // no tokens at all leaves the result null, as before
        publicId = (joined == null) ? null : joined.toString ();
    }
    return publicId;
}
/**
 * Tells whether a character (or byte value) has to be %-escaped
 * when a URI is normalized.
 *
 * @param c the character or unsigned byte value to test.
 * @return true for anything at or below 0x20, anything at or above
 *  0x7f, and for the characters {@code " < > ^ ` { | }}.
 */
private static boolean isUriExcluded (int c)
{
    // controls, space, DEL, and everything outside ASCII
    if (c <= 0x20 || c >= 0x7f)
        return true;

    // NOTE(review): '\\' is not in this set, although XML 1.0 section
    // 4.2.2 lists it among the characters to escape -- confirm whether
    // that omission is intended before changing it.
    switch (c) {
    case '"':
    case '<':
    case '>':
    case '^':
    case '`':
    case '{':
    case '|':
    case '}':
        return true;
    default:
        return false;
    }
}
/**
 * Maps a four-bit value to its lowercase hexadecimal digit.
 *
 * @param c the value to convert; callers pass 0 through 15.
 * @return the character code of '0'-'9' for values below ten,
 *  and of 'a'-'f' for ten through fifteen.
 */
private static int hexNibble (int c)
{
    // digits count up from '0', letters from 'a' (which stands for ten)
    final int base = (c < 10) ? '0' : ('a' - 10);

    return base + c;
}
/**
 * Escapes the "excluded" characters of a URI.
 * When the URI contains no character that {@code isUriExcluded}
 * rejects, the same string is returned.  Otherwise the URI is
 * encoded as UTF-8 and every rejected byte is written as '%'
 * followed by two hex digits from {@code hexNibble}; all other
 * bytes are copied as they are.  Existing '%' characters are not
 * touched, so text that is already escaped is not escaped twice.
 *
 * @param systemId the URI to normalize; must not be null.
 * @return the URI with every excluded character %-escaped.
 * @exception RuntimeException if the JVM lacks the UTF8 or 8859_1
 *  encoding; the underlying exception is attached as the cause.
 */
private static String normalizeURI (String systemId)
{
    final int length = systemId.length ();

    // Fast path: most URIs need no escaping at all.
    boolean needsEscaping = false;
    for (int i = 0; i < length && !needsEscaping; i++)
        needsEscaping = isUriExcluded (systemId.charAt (i));
    if (!needsEscaping)
        return systemId;

    // escape non-ASCII plus "excluded" characters
    // a JVM that doesn't know UTF8 and 8859_1 is unusable!
    try {
        final byte utf8 [] = systemId.getBytes ("UTF8");
        final ByteArrayOutputStream out
            = new ByteArrayOutputStream (utf8.length + 10);

        for (int j = 0; j < utf8.length; j++) {
            final int b = utf8 [j] & 0x0ff;

            if (isUriExcluded (b)) {
                out.write ((int) '%');
                out.write (hexNibble (b >> 4));
                out.write (hexNibble (b & 0x0f));
            } else
                out.write (b);
        }
        // Only ASCII bytes were written, so this decoding is lossless.
        return out.toString ("8859_1");
    } catch (IOException e) {
        // keep the original failure attached for diagnosis
        throw new RuntimeException (
            "can't normalize URI: " + e.getMessage (), e);
    }
}
/**
 * Thrown to mark the authoritative end of a search.  The lookup
 * methods catch it and then report "no match".
 */
private static class DoneDelegation
    extends SAXException
{
    /** Creates the marker; it carries no message and no cause. */
    DoneDelegation ()
    {
        super ();
    }
}
/**
* Represents an OASIS XML Catalog, and encapsulates much of
* the catalog functionality.
*/
private static class Catalog
{
// loading infrastructure
String catalogURI;
ErrorHandler eh;
boolean unified;
String parserClass;
// catalog data
boolean hasPreference;
boolean usingPublic;
Hashtable