summaryrefslogtreecommitdiff
path: root/libjava/classpath/gnu/xml/util/XMLWriter.java
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/gnu/xml/util/XMLWriter.java')
-rw-r--r--libjava/classpath/gnu/xml/util/XMLWriter.java2372
1 files changed, 1186 insertions, 1186 deletions
diff --git a/libjava/classpath/gnu/xml/util/XMLWriter.java b/libjava/classpath/gnu/xml/util/XMLWriter.java
index 27459472019..a371debffb5 100644
--- a/libjava/classpath/gnu/xml/util/XMLWriter.java
+++ b/libjava/classpath/gnu/xml/util/XMLWriter.java
@@ -1,4 +1,4 @@
-/* XMLWriter.java --
+/* XMLWriter.java --
Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -111,74 +111,74 @@ public class XMLWriter
implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
{
// text prints/escapes differently depending on context
- // CTX_ENTITY ... entity literal value
- // CTX_ATTRIBUTE ... attribute literal value
- // CTX_CONTENT ... content of an element
- // CTX_UNPARSED ... CDATA, comment, PI, names, etc
+ // CTX_ENTITY ... entity literal value
+ // CTX_ATTRIBUTE ... attribute literal value
+ // CTX_CONTENT ... content of an element
+ // CTX_UNPARSED ... CDATA, comment, PI, names, etc
// CTX_NAME ... name or nmtoken, no escapes possible
- private static final int CTX_ENTITY = 1;
- private static final int CTX_ATTRIBUTE = 2;
- private static final int CTX_CONTENT = 3;
- private static final int CTX_UNPARSED = 4;
- private static final int CTX_NAME = 5;
+ private static final int CTX_ENTITY = 1;
+ private static final int CTX_ATTRIBUTE = 2;
+ private static final int CTX_CONTENT = 3;
+ private static final int CTX_UNPARSED = 4;
+ private static final int CTX_NAME = 5;
// FIXME: names (element, attribute, PI, notation, etc) are not
// currently written out with range checks (escapeChars).
// In non-XHTML, some names can't be directly written; panic!
- private static String sysEOL;
+ private static String sysEOL;
static {
- try {
- sysEOL = System.getProperty ("line.separator", "\n");
+ try {
+ sysEOL = System.getProperty ("line.separator", "\n");
- // don't use the system's EOL if it's illegal XML.
- if (!isLineEnd (sysEOL))
- sysEOL = "\n";
+ // don't use the system's EOL if it's illegal XML.
+ if (!isLineEnd (sysEOL))
+ sysEOL = "\n";
- } catch (SecurityException e) {
- sysEOL = "\n";
- }
+ } catch (SecurityException e) {
+ sysEOL = "\n";
+ }
}
private static boolean isLineEnd (String eol)
{
- return "\n".equals (eol)
- || "\r".equals (eol)
- || "\r\n".equals (eol);
+ return "\n".equals (eol)
+ || "\r".equals (eol)
+ || "\r\n".equals (eol);
}
- private Writer out;
- private boolean inCDATA;
- private int elementNestLevel;
- private String eol = sysEOL;
+ private Writer out;
+ private boolean inCDATA;
+ private int elementNestLevel;
+ private String eol = sysEOL;
- private short dangerMask;
- private CPStringBuilder stringBuf;
- private Locator locator;
- private ErrorHandler errHandler;
+ private short dangerMask;
+ private CPStringBuilder stringBuf;
+ private Locator locator;
+ private ErrorHandler errHandler;
- private boolean expandingEntities = false;
- private int entityNestLevel;
- private boolean xhtml;
- private boolean startedDoctype;
- private String encoding;
+ private boolean expandingEntities = false;
+ private int entityNestLevel;
+ private boolean xhtml;
+ private boolean startedDoctype;
+ private String encoding;
- private boolean canonical;
- private boolean inDoctype;
- private boolean inEpilogue;
+ private boolean canonical;
+ private boolean inDoctype;
+ private boolean inEpilogue;
// pretty printing controls
- private boolean prettyPrinting;
- private int column;
- private boolean noWrap;
- private Stack space = new Stack ();
+ private boolean prettyPrinting;
+ private int column;
+ private boolean noWrap;
+ private Stack space = new Stack ();
// this is not a hard'n'fast rule -- longer lines are OK,
// but are to be avoided. Here, prettyprinting is more to
// show structure "cleanly" than to be precise about it.
// better to have ragged layout than one line 24Kb long.
- private static final int lineLength = 75;
+ private static final int lineLength = 75;
/**
@@ -187,7 +187,7 @@ public class XMLWriter
* it's safe to close System.out at the end of the document.
*/
public XMLWriter () throws IOException
- { this (System.out); }
+ { this (System.out); }
/**
* Constructs a handler which writes all input to the output stream
@@ -198,7 +198,7 @@ public class XMLWriter
*/
public XMLWriter (OutputStream out) throws IOException
{
- this (new OutputStreamWriter (out, "UTF8"));
+ this (new OutputStreamWriter (out, "UTF8"));
}
/**
@@ -215,7 +215,7 @@ public class XMLWriter
*/
public XMLWriter (Writer writer)
{
- this (writer, null);
+ this (writer, null);
}
/**
@@ -248,53 +248,53 @@ public class XMLWriter
*
* @param writer XML text is written to this writer.
* @param encoding if non-null, and an XML declaration is written,
- * this is the name that will be used for the character encoding.
+ * this is the name that will be used for the character encoding.
*/
public XMLWriter (Writer writer, String encoding)
{
- setWriter (writer, encoding);
+ setWriter (writer, encoding);
}
-
+
private void setEncoding (String encoding)
{
- if (encoding == null && out instanceof OutputStreamWriter)
- encoding = ((OutputStreamWriter)out).getEncoding ();
-
- if (encoding != null) {
- encoding = encoding.toUpperCase ();
-
- // Use official encoding names where we know them,
- // avoiding the Java-only names. When using common
- // encodings where we can easily tell if characters
- // are out of range, we'll escape out-of-range
- // characters using character refs for safety.
-
- // I _think_ these are all the main synonyms for these!
- if ("UTF8".equals (encoding)) {
- encoding = "UTF-8";
- } else if ("US-ASCII".equals (encoding)
- || "ASCII".equals (encoding)) {
- dangerMask = (short) 0xff80;
- encoding = "US-ASCII";
- } else if ("ISO-8859-1".equals (encoding)
- || "8859_1".equals (encoding)
- || "ISO8859_1".equals (encoding)) {
- dangerMask = (short) 0xff00;
- encoding = "ISO-8859-1";
- } else if ("UNICODE".equals (encoding)
- || "UNICODE-BIG".equals (encoding)
- || "UNICODE-LITTLE".equals (encoding)) {
- encoding = "UTF-16";
-
- // TODO: UTF-16BE, UTF-16LE ... no BOM; what
- // release of JDK supports those Unicode names?
- }
-
- if (dangerMask != 0)
- stringBuf = new CPStringBuilder ();
- }
-
- this.encoding = encoding;
+ if (encoding == null && out instanceof OutputStreamWriter)
+ encoding = ((OutputStreamWriter)out).getEncoding ();
+
+ if (encoding != null) {
+ encoding = encoding.toUpperCase ();
+
+ // Use official encoding names where we know them,
+ // avoiding the Java-only names. When using common
+ // encodings where we can easily tell if characters
+ // are out of range, we'll escape out-of-range
+ // characters using character refs for safety.
+
+ // I _think_ these are all the main synonyms for these!
+ if ("UTF8".equals (encoding)) {
+ encoding = "UTF-8";
+ } else if ("US-ASCII".equals (encoding)
+ || "ASCII".equals (encoding)) {
+ dangerMask = (short) 0xff80;
+ encoding = "US-ASCII";
+ } else if ("ISO-8859-1".equals (encoding)
+ || "8859_1".equals (encoding)
+ || "ISO8859_1".equals (encoding)) {
+ dangerMask = (short) 0xff00;
+ encoding = "ISO-8859-1";
+ } else if ("UNICODE".equals (encoding)
+ || "UNICODE-BIG".equals (encoding)
+ || "UNICODE-LITTLE".equals (encoding)) {
+ encoding = "UTF-16";
+
+ // TODO: UTF-16BE, UTF-16LE ... no BOM; what
+ // release of JDK supports those Unicode names?
+ }
+
+ if (dangerMask != 0)
+ stringBuf = new CPStringBuilder ();
+ }
+
+ this.encoding = encoding;
}
@@ -303,37 +303,37 @@ public class XMLWriter
*
* @param writer XML text is written to this writer.
* @param encoding if non-null, and an XML declaration is written,
- * this is the name that will be used for the character encoding.
+ * this is the name that will be used for the character encoding.
*
* @exception IllegalStateException if the current
- * document hasn't yet ended (with {@link #endDocument})
+ * document hasn't yet ended (with {@link #endDocument})
*/
final public void setWriter (Writer writer, String encoding)
{
- if (out != null)
- throw new IllegalStateException (
- "can't change stream in mid course");
- out = writer;
- if (out != null)
- setEncoding (encoding);
- if (!(out instanceof BufferedWriter))
- out = new BufferedWriter (out);
- space.push ("default");
+ if (out != null)
+ throw new IllegalStateException (
+ "can't change stream in mid course");
+ out = writer;
+ if (out != null)
+ setEncoding (encoding);
+ if (!(out instanceof BufferedWriter))
+ out = new BufferedWriter (out);
+ space.push ("default");
}
/**
* Assigns the line ending style to be used on output.
* @param eolString null to use the system default; else
- * "\n", "\r", or "\r\n".
+ * "\n", "\r", or "\r\n".
*/
final public void setEOL (String eolString)
{
- if (eolString == null)
- eol = sysEOL;
- else if (!isLineEnd (eolString))
- eol = eolString;
- else
- throw new IllegalArgumentException (eolString);
+ if (eolString == null)
+ eol = sysEOL;
+ else if (!isLineEnd (eolString))
+ eol = eolString;
+ else
+ throw new IllegalArgumentException (eolString);
}
/**
@@ -342,7 +342,7 @@ public class XMLWriter
*/
public void setErrorHandler (ErrorHandler handler)
{
- errHandler = handler;
+ errHandler = handler;
}
/**
@@ -355,15 +355,15 @@ public class XMLWriter
protected void fatal (String message, Exception e)
throws SAXException
{
- SAXParseException x;
-
- if (locator == null)
- x = new SAXParseException (message, null, null, -1, -1, e);
- else
- x = new SAXParseException (message, locator, e);
- if (errHandler != null)
- errHandler.fatalError (x);
- throw x;
+ SAXParseException x;
+
+ if (locator == null)
+ x = new SAXParseException (message, null, null, -1, -1, e);
+ else
+ x = new SAXParseException (message, locator, e);
+ if (errHandler != null)
+ errHandler.fatalError (x);
+ throw x;
}
@@ -385,47 +385,47 @@ public class XMLWriter
* be careful of in all cases, as described in the appendix referenced
* above, include: <ul>
*
- * <li> Element and attribute names must be in lower case, both
- * in the document and in any CSS style sheet.
- * <li> All XML constructs must be valid as defined by the XHTML
- * "transitional" DTD (including all familiar constructs,
- * even deprecated ones).
- * <li> The root element must be "html".
- * <li> Elements that must be empty (such as <em>&lt;br&gt;</em>
- * must have no content.
- * <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
- * when specifying language.
- * <li> Similarly, use both <em>id</em> and <em>name</em> attributes
- * when defining elements that may be referred to through
- * URI fragment identifiers ... and make sure that the
- * value is a legal NMTOKEN, since not all such HTML 4.0
- * identifiers are valid in XML.
- * <li> Be careful with character encodings; make sure you provide
- * a <em>&lt;meta http-equiv="Content-type"
- * content="text/xml;charset=..." /&gt;</em> element in
- * the HTML "head" element, naming the same encoding
- * used to create this handler. Also, if that encoding
- * is anything other than US-ASCII, make sure that if
- * the document is given a MIME content type, it has
- * a <em>charset=...</em> attribute with that encoding.
- * </ul>
+ * <li> Element and attribute names must be in lower case, both
+ * in the document and in any CSS style sheet.
+ * <li> All XML constructs must be valid as defined by the XHTML
+ * "transitional" DTD (including all familiar constructs,
+ * even deprecated ones).
+ * <li> The root element must be "html".
+ * <li> Elements that must be empty (such as <em>&lt;br&gt;</em>
+ * must have no content.
+ * <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
+ * when specifying language.
+ * <li> Similarly, use both <em>id</em> and <em>name</em> attributes
+ * when defining elements that may be referred to through
+ * URI fragment identifiers ... and make sure that the
+ * value is a legal NMTOKEN, since not all such HTML 4.0
+ * identifiers are valid in XML.
+ * <li> Be careful with character encodings; make sure you provide
+ * a <em>&lt;meta http-equiv="Content-type"
+ * content="text/xml;charset=..." /&gt;</em> element in
+ * the HTML "head" element, naming the same encoding
+ * used to create this handler. Also, if that encoding
+ * is anything other than US-ASCII, make sure that if
+ * the document is given a MIME content type, it has
+ * a <em>charset=...</em> attribute with that encoding.
+ * </ul>
*
* <p> Additionally, some of the oldest browsers have additional
* quirks, to address with guidelines such as: <ul>
*
- * <li> Processing instructions may be rendered, so avoid them.
- * (Similarly for an XML declaration.)
- * <li> Embedded style sheets and scripts should not contain XML
- * markup delimiters: &amp;, &lt;, and ]]&gt; are trouble.
- * <li> Attribute values should not have line breaks or multiple
- * consecutive white space characters.
- * <li> Use no more than one of the deprecated (transitional)
- * <em>&lt;isindex&gt;</em> elements.
- * <li> Some boolean attributes (such as <em>compact, checked,
- * disabled, readonly, selected,</em> and more) confuse
- * some browsers, since they only understand minimized
- * versions which are illegal in XML.
- * </ul>
+ * <li> Processing instructions may be rendered, so avoid them.
+ * (Similarly for an XML declaration.)
+ * <li> Embedded style sheets and scripts should not contain XML
+ * markup delimiters: &amp;, &lt;, and ]]&gt; are trouble.
+ * <li> Attribute values should not have line breaks or multiple
+ * consecutive white space characters.
+ * <li> Use no more than one of the deprecated (transitional)
+ * <em>&lt;isindex&gt;</em> elements.
+ * <li> Some boolean attributes (such as <em>compact, checked,
+ * disabled, readonly, selected,</em> and more) confuse
+ * some browsers, since they only understand minimized
+ * versions which are illegal in XML.
+ * </ul>
*
* <p> Also, some characteristics of the resulting output may be
* a function of whether the document is later given a MIME
@@ -437,11 +437,11 @@ public class XMLWriter
*/
final public void setXhtml (boolean value)
{
- if (locator != null)
- throw new IllegalStateException ("started parsing");
- xhtml = value;
- if (xhtml)
- canonical = false;
+ if (locator != null)
+ throw new IllegalStateException ("started parsing");
+ xhtml = value;
+ if (xhtml)
+ canonical = false;
}
/**
@@ -452,7 +452,7 @@ public class XMLWriter
*/
final public boolean isXhtml ()
{
- return xhtml;
+ return xhtml;
}
/**
@@ -462,11 +462,11 @@ public class XMLWriter
*/
final public void setExpandingEntities (boolean value)
{
- if (locator != null)
- throw new IllegalStateException ("started parsing");
- expandingEntities = value;
- if (!expandingEntities)
- canonical = false;
+ if (locator != null)
+ throw new IllegalStateException ("started parsing");
+ expandingEntities = value;
+ if (!expandingEntities)
+ canonical = false;
}
/**
@@ -475,7 +475,7 @@ public class XMLWriter
*/
final public boolean isExpandingEntities ()
{
- return expandingEntities;
+ return expandingEntities;
}
/**
@@ -499,15 +499,15 @@ public class XMLWriter
* to make this be more strictly formatted!
*
* @exception IllegalStateException thrown if this method is invoked
- * after output has begun.
+ * after output has begun.
*/
final public void setPrettyPrinting (boolean value)
{
- if (locator != null)
- throw new IllegalStateException ("started parsing");
- prettyPrinting = value;
- if (prettyPrinting)
- canonical = false;
+ if (locator != null)
+ throw new IllegalStateException ("started parsing");
+ prettyPrinting = value;
+ if (prettyPrinting)
+ canonical = false;
}
/**
@@ -515,7 +515,7 @@ public class XMLWriter
*/
final public boolean isPrettyPrinting ()
{
- return prettyPrinting;
+ return prettyPrinting;
}
@@ -524,31 +524,31 @@ public class XMLWriter
* meet requirements that are slightly more stringent than the
* basic well-formedness ones, and include: <ul>
*
- * <li> Namespace prefixes must not have been changed from those
- * in the original document. (This may only be ensured by setting
- * the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
- * by default, it is cleared.)
+ * <li> Namespace prefixes must not have been changed from those
+ * in the original document. (This may only be ensured by setting
+ * the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
+ * by default, it is cleared.)
*
- * <li> Redundant namespace declaration attributes have been
- * removed. (If an ancestor element defines a namespace prefix
- * and that declaration hasn't been overriden, an element must
- * not redeclare it.)
+ * <li> Redundant namespace declaration attributes have been
+ * removed. (If an ancestor element defines a namespace prefix
+ * and that declaration hasn't been overriden, an element must
+ * not redeclare it.)
*
- * <li> If comments are not to be included in the canonical output,
- * they must first be removed from the input event stream; this
- * <em>Canonical XML with comments</em> by default.
+ * <li> If comments are not to be included in the canonical output,
+ * they must first be removed from the input event stream; this
+ * <em>Canonical XML with comments</em> by default.
*
- * <li> If the input character encoding was not UCS-based, the
- * character data must have been normalized using Unicode
- * Normalization Form C. (UTF-8 and UTF-16 are UCS-based.)
+ * <li> If the input character encoding was not UCS-based, the
+ * character data must have been normalized using Unicode
+ * Normalization Form C. (UTF-8 and UTF-16 are UCS-based.)
*
- * <li> Attribute values must have been normalized, as is done
- * by any conformant XML processor which processes all external
- * parameter entities.
+ * <li> Attribute values must have been normalized, as is done
+ * by any conformant XML processor which processes all external
+ * parameter entities.
*
- * <li> Similarly, attribute value defaulting has been performed.
+ * <li> Similarly, attribute value defaulting has been performed.
*
- * </ul>
+ * </ul>
*
* <p> Note that fragments of XML documents, as specified by an XPath
* node set, may be canonicalized. In such cases, elements may need
@@ -556,18 +556,18 @@ public class XMLWriter
* context).
*
* @exception IllegalArgumentException if the output encoding
- * is anything other than UTF-8.
+ * is anything other than UTF-8.
*/
final public void setCanonical (boolean value)
{
- if (value && !"UTF-8".equals (encoding))
- throw new IllegalArgumentException ("encoding != UTF-8");
- canonical = value;
- if (canonical) {
- prettyPrinting = xhtml = false;
- expandingEntities = true;
- eol = "\n";
- }
+ if (value && !"UTF-8".equals (encoding))
+ throw new IllegalArgumentException ("encoding != UTF-8");
+ canonical = value;
+ if (canonical) {
+ prettyPrinting = xhtml = false;
+ expandingEntities = true;
+ eol = "\n";
+ }
}
@@ -576,7 +576,7 @@ public class XMLWriter
*/
final public boolean isCanonical ()
{
- return canonical;
+ return canonical;
}
@@ -588,8 +588,8 @@ public class XMLWriter
final public void flush ()
throws IOException
{
- if (out != null)
- out.flush ();
+ if (out != null)
+ out.flush ();
}
@@ -597,7 +597,7 @@ public class XMLWriter
// FIXME: probably want a subclass that holds a lot of these...
// and maybe more!
-
+
/**
* Writes the string as if characters() had been called on the contents
* of the string. This is particularly useful when applications act as
@@ -606,8 +606,8 @@ public class XMLWriter
final public void write (String data)
throws SAXException
{
- char buf [] = data.toCharArray ();
- characters (buf, 0, buf.length);
+ char buf [] = data.toCharArray ();
+ characters (buf, 0, buf.length);
}
@@ -617,21 +617,21 @@ public class XMLWriter
* @see #startElement
*/
public void writeElement (
- String uri,
- String localName,
- String qName,
- Attributes atts,
- String content
+ String uri,
+ String localName,
+ String qName,
+ Attributes atts,
+ String content
) throws SAXException
{
- if (content == null || content.length () == 0) {
- writeEmptyElement (uri, localName, qName, atts);
- return;
- }
- startElement (uri, localName, qName, atts);
- char chars [] = content.toCharArray ();
- characters (chars, 0, chars.length);
- endElement (uri, localName, qName);
+ if (content == null || content.length () == 0) {
+ writeEmptyElement (uri, localName, qName, atts);
+ return;
+ }
+ startElement (uri, localName, qName, atts);
+ char chars [] = content.toCharArray ();
+ characters (chars, 0, chars.length);
+ endElement (uri, localName, qName);
}
@@ -642,14 +642,14 @@ public class XMLWriter
* @see #startElement
*/
public void writeElement (
- String uri,
- String localName,
- String qName,
- Attributes atts,
- int content
+ String uri,
+ String localName,
+ String qName,
+ Attributes atts,
+ int content
) throws SAXException
{
- writeElement (uri, localName, qName, atts, Integer.toString (content));
+ writeElement (uri, localName, qName, atts, Integer.toString (content));
}
@@ -657,7 +657,7 @@ public class XMLWriter
/** <b>SAX1</b>: provides parser status information */
final public void setDocumentLocator (Locator l)
{
- locator = l;
+ locator = l;
}
@@ -665,7 +665,7 @@ public class XMLWriter
private static final String xhtmlFullDTD =
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
-
+
/**
* <b>SAX1</b>: indicates the beginning of a document parse.
* If you're writing (well formed) fragments of XML, neither
@@ -675,60 +675,60 @@ public class XMLWriter
public void startDocument ()
throws SAXException
{
- try {
- if (out == null)
- throw new IllegalStateException (
- "null Writer given to XMLWriter");
-
- // Not all parsers provide the locator we want; this also
- // flags whether events are being sent to this object yet.
- // We could only have this one call if we only printed whole
- // documents ... but we also print fragments, so most of the
- // callbacks here replicate this test.
-
- if (locator == null)
- locator = new LocatorImpl ();
-
- // Unless the data is in US-ASCII or we're canonicalizing, write
- // the XML declaration if we know the encoding. US-ASCII won't
- // normally get mangled by web server confusion about the
- // character encodings used. Plus, it's an easy way to
- // ensure we can write ASCII that's unlikely to confuse
- // elderly HTML parsers.
-
- if (!canonical
- && dangerMask != (short) 0xff80
- && encoding != null) {
- rawWrite ("<?xml version='1.0'");
- rawWrite (" encoding='" + encoding + "'");
- rawWrite ("?>");
- newline ();
- }
-
- if (xhtml) {
-
- rawWrite ("<!DOCTYPE html PUBLIC");
- newline ();
- rawWrite (" '-//W3C//DTD XHTML 1.0 Transitional//EN'");
- newline ();
- rawWrite (" '");
- // NOTE: URL (above) matches the REC
- rawWrite (xhtmlFullDTD);
- rawWrite ("'>");
- newline ();
- newline ();
-
- // fake the rest of the handler into ignoring
- // everything until the root element, so any
- // XHTML DTD comments, PIs, etc are ignored
- startedDoctype = true;
- }
-
- entityNestLevel = 0;
-
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ try {
+ if (out == null)
+ throw new IllegalStateException (
+ "null Writer given to XMLWriter");
+
+ // Not all parsers provide the locator we want; this also
+ // flags whether events are being sent to this object yet.
+ // We could only have this one call if we only printed whole
+ // documents ... but we also print fragments, so most of the
+ // callbacks here replicate this test.
+
+ if (locator == null)
+ locator = new LocatorImpl ();
+
+ // Unless the data is in US-ASCII or we're canonicalizing, write
+ // the XML declaration if we know the encoding. US-ASCII won't
+ // normally get mangled by web server confusion about the
+ // character encodings used. Plus, it's an easy way to
+ // ensure we can write ASCII that's unlikely to confuse
+ // elderly HTML parsers.
+
+ if (!canonical
+ && dangerMask != (short) 0xff80
+ && encoding != null) {
+ rawWrite ("<?xml version='1.0'");
+ rawWrite (" encoding='" + encoding + "'");
+ rawWrite ("?>");
+ newline ();
+ }
+
+ if (xhtml) {
+
+ rawWrite ("<!DOCTYPE html PUBLIC");
+ newline ();
+ rawWrite (" '-//W3C//DTD XHTML 1.0 Transitional//EN'");
+ newline ();
+ rawWrite (" '");
+ // NOTE: URL (above) matches the REC
+ rawWrite (xhtmlFullDTD);
+ rawWrite ("'>");
+ newline ();
+ newline ();
+
+ // fake the rest of the handler into ignoring
+ // everything until the root element, so any
+ // XHTML DTD comments, PIs, etc are ignored
+ startedDoctype = true;
+ }
+
+ entityNestLevel = 0;
+
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/**
@@ -740,164 +740,164 @@ public class XMLWriter
public void endDocument ()
throws SAXException
{
- try {
- if (!canonical) {
- newline ();
- newline ();
- }
- out.close ();
- out = null;
- locator = null;
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ try {
+ if (!canonical) {
+ newline ();
+ newline ();
+ }
+ out.close ();
+ out = null;
+ locator = null;
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
// XHTML elements declared as EMPTY print differently
final private static boolean isEmptyElementTag (String tag)
{
- switch (tag.charAt (0)) {
- case 'a': return "area".equals (tag);
- case 'b': return "base".equals (tag)
- || "basefont".equals (tag)
- || "br".equals (tag);
- case 'c': return "col".equals (tag);
- case 'f': return "frame".equals (tag);
- case 'h': return "hr".equals (tag);
- case 'i': return "img".equals (tag)
- || "input".equals (tag)
- || "isindex".equals (tag);
- case 'l': return "link".equals (tag);
- case 'm': return "meta".equals (tag);
- case 'p': return "param".equals (tag);
- }
- return false;
+ switch (tag.charAt (0)) {
+ case 'a': return "area".equals (tag);
+ case 'b': return "base".equals (tag)
+ || "basefont".equals (tag)
+ || "br".equals (tag);
+ case 'c': return "col".equals (tag);
+ case 'f': return "frame".equals (tag);
+ case 'h': return "hr".equals (tag);
+ case 'i': return "img".equals (tag)
+ || "input".equals (tag)
+ || "isindex".equals (tag);
+ case 'l': return "link".equals (tag);
+ case 'm': return "meta".equals (tag);
+ case 'p': return "param".equals (tag);
+ }
+ return false;
}
private static boolean indentBefore (String tag)
{
- // basically indent before block content
- // and within structure like tables, lists
- switch (tag.charAt (0)) {
- case 'a': return "applet".equals (tag);
- case 'b': return "body".equals (tag)
- || "blockquote".equals (tag);
- case 'c': return "center".equals (tag);
- case 'f': return "frame".equals (tag)
- || "frameset".equals (tag);
- case 'h': return "head".equals (tag);
- case 'm': return "meta".equals (tag);
- case 'o': return "object".equals (tag);
- case 'p': return "param".equals (tag)
- || "pre".equals (tag);
- case 's': return "style".equals (tag);
- case 't': return "title".equals (tag)
- || "td".equals (tag)
- || "th".equals (tag);
- }
- // ... but not inline elements like "em", "b", "font"
- return false;
+ // basically indent before block content
+ // and within structure like tables, lists
+ switch (tag.charAt (0)) {
+ case 'a': return "applet".equals (tag);
+ case 'b': return "body".equals (tag)
+ || "blockquote".equals (tag);
+ case 'c': return "center".equals (tag);
+ case 'f': return "frame".equals (tag)
+ || "frameset".equals (tag);
+ case 'h': return "head".equals (tag);
+ case 'm': return "meta".equals (tag);
+ case 'o': return "object".equals (tag);
+ case 'p': return "param".equals (tag)
+ || "pre".equals (tag);
+ case 's': return "style".equals (tag);
+ case 't': return "title".equals (tag)
+ || "td".equals (tag)
+ || "th".equals (tag);
+ }
+ // ... but not inline elements like "em", "b", "font"
+ return false;
}
private static boolean spaceBefore (String tag)
{
- // blank line AND INDENT before certain structural content
- switch (tag.charAt (0)) {
- case 'h': return "h1".equals (tag)
- || "h2".equals (tag)
- || "h3".equals (tag)
- || "h4".equals (tag)
- || "h5".equals (tag)
- || "h6".equals (tag)
- || "hr".equals (tag);
- case 'l': return "li".equals (tag);
- case 'o': return "ol".equals (tag);
- case 'p': return "p".equals (tag);
- case 't': return "table".equals (tag)
- || "tr".equals (tag);
- case 'u': return "ul".equals (tag);
- }
- return false;
+ // blank line AND INDENT before certain structural content
+ switch (tag.charAt (0)) {
+ case 'h': return "h1".equals (tag)
+ || "h2".equals (tag)
+ || "h3".equals (tag)
+ || "h4".equals (tag)
+ || "h5".equals (tag)
+ || "h6".equals (tag)
+ || "hr".equals (tag);
+ case 'l': return "li".equals (tag);
+ case 'o': return "ol".equals (tag);
+ case 'p': return "p".equals (tag);
+ case 't': return "table".equals (tag)
+ || "tr".equals (tag);
+ case 'u': return "ul".equals (tag);
+ }
+ return false;
}
// XHTML DTDs say these three have xml:space="preserve"
private static boolean spacePreserve (String tag)
{
- return "pre".equals (tag)
- || "style".equals (tag)
- || "script".equals (tag);
+ return "pre".equals (tag)
+ || "style".equals (tag)
+ || "script".equals (tag);
}
/**
* <b>SAX2</b>: ignored.
*/
final public void startPrefixMapping (String prefix, String uri)
- {}
+ {}
/**
* <b>SAX2</b>: ignored.
*/
final public void endPrefixMapping (String prefix)
- {}
+ {}
private void writeStartTag (
- String name,
- Attributes atts,
- boolean isEmpty
+ String name,
+ Attributes atts,
+ boolean isEmpty
) throws SAXException, IOException
{
- rawWrite ('<');
- rawWrite (name);
+ rawWrite ('<');
+ rawWrite (name);
- // write out attributes ... sorting is particularly useful
- // with output that's been heavily defaulted.
- if (atts != null && atts.getLength () != 0) {
+ // write out attributes ... sorting is particularly useful
+ // with output that's been heavily defaulted.
+ if (atts != null && atts.getLength () != 0) {
- // Set up to write, with optional sorting
- int indices [] = new int [atts.getLength ()];
+ // Set up to write, with optional sorting
+ int indices [] = new int [atts.getLength ()];
- for (int i= 0; i < indices.length; i++)
- indices [i] = i;
-
- // optionally sort
+ for (int i= 0; i < indices.length; i++)
+ indices [i] = i;
+
+ // optionally sort
// FIXME: canon xml demands xmlns nodes go first,
// and sorting by URI first (empty first) then localname
// it should maybe use a different sort
- if (canonical || prettyPrinting) {
-
- // insertion sort by attribute name
- for (int i = 1; i < indices.length; i++) {
- int n = indices [i], j;
- String s = atts.getQName (n);
-
- for (j = i - 1; j >= 0; j--) {
- if (s.compareTo (atts.getQName (indices [j]))
- >= 0)
- break;
- indices [j + 1] = indices [j];
- }
- indices [j + 1] = n;
- }
- }
-
- // write, sorted or no
- for (int i= 0; i < indices.length; i++) {
- String s = atts.getQName (indices [i]);
-
- if (s == null || "".equals (s))
- throw new IllegalArgumentException ("no XML name");
- rawWrite (" ");
- rawWrite (s);
- rawWrite ("=");
- writeQuotedValue (atts.getValue (indices [i]),
- CTX_ATTRIBUTE);
- }
- }
- if (isEmpty)
- rawWrite (" /");
- rawWrite ('>');
+ if (canonical || prettyPrinting) {
+
+ // insertion sort by attribute name
+ for (int i = 1; i < indices.length; i++) {
+ int n = indices [i], j;
+ String s = atts.getQName (n);
+
+ for (j = i - 1; j >= 0; j--) {
+ if (s.compareTo (atts.getQName (indices [j]))
+ >= 0)
+ break;
+ indices [j + 1] = indices [j];
+ }
+ indices [j + 1] = n;
+ }
+ }
+
+ // write, sorted or no
+ for (int i= 0; i < indices.length; i++) {
+ String s = atts.getQName (indices [i]);
+
+ if (s == null || "".equals (s))
+ throw new IllegalArgumentException ("no XML name");
+ rawWrite (" ");
+ rawWrite (s);
+ rawWrite ("=");
+ writeQuotedValue (atts.getValue (indices [i]),
+ CTX_ATTRIBUTE);
+ }
+ }
+ if (isEmpty)
+ rawWrite (" /");
+ rawWrite ('>');
}
/**
@@ -907,59 +907,59 @@ public class XMLWriter
* not all user agents handle them correctly.
*/
final public void startElement (
- String uri,
- String localName,
- String qName,
- Attributes atts
+ String uri,
+ String localName,
+ String qName,
+ Attributes atts
) throws SAXException
{
- startedDoctype = false;
-
- if (locator == null)
- locator = new LocatorImpl ();
-
- if (qName == null || "".equals (qName))
- throw new IllegalArgumentException ("no XML name");
-
- try {
- if (entityNestLevel != 0)
- return;
- if (prettyPrinting) {
- String whitespace = null;
-
- if (xhtml && spacePreserve (qName))
- whitespace = "preserve";
- else if (atts != null)
- whitespace = atts.getValue ("xml:space");
- if (whitespace == null)
- whitespace = (String) space.peek ();
- space.push (whitespace);
-
- if ("default".equals (whitespace)) {
- if (xhtml) {
- if (spaceBefore (qName)) {
- newline ();
- doIndent ();
- } else if (indentBefore (qName))
- doIndent ();
- // else it's inlined, modulo line length
- // FIXME: incrementing element nest level
- // for inlined elements causes ugliness
- } else
- doIndent ();
- }
- }
- elementNestLevel++;
- writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));
-
- if (xhtml) {
+ startedDoctype = false;
+
+ if (locator == null)
+ locator = new LocatorImpl ();
+
+ if (qName == null || "".equals (qName))
+ throw new IllegalArgumentException ("no XML name");
+
+ try {
+ if (entityNestLevel != 0)
+ return;
+ if (prettyPrinting) {
+ String whitespace = null;
+
+ if (xhtml && spacePreserve (qName))
+ whitespace = "preserve";
+ else if (atts != null)
+ whitespace = atts.getValue ("xml:space");
+ if (whitespace == null)
+ whitespace = (String) space.peek ();
+ space.push (whitespace);
+
+ if ("default".equals (whitespace)) {
+ if (xhtml) {
+ if (spaceBefore (qName)) {
+ newline ();
+ doIndent ();
+ } else if (indentBefore (qName))
+ doIndent ();
+ // else it's inlined, modulo line length
+ // FIXME: incrementing element nest level
+ // for inlined elements causes ugliness
+ } else
+ doIndent ();
+ }
+ }
+ elementNestLevel++;
+ writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));
+
+ if (xhtml) {
// FIXME: if this is an XHTML "pre" element, turn
// off automatic wrapping.
- }
+ }
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/**
@@ -967,22 +967,22 @@ public class XMLWriter
* @see #startElement
*/
public void writeEmptyElement (
- String uri,
- String localName,
- String qName,
- Attributes atts
+ String uri,
+ String localName,
+ String qName,
+ Attributes atts
) throws SAXException
{
- if (canonical) {
- startElement (uri, localName, qName, atts);
- endElement (uri, localName, qName);
- } else {
- try {
- writeStartTag (qName, atts, true);
- } catch (IOException e) {
- fatal ("can't write", e);
- }
- }
+ if (canonical) {
+ startElement (uri, localName, qName, atts);
+ endElement (uri, localName, qName);
+ } else {
+ try {
+ writeStartTag (qName, atts, true);
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
+ }
}
@@ -990,68 +990,68 @@ public class XMLWriter
final public void endElement (String uri, String localName, String qName)
throws SAXException
{
- if (qName == null || "".equals (qName))
- throw new IllegalArgumentException ("no XML name");
-
- try {
- elementNestLevel--;
- if (entityNestLevel != 0)
- return;
- if (xhtml && isEmptyElementTag (qName))
- return;
- rawWrite ("</");
- rawWrite (qName);
- rawWrite ('>');
-
- if (prettyPrinting) {
- if (!space.empty ())
- space.pop ();
- else
- fatal ("stack discipline", null);
- }
- if (elementNestLevel == 0)
- inEpilogue = true;
-
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (qName == null || "".equals (qName))
+ throw new IllegalArgumentException ("no XML name");
+
+ try {
+ elementNestLevel--;
+ if (entityNestLevel != 0)
+ return;
+ if (xhtml && isEmptyElementTag (qName))
+ return;
+ rawWrite ("</");
+ rawWrite (qName);
+ rawWrite ('>');
+
+ if (prettyPrinting) {
+ if (!space.empty ())
+ space.pop ();
+ else
+ fatal ("stack discipline", null);
+ }
+ if (elementNestLevel == 0)
+ inEpilogue = true;
+
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX1</b>: reports content characters */
final public void characters (char ch [], int start, int length)
throws SAXException
{
- if (locator == null)
- locator = new LocatorImpl ();
-
- try {
- if (entityNestLevel != 0)
- return;
- if (inCDATA) {
- escapeChars (ch, start, length, CTX_UNPARSED);
- } else {
- escapeChars (ch, start, length, CTX_CONTENT);
- }
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (locator == null)
+ locator = new LocatorImpl ();
+
+ try {
+ if (entityNestLevel != 0)
+ return;
+ if (inCDATA) {
+ escapeChars (ch, start, length, CTX_UNPARSED);
+ } else {
+ escapeChars (ch, start, length, CTX_CONTENT);
+ }
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX1</b>: reports ignorable whitespace */
final public void ignorableWhitespace (char ch [], int start, int length)
throws SAXException
{
- if (locator == null)
- locator = new LocatorImpl ();
-
- try {
- if (entityNestLevel != 0)
- return;
- // don't forget to map NL to CRLF, CR, etc
- escapeChars (ch, start, length, CTX_CONTENT);
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (locator == null)
+ locator = new LocatorImpl ();
+
+ try {
+ if (entityNestLevel != 0)
+ return;
+ // don't forget to map NL to CRLF, CR, etc
+ escapeChars (ch, start, length, CTX_CONTENT);
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/**
@@ -1063,44 +1063,44 @@ public class XMLWriter
final public void processingInstruction (String target, String data)
throws SAXException
{
- if (locator == null)
- locator = new LocatorImpl ();
-
- // don't print internal subset for XHTML
- if (xhtml && startedDoctype)
- return;
-
- // ancient HTML browsers might render these ... their loss.
- // to prevent: "if (xhtml) return;".
-
- try {
- if (entityNestLevel != 0)
- return;
- if (canonical && inEpilogue)
- newline ();
- rawWrite ("<?");
- rawWrite (target);
- rawWrite (' ');
- escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
- rawWrite ("?>");
- if (elementNestLevel == 0 && !(canonical && inEpilogue))
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (locator == null)
+ locator = new LocatorImpl ();
+
+ // don't print internal subset for XHTML
+ if (xhtml && startedDoctype)
+ return;
+
+ // ancient HTML browsers might render these ... their loss.
+ // to prevent: "if (xhtml) return;".
+
+ try {
+ if (entityNestLevel != 0)
+ return;
+ if (canonical && inEpilogue)
+ newline ();
+ rawWrite ("<?");
+ rawWrite (target);
+ rawWrite (' ');
+ escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
+ rawWrite ("?>");
+ if (elementNestLevel == 0 && !(canonical && inEpilogue))
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX1</b>: indicates a non-expanded entity reference */
public void skippedEntity (String name)
throws SAXException
{
- try {
- rawWrite ("&");
- rawWrite (name);
- rawWrite (";");
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ try {
+ rawWrite ("&");
+ rawWrite (name);
+ rawWrite (";");
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
// SAX2 LexicalHandler
@@ -1109,35 +1109,35 @@ public class XMLWriter
final public void startCDATA ()
throws SAXException
{
- if (locator == null)
- locator = new LocatorImpl ();
-
- if (canonical)
- return;
-
- try {
- inCDATA = true;
- if (entityNestLevel == 0)
- rawWrite ("<![CDATA[");
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (locator == null)
+ locator = new LocatorImpl ();
+
+ if (canonical)
+ return;
+
+ try {
+ inCDATA = true;
+ if (entityNestLevel == 0)
+ rawWrite ("<![CDATA[");
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX2</b>: called after parsing CDATA characters */
final public void endCDATA ()
throws SAXException
{
- if (canonical)
- return;
-
- try {
- inCDATA = false;
- if (entityNestLevel == 0)
- rawWrite ("]]>");
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (canonical)
+ return;
+
+ try {
+ inCDATA = false;
+ if (entityNestLevel == 0)
+ rawWrite ("]]>");
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/**
@@ -1148,45 +1148,45 @@ public class XMLWriter
final public void startDTD (String name, String publicId, String systemId)
throws SAXException
{
- if (locator == null)
- locator = new LocatorImpl ();
- if (xhtml)
- return;
- try {
- inDoctype = startedDoctype = true;
- if (canonical)
- return;
- rawWrite ("<!DOCTYPE ");
- rawWrite (name);
- rawWrite (' ');
-
- if (!expandingEntities) {
- if (publicId != null)
- rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' ");
- else if (systemId != null)
- rawWrite ("SYSTEM '" + systemId + "' ");
- }
-
- rawWrite ('[');
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (locator == null)
+ locator = new LocatorImpl ();
+ if (xhtml)
+ return;
+ try {
+ inDoctype = startedDoctype = true;
+ if (canonical)
+ return;
+ rawWrite ("<!DOCTYPE ");
+ rawWrite (name);
+ rawWrite (' ');
+
+ if (!expandingEntities) {
+ if (publicId != null)
+ rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' ");
+ else if (systemId != null)
+ rawWrite ("SYSTEM '" + systemId + "' ");
+ }
+
+ rawWrite ('[');
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX2</b>: called after the doctype is parsed */
final public void endDTD ()
throws SAXException
{
- inDoctype = false;
- if (canonical || xhtml)
- return;
- try {
- rawWrite ("]>");
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ inDoctype = false;
+ if (canonical || xhtml)
+ return;
+ try {
+ rawWrite ("]>");
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/**
@@ -1195,30 +1195,30 @@ public class XMLWriter
final public void startEntity (String name)
throws SAXException
{
- try {
- boolean writeEOL = true;
-
- // Predefined XHTML entities (for characters) will get
- // mapped back later.
- if (xhtml || expandingEntities)
- return;
-
- entityNestLevel++;
- if (name.equals ("[dtd]"))
- return;
- if (entityNestLevel != 1)
- return;
- if (!name.startsWith ("%")) {
- writeEOL = false;
- rawWrite ('&');
- }
- rawWrite (name);
- rawWrite (';');
- if (writeEOL)
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ try {
+ boolean writeEOL = true;
+
+ // Predefined XHTML entities (for characters) will get
+ // mapped back later.
+ if (xhtml || expandingEntities)
+ return;
+
+ entityNestLevel++;
+ if (name.equals ("[dtd]"))
+ return;
+ if (entityNestLevel != 1)
+ return;
+ if (!name.startsWith ("%")) {
+ writeEOL = false;
+ rawWrite ('&');
+ }
+ rawWrite (name);
+ rawWrite (';');
+ if (writeEOL)
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/**
@@ -1227,9 +1227,9 @@ public class XMLWriter
final public void endEntity (String name)
throws SAXException
{
- if (xhtml || expandingEntities)
- return;
- entityNestLevel--;
+ if (xhtml || expandingEntities)
+ return;
+ entityNestLevel--;
}
/**
@@ -1243,294 +1243,294 @@ public class XMLWriter
final public void comment (char ch [], int start, int length)
throws SAXException
{
- if (locator == null)
- locator = new LocatorImpl ();
-
- // don't print internal subset for XHTML
- if (xhtml && startedDoctype)
- return;
- // don't print comment in doctype for canon xml
- if (canonical && inDoctype)
- return;
-
- try {
- boolean indent;
-
- if (prettyPrinting && space.empty ())
- fatal ("stack discipline", null);
- indent = prettyPrinting && "default".equals (space.peek ());
- if (entityNestLevel != 0)
- return;
- if (indent)
- doIndent ();
- if (canonical && inEpilogue)
- newline ();
- rawWrite ("<!--");
- escapeChars (ch, start, length, CTX_UNPARSED);
- rawWrite ("-->");
- if (indent)
- doIndent ();
- if (elementNestLevel == 0 && !(canonical && inEpilogue))
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (locator == null)
+ locator = new LocatorImpl ();
+
+ // don't print internal subset for XHTML
+ if (xhtml && startedDoctype)
+ return;
+ // don't print comment in doctype for canon xml
+ if (canonical && inDoctype)
+ return;
+
+ try {
+ boolean indent;
+
+ if (prettyPrinting && space.empty ())
+ fatal ("stack discipline", null);
+ indent = prettyPrinting && "default".equals (space.peek ());
+ if (entityNestLevel != 0)
+ return;
+ if (indent)
+ doIndent ();
+ if (canonical && inEpilogue)
+ newline ();
+ rawWrite ("<!--");
+ escapeChars (ch, start, length, CTX_UNPARSED);
+ rawWrite ("-->");
+ if (indent)
+ doIndent ();
+ if (elementNestLevel == 0 && !(canonical && inEpilogue))
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
// SAX1 DTDHandler
/** <b>SAX1</b>: called on notation declarations */
final public void notationDecl (String name,
- String publicId, String systemId)
+ String publicId, String systemId)
throws SAXException
{
- if (xhtml)
- return;
- try {
- // At this time, only SAX2 callbacks start these.
- if (!startedDoctype)
- return;
-
- if (entityNestLevel != 0)
- return;
- rawWrite ("<!NOTATION " + name + " ");
- if (publicId != null)
- rawWrite ("PUBLIC \"" + publicId + '"');
- else
- rawWrite ("SYSTEM ");
- if (systemId != null)
- rawWrite ('"' + systemId + '"');
- rawWrite (">");
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (xhtml)
+ return;
+ try {
+ // At this time, only SAX2 callbacks start these.
+ if (!startedDoctype)
+ return;
+
+ if (entityNestLevel != 0)
+ return;
+ rawWrite ("<!NOTATION " + name + " ");
+ if (publicId != null)
+ rawWrite ("PUBLIC \"" + publicId + '"');
+ else
+ rawWrite ("SYSTEM ");
+ if (systemId != null)
+ rawWrite ('"' + systemId + '"');
+ rawWrite (">");
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX1</b>: called on unparsed entity declarations */
final public void unparsedEntityDecl (String name,
- String publicId, String systemId,
- String notationName)
+ String publicId, String systemId,
+ String notationName)
throws SAXException
{
- if (xhtml)
- return;
- try {
- // At this time, only SAX2 callbacks start these.
- if (!startedDoctype) {
- // FIXME: write to temporary buffer, and make the start
- // of the root element write these declarations.
- return;
- }
-
- if (entityNestLevel != 0)
- return;
- rawWrite ("<!ENTITY " + name + " ");
- if (publicId != null)
- rawWrite ("PUBLIC \"" + publicId + '"');
- else
- rawWrite ("SYSTEM ");
- rawWrite ('"' + systemId + '"');
- rawWrite (" NDATA " + notationName + ">");
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (xhtml)
+ return;
+ try {
+ // At this time, only SAX2 callbacks start these.
+ if (!startedDoctype) {
+ // FIXME: write to temporary buffer, and make the start
+ // of the root element write these declarations.
+ return;
+ }
+
+ if (entityNestLevel != 0)
+ return;
+ rawWrite ("<!ENTITY " + name + " ");
+ if (publicId != null)
+ rawWrite ("PUBLIC \"" + publicId + '"');
+ else
+ rawWrite ("SYSTEM ");
+ rawWrite ('"' + systemId + '"');
+ rawWrite (" NDATA " + notationName + ">");
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
// SAX2 DeclHandler
/** <b>SAX2</b>: called on attribute declarations */
final public void attributeDecl (String eName, String aName,
- String type, String mode, String value)
+ String type, String mode, String value)
throws SAXException
{
- if (xhtml)
- return;
- try {
- // At this time, only SAX2 callbacks start these.
- if (!startedDoctype)
- return;
- if (entityNestLevel != 0)
- return;
- rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
- rawWrite (type);
- rawWrite (' ');
- if (mode != null)
- rawWrite (mode + ' ');
- if (value != null)
- writeQuotedValue (value, CTX_ATTRIBUTE);
- rawWrite ('>');
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (xhtml)
+ return;
+ try {
+ // At this time, only SAX2 callbacks start these.
+ if (!startedDoctype)
+ return;
+ if (entityNestLevel != 0)
+ return;
+ rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
+ rawWrite (type);
+ rawWrite (' ');
+ if (mode != null)
+ rawWrite (mode + ' ');
+ if (value != null)
+ writeQuotedValue (value, CTX_ATTRIBUTE);
+ rawWrite ('>');
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX2</b>: called on element declarations */
final public void elementDecl (String name, String model)
throws SAXException
{
- if (xhtml)
- return;
- try {
- // At this time, only SAX2 callbacks start these.
- if (!startedDoctype)
- return;
- if (entityNestLevel != 0)
- return;
- rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (xhtml)
+ return;
+ try {
+ // At this time, only SAX2 callbacks start these.
+ if (!startedDoctype)
+ return;
+ if (entityNestLevel != 0)
+ return;
+ rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX2</b>: called on external entity declarations */
final public void externalEntityDecl (
- String name,
- String publicId,
- String systemId)
+ String name,
+ String publicId,
+ String systemId)
throws SAXException
{
- if (xhtml)
- return;
- try {
- // At this time, only SAX2 callbacks start these.
- if (!startedDoctype)
- return;
- if (entityNestLevel != 0)
- return;
- rawWrite ("<!ENTITY ");
- if (name.startsWith ("%")) {
- rawWrite ("% ");
- rawWrite (name.substring (1));
- } else
- rawWrite (name);
- if (publicId != null)
- rawWrite (" PUBLIC \"" + publicId + '"');
- else
- rawWrite (" SYSTEM ");
- rawWrite ('"' + systemId + "\">");
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (xhtml)
+ return;
+ try {
+ // At this time, only SAX2 callbacks start these.
+ if (!startedDoctype)
+ return;
+ if (entityNestLevel != 0)
+ return;
+ rawWrite ("<!ENTITY ");
+ if (name.startsWith ("%")) {
+ rawWrite ("% ");
+ rawWrite (name.substring (1));
+ } else
+ rawWrite (name);
+ if (publicId != null)
+ rawWrite (" PUBLIC \"" + publicId + '"');
+ else
+ rawWrite (" SYSTEM ");
+ rawWrite ('"' + systemId + "\">");
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
/** <b>SAX2</b>: called on internal entity declarations */
final public void internalEntityDecl (String name, String value)
throws SAXException
{
- if (xhtml)
- return;
- try {
- // At this time, only SAX2 callbacks start these.
- if (!startedDoctype)
- return;
- if (entityNestLevel != 0)
- return;
- rawWrite ("<!ENTITY ");
- if (name.startsWith ("%")) {
- rawWrite ("% ");
- rawWrite (name.substring (1));
- } else
- rawWrite (name);
- rawWrite (' ');
- writeQuotedValue (value, CTX_ENTITY);
- rawWrite ('>');
- newline ();
- } catch (IOException e) {
- fatal ("can't write", e);
- }
+ if (xhtml)
+ return;
+ try {
+ // At this time, only SAX2 callbacks start these.
+ if (!startedDoctype)
+ return;
+ if (entityNestLevel != 0)
+ return;
+ rawWrite ("<!ENTITY ");
+ if (name.startsWith ("%")) {
+ rawWrite ("% ");
+ rawWrite (name.substring (1));
+ } else
+ rawWrite (name);
+ rawWrite (' ');
+ writeQuotedValue (value, CTX_ENTITY);
+ rawWrite ('>');
+ newline ();
+ } catch (IOException e) {
+ fatal ("can't write", e);
+ }
}
private void writeQuotedValue (String value, int code)
throws SAXException, IOException
{
- char buf [] = value.toCharArray ();
- int off = 0, len = buf.length;
-
- // we can't add line breaks to attribute/entity/... values
- noWrap = true;
- rawWrite ('"');
- escapeChars (buf, off, len, code);
- rawWrite ('"');
- noWrap = false;
+ char buf [] = value.toCharArray ();
+ int off = 0, len = buf.length;
+
+ // we can't add line breaks to attribute/entity/... values
+ noWrap = true;
+ rawWrite ('"');
+ escapeChars (buf, off, len, code);
+ rawWrite ('"');
+ noWrap = false;
}
-
+
// From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
// (Latin/1) characters, all codes: 160-255 (0xA0-0xFF).
// Codes 128-159 have no assigned values.
private static final String HTMLlat1x [] = {
- // 160
- "nbsp", "iexcl", "cent", "pound", "curren",
- "yen", "brvbar", "sect", "uml", "copy",
+ // 160
+ "nbsp", "iexcl", "cent", "pound", "curren",
+ "yen", "brvbar", "sect", "uml", "copy",
- // 170
- "ordf", "laquo", "not", "shy", "reg",
- "macr", "deg", "plusmn", "sup2", "sup3",
+ // 170
+ "ordf", "laquo", "not", "shy", "reg",
+ "macr", "deg", "plusmn", "sup2", "sup3",
- // 180
- "acute", "micro", "para", "middot", "cedil",
- "sup1", "ordm", "raquo", "frac14", "frac12",
+ // 180
+ "acute", "micro", "para", "middot", "cedil",
+ "sup1", "ordm", "raquo", "frac14", "frac12",
- // 190
- "frac34", "iquest", "Agrave", "Aacute", "Acirc",
- "Atilde", "Auml", "Aring", "AElig", "Ccedil",
+ // 190
+ "frac34", "iquest", "Agrave", "Aacute", "Acirc",
+ "Atilde", "Auml", "Aring", "AElig", "Ccedil",
- // 200
- "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
- "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
+ // 200
+ "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
+ "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
- // 210
- "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
- "times", "Oslash", "Ugrave", "Uacute", "Ucirc",
+ // 210
+ "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
+ "times", "Oslash", "Ugrave", "Uacute", "Ucirc",
- // 220
- "Uuml", "Yacute", "THORN", "szlig", "agrave",
- "aacute", "acirc", "atilde", "auml", "aring",
+ // 220
+ "Uuml", "Yacute", "THORN", "szlig", "agrave",
+ "aacute", "acirc", "atilde", "auml", "aring",
- // 230
- "aelig", "ccedil", "egrave", "eacute", "ecirc",
- "euml", "igrave", "iacute", "icirc", "iuml",
+ // 230
+ "aelig", "ccedil", "egrave", "eacute", "ecirc",
+ "euml", "igrave", "iacute", "icirc", "iuml",
- // 240
- "eth", "ntilde", "ograve", "oacute", "ocirc",
- "otilde", "ouml", "divide", "oslash", "ugrave",
+ // 240
+ "eth", "ntilde", "ograve", "oacute", "ocirc",
+ "otilde", "ouml", "divide", "oslash", "ugrave",
- // 250
- "uacute", "ucirc", "uuml", "yacute", "thorn",
- "yuml"
+ // 250
+ "uacute", "ucirc", "uuml", "yacute", "thorn",
+ "yuml"
};
// From "HTMLsymbolx.ent" ... some of the symbols that
// we can conveniently handle. Entities for the Greek.
// alphabet (upper and lower cases) are compact.
private static final String HTMLsymbolx_GR [] = {
- // 913
- "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
- "Zeta", "Eta", "Theta", "Iota", "Kappa",
+ // 913
+ "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
+ "Zeta", "Eta", "Theta", "Iota", "Kappa",
- // 923
- "Lambda", "Mu", "Nu", "Xi", "Omicron",
- "Pi", "Rho", null, "Sigma", "Tau",
+ // 923
+ "Lambda", "Mu", "Nu", "Xi", "Omicron",
+ "Pi", "Rho", null, "Sigma", "Tau",
- // 933
- "Upsilon", "Phi", "Chi", "Psi", "Omega"
+ // 933
+ "Upsilon", "Phi", "Chi", "Psi", "Omega"
};
private static final String HTMLsymbolx_gr [] = {
- // 945
- "alpha", "beta", "gamma", "delta", "epsilon",
- "zeta", "eta", "theta", "iota", "kappa",
+ // 945
+ "alpha", "beta", "gamma", "delta", "epsilon",
+ "zeta", "eta", "theta", "iota", "kappa",
- // 955
- "lambda", "mu", "nu", "xi", "omicron",
- "pi", "rho", "sigmaf", "sigma", "tau",
+ // 955
+ "lambda", "mu", "nu", "xi", "omicron",
+ "pi", "rho", "sigmaf", "sigma", "tau",
- // 965
- "upsilon", "phi", "chi", "psi", "omega"
+ // 965
+ "upsilon", "phi", "chi", "psi", "omega"
};
@@ -1539,268 +1539,268 @@ public class XMLWriter
private void escapeChars (char buf [], int off, int len, int code)
throws SAXException, IOException
{
- int first = 0;
-
- if (off < 0) {
- off = 0;
- len = buf.length;
- }
- for (int i = 0; i < len; i++) {
- String esc;
- char c = buf [off + i];
-
- switch (c) {
- // Note that CTX_ATTRIBUTE isn't explicitly tested here;
- // all syntax delimiters are escaped in CTX_ATTRIBUTE,
- // otherwise it's similar to CTX_CONTENT
-
- // ampersand flags entity references; entity replacement
- // text has unexpanded references, other text doesn't.
- case '&':
- if (code == CTX_ENTITY || code == CTX_UNPARSED)
- continue;
- esc = "amp";
- break;
-
- // attributes and text may NOT have literal '<', but
- // entities may have markup constructs
- case '<':
- if (code == CTX_ENTITY || code == CTX_UNPARSED)
- continue;
- esc = "lt";
- break;
-
- // as above re markup constructs; but otherwise
- // except when canonicalizing, this is for consistency
- case '>':
- if (code == CTX_ENTITY || code == CTX_UNPARSED)
- continue;
- esc = "gt";
- break;
- case '\'':
- if (code == CTX_CONTENT || code == CTX_UNPARSED)
- continue;
- if (canonical)
- continue;
- esc = "apos";
- break;
-
- // needed when printing quoted attribute/entity values
- case '"':
- if (code == CTX_CONTENT || code == CTX_UNPARSED)
- continue;
- esc = "quot";
- break;
-
- // make line ends work per host OS convention
- case '\n':
- esc = eol;
- break;
-
- //
- // No other characters NEED special treatment ... except
- // for encoding-specific issues, like whether the character
- // can really be represented in that encoding.
- //
- default:
- //
- // There are characters we can never write safely; getting
- // them is an error.
- //
- // (a) They're never legal in XML ... detected by range
- // checks, and (eventually) by remerging surrogate
- // pairs on output. (Easy error for apps to prevent.)
- //
- // (b) This encoding can't represent them, and we
- // can't make reference substitution (e.g. inside
- // CDATA sections, names, PI data, etc). (Hard for
- // apps to prevent, except by using UTF-8 or UTF-16
- // as their output encoding.)
- //
- // We know a very little bit about what characters
- // the US-ASCII and ISO-8859-1 encodings support. For
- // other encodings we can't detect the second type of
- // error at all. (Never an issue for UTF-8 or UTF-16.)
- //
+ int first = 0;
+
+ if (off < 0) {
+ off = 0;
+ len = buf.length;
+ }
+ for (int i = 0; i < len; i++) {
+ String esc;
+ char c = buf [off + i];
+
+ switch (c) {
+ // Note that CTX_ATTRIBUTE isn't explicitly tested here;
+ // all syntax delimiters are escaped in CTX_ATTRIBUTE,
+ // otherwise it's similar to CTX_CONTENT
+
+ // ampersand flags entity references; entity replacement
+ // text has unexpanded references, other text doesn't.
+ case '&':
+ if (code == CTX_ENTITY || code == CTX_UNPARSED)
+ continue;
+ esc = "amp";
+ break;
+
+ // attributes and text may NOT have literal '<', but
+ // entities may have markup constructs
+ case '<':
+ if (code == CTX_ENTITY || code == CTX_UNPARSED)
+ continue;
+ esc = "lt";
+ break;
+
+ // as above re markup constructs; but otherwise
+ // except when canonicalizing, this is for consistency
+ case '>':
+ if (code == CTX_ENTITY || code == CTX_UNPARSED)
+ continue;
+ esc = "gt";
+ break;
+ case '\'':
+ if (code == CTX_CONTENT || code == CTX_UNPARSED)
+ continue;
+ if (canonical)
+ continue;
+ esc = "apos";
+ break;
+
+ // needed when printing quoted attribute/entity values
+ case '"':
+ if (code == CTX_CONTENT || code == CTX_UNPARSED)
+ continue;
+ esc = "quot";
+ break;
+
+ // make line ends work per host OS convention
+ case '\n':
+ esc = eol;
+ break;
+
+ //
+ // No other characters NEED special treatment ... except
+ // for encoding-specific issues, like whether the character
+ // can really be represented in that encoding.
+ //
+ default:
+ //
+ // There are characters we can never write safely; getting
+ // them is an error.
+ //
+ // (a) They're never legal in XML ... detected by range
+ // checks, and (eventually) by remerging surrogate
+ // pairs on output. (Easy error for apps to prevent.)
+ //
+ // (b) This encoding can't represent them, and we
+ // can't make reference substitution (e.g. inside
+ // CDATA sections, names, PI data, etc). (Hard for
+ // apps to prevent, except by using UTF-8 or UTF-16
+ // as their output encoding.)
+ //
+ // We know a very little bit about what characters
+ // the US-ASCII and ISO-8859-1 encodings support. For
+ // other encodings we can't detect the second type of
+ // error at all. (Never an issue for UTF-8 or UTF-16.)
+ //
// FIXME: CR in CDATA is an error; in text, turn to a char ref
// FIXME: CR/LF/TAB in attributes should become char refs
- if ((c > 0xfffd)
- || ((c < 0x0020) && !((c == 0x0009)
- || (c == 0x000A) || (c == 0x000D)))
- || (((c & dangerMask) != 0)
- && (code == CTX_UNPARSED))) {
-
- // if case (b) in CDATA, we might end the section,
- // write a reference, then restart ... possible
- // in one DOM L3 draft.
-
- throw new CharConversionException (
- "Illegal or non-writable character: U+"
- + Integer.toHexString (c));
- }
-
- //
- // If the output encoding represents the character
- // directly, let it do so! Else we'll escape it.
- //
- if ((c & dangerMask) == 0)
- continue;
- esc = null;
-
- // Avoid numeric refs where symbolic ones exist, as
- // symbolic ones make more sense to humans reading!
- if (xhtml) {
- // all the HTMLlat1x.ent entities
- // (all the "ISO-8859-1" characters)
- if (c >= 160 && c <= 255)
- esc = HTMLlat1x [c - 160];
-
- // not quite half the HTMLsymbolx.ent entities
- else if (c >= 913 && c <= 937)
- esc = HTMLsymbolx_GR [c - 913];
- else if (c >= 945 && c <= 969)
- esc = HTMLsymbolx_gr [c - 945];
-
- else switch (c) {
- // all of the HTMLspecialx.ent entities
- case 338: esc = "OElig"; break;
- case 339: esc = "oelig"; break;
- case 352: esc = "Scaron"; break;
- case 353: esc = "scaron"; break;
- case 376: esc = "Yuml"; break;
- case 710: esc = "circ"; break;
- case 732: esc = "tilde"; break;
- case 8194: esc = "ensp"; break;
- case 8195: esc = "emsp"; break;
- case 8201: esc = "thinsp"; break;
- case 8204: esc = "zwnj"; break;
- case 8205: esc = "zwj"; break;
- case 8206: esc = "lrm"; break;
- case 8207: esc = "rlm"; break;
- case 8211: esc = "ndash"; break;
- case 8212: esc = "mdash"; break;
- case 8216: esc = "lsquo"; break;
- case 8217: esc = "rsquo"; break;
- case 8218: esc = "sbquo"; break;
- case 8220: esc = "ldquo"; break;
- case 8221: esc = "rdquo"; break;
- case 8222: esc = "bdquo"; break;
- case 8224: esc = "dagger"; break;
- case 8225: esc = "Dagger"; break;
- case 8240: esc = "permil"; break;
- case 8249: esc = "lsaquo"; break;
- case 8250: esc = "rsaquo"; break;
- case 8364: esc = "euro"; break;
-
- // the other HTMLsymbox.ent entities
- case 402: esc = "fnof"; break;
- case 977: esc = "thetasym"; break;
- case 978: esc = "upsih"; break;
- case 982: esc = "piv"; break;
- case 8226: esc = "bull"; break;
- case 8230: esc = "hellip"; break;
- case 8242: esc = "prime"; break;
- case 8243: esc = "Prime"; break;
- case 8254: esc = "oline"; break;
- case 8260: esc = "frasl"; break;
- case 8472: esc = "weierp"; break;
- case 8465: esc = "image"; break;
- case 8476: esc = "real"; break;
- case 8482: esc = "trade"; break;
- case 8501: esc = "alefsym"; break;
- case 8592: esc = "larr"; break;
- case 8593: esc = "uarr"; break;
- case 8594: esc = "rarr"; break;
- case 8595: esc = "darr"; break;
- case 8596: esc = "harr"; break;
- case 8629: esc = "crarr"; break;
- case 8656: esc = "lArr"; break;
- case 8657: esc = "uArr"; break;
- case 8658: esc = "rArr"; break;
- case 8659: esc = "dArr"; break;
- case 8660: esc = "hArr"; break;
- case 8704: esc = "forall"; break;
- case 8706: esc = "part"; break;
- case 8707: esc = "exist"; break;
- case 8709: esc = "empty"; break;
- case 8711: esc = "nabla"; break;
- case 8712: esc = "isin"; break;
- case 8713: esc = "notin"; break;
- case 8715: esc = "ni"; break;
- case 8719: esc = "prod"; break;
- case 8721: esc = "sum"; break;
- case 8722: esc = "minus"; break;
- case 8727: esc = "lowast"; break;
- case 8730: esc = "radic"; break;
- case 8733: esc = "prop"; break;
- case 8734: esc = "infin"; break;
- case 8736: esc = "ang"; break;
- case 8743: esc = "and"; break;
- case 8744: esc = "or"; break;
- case 8745: esc = "cap"; break;
- case 8746: esc = "cup"; break;
- case 8747: esc = "int"; break;
- case 8756: esc = "there4"; break;
- case 8764: esc = "sim"; break;
- case 8773: esc = "cong"; break;
- case 8776: esc = "asymp"; break;
- case 8800: esc = "ne"; break;
- case 8801: esc = "equiv"; break;
- case 8804: esc = "le"; break;
- case 8805: esc = "ge"; break;
- case 8834: esc = "sub"; break;
- case 8835: esc = "sup"; break;
- case 8836: esc = "nsub"; break;
- case 8838: esc = "sube"; break;
- case 8839: esc = "supe"; break;
- case 8853: esc = "oplus"; break;
- case 8855: esc = "otimes"; break;
- case 8869: esc = "perp"; break;
- case 8901: esc = "sdot"; break;
- case 8968: esc = "lceil"; break;
- case 8969: esc = "rceil"; break;
- case 8970: esc = "lfloor"; break;
- case 8971: esc = "rfloor"; break;
- case 9001: esc = "lang"; break;
- case 9002: esc = "rang"; break;
- case 9674: esc = "loz"; break;
- case 9824: esc = "spades"; break;
- case 9827: esc = "clubs"; break;
- case 9829: esc = "hearts"; break;
- case 9830: esc = "diams"; break;
- }
- }
-
- // else escape with numeric char refs
- if (esc == null) {
- stringBuf.setLength (0);
- stringBuf.append ("#x");
- stringBuf.append (Integer.toHexString (c).toUpperCase ());
- esc = stringBuf.toString ();
-
- // FIXME: We don't write surrogate pairs correctly.
- // They should work as one ref per character, since
- // each pair is one character. For reading back into
- // Unicode, it matters beginning in Unicode 3.1 ...
- }
- break;
- }
- if (i != first)
- rawWrite (buf, off + first, i - first);
- first = i + 1;
- if (esc == eol)
- newline ();
- else {
- rawWrite ('&');
- rawWrite (esc);
- rawWrite (';');
- }
- }
- if (first < len)
- rawWrite (buf, off + first, len - first);
+ if ((c > 0xfffd)
+ || ((c < 0x0020) && !((c == 0x0009)
+ || (c == 0x000A) || (c == 0x000D)))
+ || (((c & dangerMask) != 0)
+ && (code == CTX_UNPARSED))) {
+
+ // if case (b) in CDATA, we might end the section,
+ // write a reference, then restart ... possible
+ // in one DOM L3 draft.
+
+ throw new CharConversionException (
+ "Illegal or non-writable character: U+"
+ + Integer.toHexString (c));
+ }
+
+ //
+ // If the output encoding represents the character
+ // directly, let it do so! Else we'll escape it.
+ //
+ if ((c & dangerMask) == 0)
+ continue;
+ esc = null;
+
+ // Avoid numeric refs where symbolic ones exist, as
+ // symbolic ones make more sense to humans reading!
+ if (xhtml) {
+ // all the HTMLlat1x.ent entities
+ // (all the "ISO-8859-1" characters)
+ if (c >= 160 && c <= 255)
+ esc = HTMLlat1x [c - 160];
+
+ // not quite half the HTMLsymbolx.ent entities
+ else if (c >= 913 && c <= 937)
+ esc = HTMLsymbolx_GR [c - 913];
+ else if (c >= 945 && c <= 969)
+ esc = HTMLsymbolx_gr [c - 945];
+
+ else switch (c) {
+ // all of the HTMLspecialx.ent entities
+ case 338: esc = "OElig"; break;
+ case 339: esc = "oelig"; break;
+ case 352: esc = "Scaron"; break;
+ case 353: esc = "scaron"; break;
+ case 376: esc = "Yuml"; break;
+ case 710: esc = "circ"; break;
+ case 732: esc = "tilde"; break;
+ case 8194: esc = "ensp"; break;
+ case 8195: esc = "emsp"; break;
+ case 8201: esc = "thinsp"; break;
+ case 8204: esc = "zwnj"; break;
+ case 8205: esc = "zwj"; break;
+ case 8206: esc = "lrm"; break;
+ case 8207: esc = "rlm"; break;
+ case 8211: esc = "ndash"; break;
+ case 8212: esc = "mdash"; break;
+ case 8216: esc = "lsquo"; break;
+ case 8217: esc = "rsquo"; break;
+ case 8218: esc = "sbquo"; break;
+ case 8220: esc = "ldquo"; break;
+ case 8221: esc = "rdquo"; break;
+ case 8222: esc = "bdquo"; break;
+ case 8224: esc = "dagger"; break;
+ case 8225: esc = "Dagger"; break;
+ case 8240: esc = "permil"; break;
+ case 8249: esc = "lsaquo"; break;
+ case 8250: esc = "rsaquo"; break;
+ case 8364: esc = "euro"; break;
+
+ // the other HTMLsymbox.ent entities
+ case 402: esc = "fnof"; break;
+ case 977: esc = "thetasym"; break;
+ case 978: esc = "upsih"; break;
+ case 982: esc = "piv"; break;
+ case 8226: esc = "bull"; break;
+ case 8230: esc = "hellip"; break;
+ case 8242: esc = "prime"; break;
+ case 8243: esc = "Prime"; break;
+ case 8254: esc = "oline"; break;
+ case 8260: esc = "frasl"; break;
+ case 8472: esc = "weierp"; break;
+ case 8465: esc = "image"; break;
+ case 8476: esc = "real"; break;
+ case 8482: esc = "trade"; break;
+ case 8501: esc = "alefsym"; break;
+ case 8592: esc = "larr"; break;
+ case 8593: esc = "uarr"; break;
+ case 8594: esc = "rarr"; break;
+ case 8595: esc = "darr"; break;
+ case 8596: esc = "harr"; break;
+ case 8629: esc = "crarr"; break;
+ case 8656: esc = "lArr"; break;
+ case 8657: esc = "uArr"; break;
+ case 8658: esc = "rArr"; break;
+ case 8659: esc = "dArr"; break;
+ case 8660: esc = "hArr"; break;
+ case 8704: esc = "forall"; break;
+ case 8706: esc = "part"; break;
+ case 8707: esc = "exist"; break;
+ case 8709: esc = "empty"; break;
+ case 8711: esc = "nabla"; break;
+ case 8712: esc = "isin"; break;
+ case 8713: esc = "notin"; break;
+ case 8715: esc = "ni"; break;
+ case 8719: esc = "prod"; break;
+ case 8721: esc = "sum"; break;
+ case 8722: esc = "minus"; break;
+ case 8727: esc = "lowast"; break;
+ case 8730: esc = "radic"; break;
+ case 8733: esc = "prop"; break;
+ case 8734: esc = "infin"; break;
+ case 8736: esc = "ang"; break;
+ case 8743: esc = "and"; break;
+ case 8744: esc = "or"; break;
+ case 8745: esc = "cap"; break;
+ case 8746: esc = "cup"; break;
+ case 8747: esc = "int"; break;
+ case 8756: esc = "there4"; break;
+ case 8764: esc = "sim"; break;
+ case 8773: esc = "cong"; break;
+ case 8776: esc = "asymp"; break;
+ case 8800: esc = "ne"; break;
+ case 8801: esc = "equiv"; break;
+ case 8804: esc = "le"; break;
+ case 8805: esc = "ge"; break;
+ case 8834: esc = "sub"; break;
+ case 8835: esc = "sup"; break;
+ case 8836: esc = "nsub"; break;
+ case 8838: esc = "sube"; break;
+ case 8839: esc = "supe"; break;
+ case 8853: esc = "oplus"; break;
+ case 8855: esc = "otimes"; break;
+ case 8869: esc = "perp"; break;
+ case 8901: esc = "sdot"; break;
+ case 8968: esc = "lceil"; break;
+ case 8969: esc = "rceil"; break;
+ case 8970: esc = "lfloor"; break;
+ case 8971: esc = "rfloor"; break;
+ case 9001: esc = "lang"; break;
+ case 9002: esc = "rang"; break;
+ case 9674: esc = "loz"; break;
+ case 9824: esc = "spades"; break;
+ case 9827: esc = "clubs"; break;
+ case 9829: esc = "hearts"; break;
+ case 9830: esc = "diams"; break;
+ }
+ }
+
+ // else escape with numeric char refs
+ if (esc == null) {
+ stringBuf.setLength (0);
+ stringBuf.append ("#x");
+ stringBuf.append (Integer.toHexString (c).toUpperCase ());
+ esc = stringBuf.toString ();
+
+ // FIXME: We don't write surrogate pairs correctly.
+ // They should work as one ref per character, since
+ // each pair is one character. For reading back into
+ // Unicode, it matters beginning in Unicode 3.1 ...
+ }
+ break;
+ }
+ if (i != first)
+ rawWrite (buf, off + first, i - first);
+ first = i + 1;
+ if (esc == eol)
+ newline ();
+ else {
+ rawWrite ('&');
+ rawWrite (esc);
+ rawWrite (';');
+ }
+ }
+ if (first < len)
+ rawWrite (buf, off + first, len - first);
}
@@ -1808,45 +1808,45 @@ public class XMLWriter
private void newline ()
throws SAXException, IOException
{
- out.write (eol);
- column = 0;
+ out.write (eol);
+ column = 0;
}
private void doIndent ()
throws SAXException, IOException
{
- int space = elementNestLevel * 2;
-
- newline ();
- column = space;
- // track tabs only at line starts
- while (space > 8) {
- out.write ("\t");
- space -= 8;
- }
- while (space > 0) {
- out.write (" ");
- space -= 2;
- }
+ int space = elementNestLevel * 2;
+
+ newline ();
+ column = space;
+ // track tabs only at line starts
+ while (space > 8) {
+ out.write ("\t");
+ space -= 8;
+ }
+ while (space > 0) {
+ out.write (" ");
+ space -= 2;
+ }
}
private void rawWrite (char c)
throws IOException
{
- out.write (c);
- column++;
+ out.write (c);
+ column++;
}
private void rawWrite (String s)
throws SAXException, IOException
{
- if (prettyPrinting && "default".equals (space.peek ())) {
- char data [] = s.toCharArray ();
- rawWrite (data, 0, data.length);
- } else {
- out.write (s);
- column += s.length ();
- }
+ if (prettyPrinting && "default".equals (space.peek ())) {
+ char data [] = s.toCharArray ();
+ rawWrite (data, 0, data.length);
+ } else {
+ out.write (s);
+ column += s.length ();
+ }
}
// NOTE: if xhtml, the REC gives some rules about whitespace
@@ -1862,70 +1862,70 @@ public class XMLWriter
private void rawWrite (char buf [], int offset, int length)
throws SAXException, IOException
{
- boolean wrap;
-
- if (prettyPrinting && space.empty ())
- fatal ("stack discipline", null);
-
- wrap = prettyPrinting && "default".equals (space.peek ());
- if (!wrap) {
- out.write (buf, offset, length);
- column += length;
- return;
- }
-
- // we're pretty printing and want to fill lines out only
- // to the desired line length.
- while (length > 0) {
- int target = lineLength - column;
- boolean wrote = false;
-
- // Do we even have a problem?
- if (target > length || noWrap) {
- out.write (buf, offset, length);
- column += length;
- return;
- }
-
- // break the line at a space character, trying to fill
- // as much of the line as possible.
- char c;
-
- for (int i = target - 1; i >= 0; i--) {
- if ((c = buf [offset + i]) == ' ' || c == '\t') {
- i++;
- out.write (buf, offset, i);
- doIndent ();
- offset += i;
- length -= i;
- wrote = true;
- break;
- }
- }
- if (wrote)
- continue;
-
- // no space character permitting break before target
- // line length is filled. So, take the next one.
- if (target < 0)
- target = 0;
- for (int i = target; i < length; i++)
- if ((c = buf [offset + i]) == ' ' || c == '\t') {
- i++;
- out.write (buf, offset, i);
- doIndent ();
- offset += i;
- length -= i;
- wrote = true;
- break;
- }
- if (wrote)
- continue;
-
- // no such luck.
- out.write (buf, offset, length);
- column += length;
- break;
- }
+ boolean wrap;
+
+ if (prettyPrinting && space.empty ())
+ fatal ("stack discipline", null);
+
+ wrap = prettyPrinting && "default".equals (space.peek ());
+ if (!wrap) {
+ out.write (buf, offset, length);
+ column += length;
+ return;
+ }
+
+ // we're pretty printing and want to fill lines out only
+ // to the desired line length.
+ while (length > 0) {
+ int target = lineLength - column;
+ boolean wrote = false;
+
+ // Do we even have a problem?
+ if (target > length || noWrap) {
+ out.write (buf, offset, length);
+ column += length;
+ return;
+ }
+
+ // break the line at a space character, trying to fill
+ // as much of the line as possible.
+ char c;
+
+ for (int i = target - 1; i >= 0; i--) {
+ if ((c = buf [offset + i]) == ' ' || c == '\t') {
+ i++;
+ out.write (buf, offset, i);
+ doIndent ();
+ offset += i;
+ length -= i;
+ wrote = true;
+ break;
+ }
+ }
+ if (wrote)
+ continue;
+
+ // no space character permitting break before target
+ // line length is filled. So, take the next one.
+ if (target < 0)
+ target = 0;
+ for (int i = target; i < length; i++)
+ if ((c = buf [offset + i]) == ' ' || c == '\t') {
+ i++;
+ out.write (buf, offset, i);
+ doIndent ();
+ offset += i;
+ length -= i;
+ wrote = true;
+ break;
+ }
+ if (wrote)
+ continue;
+
+ // no such luck.
+ out.write (buf, offset, length);
+ column += length;
+ break;
+ }
}
}