summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 13
-rw-r--r-- gnu/xml/stream/SAXParser.java 3
-rw-r--r-- gnu/xml/stream/UnicodeReader.java 94
-rw-r--r-- gnu/xml/stream/XMLParser.java 254
4 files changed, 242 insertions, 122 deletions
diff --git a/ChangeLog b/ChangeLog
index ed7a9218a..439869704 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2006-01-08 Chris Burdess <dog@gnu.org>
+
+ * gnu/xml/stream/SAXParser.java: Check standalone status for mixed
+ content models from external entities.
+ * gnu/xml/stream/UnicodeReader.java: Report error instead of
+ attempting to continue with unpaired surrogates.
+ * gnu/xml/stream/XMLParser.java: Don't normalize LF equivalents when
+ resolving entities with character entity references; better
+ checking of valid character ranges; don't report an error for URI
+ fragments in notation declarations; check unbound namespace
+ prefixes for elements and attributes, including XML 1.1 unbinding
+ syntax; namespace-aware checking of attribute duplicates.
+
2006-01-08 Robert Schuster <robertschuster@fsfe.org>
* java/beans/Statement.java: Doc fixes.
diff --git a/gnu/xml/stream/SAXParser.java b/gnu/xml/stream/SAXParser.java
index 94d51d659..54c8b3624 100644
--- a/gnu/xml/stream/SAXParser.java
+++ b/gnu/xml/stream/SAXParser.java
@@ -675,6 +675,7 @@ public class SAXParser
*/
private boolean isIgnorableWhitespace(XMLParser reader, char[] b,
boolean testCharacters)
+ throws XMLStreamException
{
XMLParser.Doctype doctype = reader.doctype;
if (doctype == null)
@@ -695,6 +696,8 @@ public class SAXParser
XMLParser.ContentModel model = doctype.getElementModel(currentElement);
if (model == null || model.type != XMLParser.ContentModel.ELEMENT)
return false;
+ if (model.external && xmlStandalone)
+ return false;
boolean white = true;
if (testCharacters)
{
diff --git a/gnu/xml/stream/UnicodeReader.java b/gnu/xml/stream/UnicodeReader.java
index e3c179cf7..c38516c30 100644
--- a/gnu/xml/stream/UnicodeReader.java
+++ b/gnu/xml/stream/UnicodeReader.java
@@ -49,8 +49,6 @@ class UnicodeReader
{
final Reader in;
- int carry, markCarry;
- boolean isCarry, isMarkCarry;
UnicodeReader(Reader in)
{
@@ -60,27 +58,18 @@ class UnicodeReader
public void mark(int limit)
throws IOException
{
- in.mark(limit);
- markCarry = carry;
- isMarkCarry = isCarry;
+ in.mark(limit * 2);
}
public void reset()
throws IOException
{
in.reset();
- carry = markCarry;
- isCarry = isMarkCarry;
}
public int read()
throws IOException
{
- if (isCarry)
- {
- isCarry = false;
- return carry;
- }
int ret = in.read();
if (ret == -1)
return ret;
@@ -91,11 +80,12 @@ class UnicodeReader
if (low >= 0xdc00 && low < 0xe000)
ret = Character.toCodePoint((char) ret, (char) low);
else
- {
- carry = low;
- isCarry = true;
- }
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(ret));
}
+ else if (ret >= 0xdc00 && ret < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(ret));
return ret;
}
@@ -104,19 +94,13 @@ class UnicodeReader
{
if (len == 0)
return 0;
- if (isCarry)
- {
- isCarry = false;
- buf[off] = carry;
- return 1;
- }
char[] b2 = new char[len];
int ret = in.read(b2, 0, len);
if (ret <= 0)
return ret;
int l = ret - 1;
- int j = off;
- for (int i = 0; i < l; i++)
+ int i = 0, j = off;
+ for (; i < l; i++)
{
char c = b2[i];
if (c >= 0xd800 && c < 0xdc00)
@@ -129,26 +113,36 @@ class UnicodeReader
i++;
continue;
}
+ else
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
}
+ else if (c >= 0xdc00 && c < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
buf[j++] = (int) c;
}
- // last char
- char c = b2[l];
- if (c >= 0xd800 && c < 0xdc00)
+ if (i == l)
{
- int low = in.read();
- if (low >= 0xdc00 && low < 0xe000)
- {
- buf[j++] = Character.toCodePoint(c, (char) low);
- return j;
- }
- else
+ // last char
+ char c = b2[l];
+ if (c >= 0xd800 && c < 0xdc00)
{
- carry = low;
- isCarry = true;
+ int low = in.read();
+ if (low >= 0xdc00 && low < 0xe000)
+ {
+ buf[j++] = Character.toCodePoint(c, (char) low);
+ return j;
+ }
+ else
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
}
+ else if (c >= 0xdc00 && c < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
+ buf[j++] = (int) c;
}
- buf[j++] = (int) c;
return j;
}
@@ -159,14 +153,15 @@ class UnicodeReader
}
public static int[] toCodePointArray(String text)
+ throws IOException
{
char[] b2 = text.toCharArray();
int[] buf = new int[b2.length];
if (b2.length > 0)
{
int l = b2.length - 1;
- int j = 0;
- for (int i = 0; i < l; i++)
+ int i = 0, j = 0;
+ for (; i < l; i++)
{
char c = b2[i];
if (c >= 0xd800 && c < 0xdc00)
@@ -179,16 +174,25 @@ class UnicodeReader
i++;
continue;
}
+ else
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
}
+ else if (c >= 0xdc00 && c < 0xe000)
+ throw new IOException("unpaired surrogate: U+" +
+ Integer.toHexString(c));
buf[j++] = (int) c;
}
- // last char
- buf[j++] = (int) b2[l];
- if (j < buf.length)
+ if (i == l)
{
- int[] buf2 = new int[j];
- System.arraycopy(buf, 0, buf2, 0, j);
- buf = buf2;
+ // last char
+ buf[j++] = (int) b2[l];
+ if (j < buf.length)
+ {
+ int[] buf2 = new int[j];
+ System.arraycopy(buf, 0, buf2, 0, j);
+ buf = buf2;
+ }
}
}
return buf;
diff --git a/gnu/xml/stream/XMLParser.java b/gnu/xml/stream/XMLParser.java
index 7505e6e60..f856f9372 100644
--- a/gnu/xml/stream/XMLParser.java
+++ b/gnu/xml/stream/XMLParser.java
@@ -408,7 +408,7 @@ public class XMLParser
ids = new HashSet();
idrefs = new HashSet();
}
- pushInput(new Input(in, null, null, systemId, null, null, false));
+ pushInput(new Input(in, null, null, systemId, null, null, false, true));
}
/**
@@ -465,7 +465,7 @@ public class XMLParser
ids = new HashSet();
idrefs = new HashSet();
}
- pushInput(new Input(null, reader, null, systemId, null, null, false));
+ pushInput(new Input(null, reader, null, systemId, null, null, false, true));
}
// -- NamespaceContext --
@@ -963,16 +963,11 @@ public class XMLParser
public boolean hasNext()
throws XMLStreamException
{
+ if (event == XMLStreamConstants.END_DOCUMENT)
+ return false;
if (!lookahead)
{
- try
- {
- next();
- }
- catch (NoSuchElementException e)
- {
- event = -1;
- }
+ next();
lookahead = true;
}
return event != -1;
@@ -1072,7 +1067,15 @@ public class XMLParser
}
else if (replaceERefs && !isUnparsedEntity(ref))
{
- expandEntity(ref, false); //report start-entity
+ // this will report a start-entity event
+ boolean external = false;
+ if (doctype != null)
+ {
+ Object entity = doctype.getEntity(ref);
+ if (entity instanceof ExternalIds)
+ external = true;
+ }
+ expandEntity(ref, false, external);
event = next();
}
else
@@ -1358,18 +1361,9 @@ public class XMLParser
error("illegal XML 1.1 character",
"U+" + Integer.toHexString(c));
}
- else
- {
- if (c < 0x20 && c != 0x09 && c != 0x0a && c != 0x0d)
- error("illegal XML character",
- "U+" + Integer.toHexString(c));
- else if (c > '\ud7ff' && c < '\ue000')
- error("illegal XML character",
- "U+" + Integer.toHexString(c));
- else if (c > '\ufffd')
- error("illegal XML character",
- "U+" + Integer.toHexString(c));
- }
+ else if (!isChar(c))
+ error("illegal XML character",
+ "U+" + Integer.toHexString(c));
buf.append(Character.toChars(c));
}
}
@@ -1460,7 +1454,8 @@ public class XMLParser
/**
* Push the specified text input source.
*/
- private void pushInput(String name, String text, boolean report)
+ private void pushInput(String name, String text, boolean report,
+ boolean normalize)
throws IOException, XMLStreamException
{
// Check for recursion
@@ -1476,13 +1471,15 @@ public class XMLParser
else
report = false;
pushInput(new Input(null, new StringReader(text), input.publicId,
- input.systemId, name, input.inputEncoding, report));
+ input.systemId, name, input.inputEncoding, report,
+ normalize));
}
/**
* Push the specified external input source.
*/
- private void pushInput(String name, ExternalIds ids, boolean report)
+ private void pushInput(String name, ExternalIds ids, boolean report,
+ boolean normalize)
throws IOException, XMLStreamException
{
if (!externalEntities)
@@ -1512,7 +1509,8 @@ public class XMLParser
if (in == null)
error("unable to resolve external entity",
(ids.systemId != null) ? ids.systemId : ids.publicId);
- pushInput(new Input(in, null, ids.publicId, url, name, null, report));
+ pushInput(new Input(in, null, ids.publicId, url, name, null, report,
+ normalize));
input.init();
if (tryRead(TEST_XML_DECL))
readTextDecl();
@@ -1752,8 +1750,8 @@ public class XMLParser
// Parse external subset
if (ids.systemId != null && externalEntities)
{
- pushInput("", ">", false);
- pushInput("[dtd]", ids, true);
+ pushInput("", ">", false, false);
+ pushInput("[dtd]", ids, true, true);
// loop until we get back to ">"
while (true)
{
@@ -1778,6 +1776,7 @@ public class XMLParser
error("external subset has unmatched '>'");
popInput();
}
+ checkDoctype();
if (validating)
validateDoctype();
@@ -1787,6 +1786,15 @@ public class XMLParser
}
/**
+ * Checks the well-formedness of the DTD.
+ */
+ private void checkDoctype()
+ throws XMLStreamException
+ {
+ // TODO check entity recursion
+ }
+
+ /**
* Parse the markupdecl production.
*/
private void readMarkupdecl(boolean inExternalSubset)
@@ -2488,16 +2496,18 @@ public class XMLParser
c = readCh();
reset();
if (c == 0x22 || c == 0x27) // " | '
- ids.systemId = absolutize(input.systemId,
- readLiteral(flags, false));
+ {
+ String href = readLiteral(flags, false);
+ ids.systemId = absolutize(input.systemId, href);
+ }
}
else
{
requireWhitespace();
- ids.systemId = absolutize(input.systemId,
- readLiteral(flags, false));
+ String href = readLiteral(flags, false);
+ ids.systemId = absolutize(input.systemId, href);
}
-
+ // Check valid URI characters
for (int i = 0; i < ids.publicId.length(); i++)
{
char d = ids.publicId.charAt(i);
@@ -2514,13 +2524,14 @@ public class XMLParser
else if (tryRead("SYSTEM"))
{
requireWhitespace();
- ids.systemId = absolutize(input.systemId, readLiteral(flags, false));
+ String href = readLiteral(flags, false);
+ ids.systemId = absolutize(input.systemId, href);
}
else if (!isSubset)
{
error("missing SYSTEM or PUBLIC keyword");
}
- if (ids.systemId != null)
+ if (ids.systemId != null && !inNotation)
{
if (ids.systemId.indexOf('#') != -1)
error("SYSTEM id has a URI fragment", ids.systemId);
@@ -2637,15 +2648,24 @@ public class XMLParser
if (ci != -1)
{
String prefix = elementName.substring(0, ci);
- if (getNamespaceURI(prefix) == null)
+ String uri = getNamespaceURI(prefix);
+ if (uri == null)
error("unbound element prefix", prefix);
+ else if (input.xml11 && "".equals(uri))
+ error("XML 1.1 unbound element prefix", prefix);
}
for (Iterator i = attrs.iterator(); i.hasNext(); )
{
Attribute attr = (Attribute) i.next();
- if (attr.prefix != null && getNamespaceURI(attr.prefix) == null &&
+ if (attr.prefix != null &&
!XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
- error("unbound attribute prefix", attr.prefix);
+ {
+ String uri = getNamespaceURI(attr.prefix);
+ if (uri == null)
+ error("unbound attribute prefix", attr.prefix);
+ else if (input.xml11 && "".equals(uri))
+ error("XML 1.1 unbound attribute prefix", attr.prefix);
+ }
}
}
if (validating && doctype != null)
@@ -2818,6 +2838,8 @@ public class XMLParser
error("Duplicate default namespace declaration");
if (XMLConstants.XML_NS_URI.equals(attr.value))
error("can't bind XML namespace");
+ if ("".equals(attr.value) && !input.xml11)
+ error("illegal use of 1.1-style prefix unbinding in 1.0 document");
ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
return true;
}
@@ -2840,6 +2862,8 @@ public class XMLParser
error("can't redeclare xmlns prefix");
if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
error("can't bind non-xmlns prefix to XML Namespace namespace");
+ if ("".equals(attr.value) && !input.xml11)
+ error("illegal use of 1.1-style prefix unbinding in 1.0 document");
ctx.put(attr.localName, attr.value);
return true;
}
@@ -3032,7 +3056,7 @@ public class XMLParser
buf.append(text);
else
{
- pushInput("", "&" + entityName + ";", false);
+ pushInput("", "&" + entityName + ";", false, false);
done = true;
break;
}
@@ -3067,14 +3091,15 @@ public class XMLParser
done = true;
break; // end of text sequence
default:
- if ((c < 0x0020 || c > 0xfffd) ||
- (c >= 0xd800 && c < 0xdc00) ||
- (input.xml11 && (c >= 0x007f) &&
- (c <= 0x009f) && (c != 0x0085)))
+ if (input.xml11)
{
- error("illegal XML character",
- "U+" + Integer.toHexString(c));
+ if (!isXML11Char(c))
+ error("illegal XML 1.1 character",
+ "U+" + Integer.toHexString(c));
}
+ else if (!isChar(c))
+ error("illegal XML character",
+ "U+" + Integer.toHexString(c));
white = false;
buf.append(Character.toChars(c));
}
@@ -3092,7 +3117,7 @@ public class XMLParser
/**
* Expands the specified entity.
*/
- private void expandEntity(String name, boolean inAttr)
+ private void expandEntity(String name, boolean inAttr, boolean normalize)
throws IOException, XMLStreamException
{
if (doctype != null)
@@ -3119,12 +3144,12 @@ public class XMLParser
String text = (String) value;
if (inAttr && text.indexOf('<') != -1)
error("< in attribute value");
- pushInput(name, text, !inAttr);
+ pushInput(name, text, !inAttr, normalize);
}
else if (inAttr)
error("reference to external entity in attribute value", name);
else
- pushInput(name, (ExternalIds) value, !inAttr);
+ pushInput(name, (ExternalIds) value, !inAttr, normalize);
return;
}
}
@@ -3260,7 +3285,8 @@ public class XMLParser
literalBuf.append(text);
else
expandEntity(entityName,
- (flags & LIT_ATTRIBUTE) != 0);
+ (flags & LIT_ATTRIBUTE) != 0,
+ true);
entities = true;
continue;
}
@@ -3363,13 +3389,13 @@ public class XMLParser
}
if (entity instanceof String)
{
- pushInput(name, (String) entity, false);
+ pushInput(name, (String) entity, false, input.normalize);
//pushInput(name, " " + (String) entity + " ");
}
else
{
//pushInput("", " ");
- pushInput(name, (ExternalIds) entity, false);
+ pushInput(name, (ExternalIds) entity, false, input.normalize);
//pushInput("", " ");
}
}
@@ -3496,7 +3522,7 @@ public class XMLParser
private boolean isXML11Char(int c)
{
return ((c >= 0x0001 && c <= 0xD7FF) ||
- (c >= 0xE000 && c <= 0xFFFD) ||
+ (c >= 0xE000 && c < 0xFFFD) || // NB exclude 0xfffd
(c >= 0x10000 && c <= 0x10FFFF));
}
@@ -3519,25 +3545,32 @@ public class XMLParser
*/
private boolean isNmtoken(String text, boolean isName)
{
- int[] cp = UnicodeReader.toCodePointArray(text);
- if (cp.length == 0)
- return false;
- if (isName)
- {
- if (!isNameStartCharacter(cp[0]))
- return false;
- }
- else
+ try
{
- if (!isNameCharacter(cp[0]))
+ int[] cp = UnicodeReader.toCodePointArray(text);
+ if (cp.length == 0)
return false;
+ if (isName)
+ {
+ if (!isNameStartCharacter(cp[0]))
+ return false;
+ }
+ else
+ {
+ if (!isNameCharacter(cp[0]))
+ return false;
+ }
+ for (int i = 1; i < cp.length; i++)
+ {
+ if (!isNameCharacter(cp[i]))
+ return false;
+ }
+ return true;
}
- for (int i = 1; i < cp.length; i++)
+ catch (IOException e)
{
- if (!isNameCharacter(cp[i]))
- return false;
+ return false;
}
- return true;
}
/**
@@ -3961,6 +3994,18 @@ public class XMLParser
(c >= 0x309D && c <= 0x309E) ||
(c >= 0x30FC && c <= 0x30FE));
}
+
+ /**
+ * Indicates whether the specified Unicode code point matches the Char
+ * production. Surrogates (U+D800 to U+DFFF) and U+FFFD are rejected.
+ */
+ public static boolean isChar(int c)
+ {
+ return (c >= 0x20 && c < 0xd800) ||
+ (c >= 0xe000 && c < 0xfffd) || // NB exclude 0xfffd
+ (c >= 0x10000 && c < 0x110000) ||
+ c == 0xa || c == 0x9 || c == 0xd;
+ }
/**
* Interns the specified text or not, depending on the value of
@@ -4218,6 +4263,8 @@ public class XMLParser
while (reader.hasNext())
{
event = reader.next();
+ Location loc = reader.getLocation();
+ System.out.print(loc.getLineNumber()+":"+loc.getColumnNumber()+" ");
switch (event)
{
case XMLStreamConstants.START_DOCUMENT:
@@ -4242,13 +4289,13 @@ public class XMLParser
System.out.println("END_ELEMENT "+reader.getName());
break;
case XMLStreamConstants.CHARACTERS:
- System.out.println("CHARACTERS '"+reader.getText()+"'");
+ System.out.println("CHARACTERS '"+encodeText(reader.getText())+"'");
break;
case XMLStreamConstants.CDATA:
- System.out.println("CDATA '"+reader.getText()+"'");
+ System.out.println("CDATA '"+encodeText(reader.getText())+"'");
break;
case XMLStreamConstants.SPACE:
- System.out.println("SPACE '"+reader.getText()+"'");
+ System.out.println("SPACE '"+encodeText(reader.getText())+"'");
break;
case XMLStreamConstants.DTD:
System.out.println("DTD "+reader.getText());
@@ -4257,7 +4304,7 @@ public class XMLParser
System.out.println("ENTITY_REFERENCE "+reader.getText());
break;
case XMLStreamConstants.COMMENT:
- System.out.println("COMMENT '"+reader.getText()+"'");
+ System.out.println("COMMENT '"+encodeText(reader.getText())+"'");
break;
case XMLStreamConstants.PROCESSING_INSTRUCTION:
System.out.println("PROCESSING_INSTRUCTION "+reader.getPITarget()+
@@ -4285,6 +4332,34 @@ public class XMLParser
}
/**
+ * Escapes control characters in the specified text. For debugging.
+ */
+ private static String encodeText(String text)
+ {
+ StringBuffer b = new StringBuffer();
+ int len = text.length();
+ for (int i = 0; i < len; i++)
+ {
+ char c = text.charAt(i);
+ switch (c)
+ {
+ case '\t':
+ b.append("\\t");
+ break;
+ case '\n':
+ b.append("\\n");
+ break;
+ case '\r':
+ b.append("\\r");
+ break;
+ default:
+ b.append(c);
+ }
+ }
+ return b.toString();
+ }
+
+ /**
* An attribute instance.
*/
class Attribute
@@ -4343,7 +4418,26 @@ public class XMLParser
{
if (other instanceof Attribute)
{
- return ((Attribute) other).name.equals(name);
+ Attribute a = (Attribute) other;
+ if (namespaceAware)
+ {
+ if (!a.localName.equals(localName))
+ return false;
+ String auri = getNamespaceURI(a.prefix);
+ String uri = getNamespaceURI(prefix);
+ if (uri == null && (auri == null ||
+ (input.xml11 && "".equals(auri))))
+ return true;
+ if (uri != null)
+ {
+ if ("".equals(uri) && input.xml11 && "".equals(auri))
+ return true;
+ return uri.equals(auri);
+ }
+ return false;
+ }
+ else
+ return a.name.equals(name);
}
return false;
}
@@ -4865,6 +4959,7 @@ public class XMLParser
int offset, markOffset;
final String publicId, systemId, name;
final boolean report; // report start- and end-entity
+ final boolean normalize; // normalize CR, etc to LF
InputStream in;
Reader reader;
@@ -4874,7 +4969,8 @@ public class XMLParser
boolean xml11;
Input(InputStream in, Reader reader, String publicId, String systemId,
- String name, String inputEncoding, boolean report)
+ String name, String inputEncoding, boolean report,
+ boolean normalize)
{
if (inputEncoding == null)
inputEncoding = "UTF-8";
@@ -4883,18 +4979,20 @@ public class XMLParser
this.systemId = systemId;
this.name = name;
this.report = report;
+ this.normalize = normalize;
if (in != null)
{
if (reader != null)
throw new IllegalStateException("both byte and char streams "+
"specified");
- in = new CRLFInputStream(in);
+ if (normalize)
+ in = new CRLFInputStream(in);
in = new BufferedInputStream(in);
this.in = in;
}
else
{
- this.reader = new CRLFReader(reader);
+ this.reader = normalize ? new CRLFReader(reader) : reader;
unicodeReader = new UnicodeReader(this.reader);
}
initialized = false;
@@ -4953,7 +5051,8 @@ public class XMLParser
{
offset++;
int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
- if (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028)))
+ if (normalize &&
+ (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
{
// Normalize CR etc to LF
ret = 0x0a;
@@ -4996,7 +5095,8 @@ public class XMLParser
for (int i = 0; i < ret; i++)
{
int c = b[off + i];
- if (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028)))
+ if (normalize &&
+ (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
{
// Normalize CR etc to LF
c = 0x0a;