diff options
author | Chris Burdess <dog@bluezoo.org> | 2006-01-08 21:47:03 +0000 |
---|---|---|
committer | Chris Burdess <dog@bluezoo.org> | 2006-01-08 21:47:03 +0000 |
commit | 0c0aa2702738d4e3ba13aeb91b49c1b048a670b7 (patch) | |
tree | d6b732efc4c4b52394c14776ea627e34b1c4a422 /gnu/xml/stream/UnicodeReader.java | |
parent | 02fda0d645f6e36b41c1df01fb4fd99494874416 (diff) | |
download | classpath-0c0aa2702738d4e3ba13aeb91b49c1b048a670b7.tar.gz |
2006-01-08 Chris Burdess <dog@gnu.org>
* gnu/xml/stream/SAXParser.java: Check standalone status for mixed
content models from external entities.
* gnu/xml/stream/UnicodeReader.java: Report error instead of
attempting to continue with unpaired surrogates.
* gnu/xml/stream/XMLParser.java: Don't normalize LF equivalents when
resolving entities with character entity references; better
checking of valid character ranges; don't report an error for URI
fragments in notation declarations; check unbound namespace
prefixes for elements and attributes, including XML 1.1 unbinding
syntax; namespace-aware checking of attribute duplicates.
Diffstat (limited to 'gnu/xml/stream/UnicodeReader.java')
-rw-r--r-- | gnu/xml/stream/UnicodeReader.java | 94 |
1 files changed, 49 insertions, 45 deletions
diff --git a/gnu/xml/stream/UnicodeReader.java b/gnu/xml/stream/UnicodeReader.java index e3c179cf7..c38516c30 100644 --- a/gnu/xml/stream/UnicodeReader.java +++ b/gnu/xml/stream/UnicodeReader.java @@ -49,8 +49,6 @@ class UnicodeReader { final Reader in; - int carry, markCarry; - boolean isCarry, isMarkCarry; UnicodeReader(Reader in) { @@ -60,27 +58,18 @@ class UnicodeReader public void mark(int limit) throws IOException { - in.mark(limit); - markCarry = carry; - isMarkCarry = isCarry; + in.mark(limit * 2); } public void reset() throws IOException { in.reset(); - carry = markCarry; - isCarry = isMarkCarry; } public int read() throws IOException { - if (isCarry) - { - isCarry = false; - return carry; - } int ret = in.read(); if (ret == -1) return ret; @@ -91,11 +80,12 @@ class UnicodeReader if (low >= 0xdc00 && low < 0xe000) ret = Character.toCodePoint((char) ret, (char) low); else - { - carry = low; - isCarry = true; - } + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(ret)); } + else if (ret >= 0xdc00 && ret < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(ret)); return ret; } @@ -104,19 +94,13 @@ class UnicodeReader { if (len == 0) return 0; - if (isCarry) - { - isCarry = false; - buf[off] = carry; - return 1; - } char[] b2 = new char[len]; int ret = in.read(b2, 0, len); if (ret <= 0) return ret; int l = ret - 1; - int j = off; - for (int i = 0; i < l; i++) + int i = 0, j = off; + for (; i < l; i++) { char c = b2[i]; if (c >= 0xd800 && c < 0xdc00) @@ -129,26 +113,36 @@ class UnicodeReader i++; continue; } + else + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); } + else if (c >= 0xdc00 && c < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); buf[j++] = (int) c; } - // last char - char c = b2[l]; - if (c >= 0xd800 && c < 0xdc00) + if (i == l) { - int low = in.read(); - if (low >= 0xdc00 && low < 0xe000) - { - buf[j++] = Character.toCodePoint(c, (char) low); - return j; - } - else + // last char + char c = b2[l]; + if (c >= 0xd800 && c < 0xdc00) { - carry = low; - isCarry = true; + int low = in.read(); + if (low >= 0xdc00 && low < 0xe000) + { + buf[j++] = Character.toCodePoint(c, (char) low); + return j; + } + else + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); } + else if (c >= 0xdc00 && c < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); + buf[j++] = (int) c; } - buf[j++] = (int) c; return j; } @@ -159,14 +153,15 @@ class UnicodeReader } public static int[] toCodePointArray(String text) + throws IOException { char[] b2 = text.toCharArray(); int[] buf = new int[b2.length]; if (b2.length > 0) { int l = b2.length - 1; - int j = 0; - for (int i = 0; i < l; i++) + int i = 0, j = 0; + for (; i < l; i++) { char c = b2[i]; if (c >= 0xd800 && c < 0xdc00) @@ -179,16 +174,25 @@ class UnicodeReader i++; continue; } + else + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); } + else if (c >= 0xdc00 && c < 0xe000) + throw new IOException("unpaired surrogate: U+" + + Integer.toHexString(c)); buf[j++] = (int) c; } - // last char - buf[j++] = (int) b2[l]; - if (j < buf.length) + if (i == l) { - int[] buf2 = new int[j]; - System.arraycopy(buf, 0, buf2, 0, j); - buf = buf2; + // last char + buf[j++] = (int) b2[l]; + if (j < buf.length) + { + int[] buf2 = new int[j]; + System.arraycopy(buf, 0, buf2, 0, j); + buf = buf2; + } } } return buf; |