summaryrefslogtreecommitdiff
path: root/src/json/ext/StringDecoder.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/json/ext/StringDecoder.java')
-rw-r--r--src/json/ext/StringDecoder.java166
1 files changed, 166 insertions, 0 deletions
diff --git a/src/json/ext/StringDecoder.java b/src/json/ext/StringDecoder.java
new file mode 100644
index 0000000..a4ee975
--- /dev/null
+++ b/src/json/ext/StringDecoder.java
@@ -0,0 +1,166 @@
+/*
+ * This code is copyrighted work by Daniel Luz <dev at mernen dot com>.
+ *
+ * Distributed under the Ruby and GPLv2 licenses; see COPYING and GPL files
+ * for details.
+ */
+package json.ext;
+
+import org.jruby.exceptions.RaiseException;
+import org.jruby.runtime.ThreadContext;
+import org.jruby.util.ByteList;
+
+/**
+ * A decoder that reads a JSON-encoded string from the given sources and
+ * returns its decoded form on a new ByteList. Escaped Unicode characters
+ * are encoded as UTF-8.
+ */
+final class StringDecoder extends ByteListTranscoder {
+ /**
+ * Stores the offset of the high surrogate when reading a surrogate pair,
+ * or -1 when not.
+ */
+ private int surrogatePairStart = -1;
+
+ // Array used for writing multi-byte characters into the buffer at once
+ private final byte[] aux = new byte[4];
+
+ StringDecoder(ThreadContext context) {
+ super(context);
+ }
+
+ ByteList decode(ByteList src, int start, int end) {
+ ByteList out = new ByteList(end - start);
+ init(src, start, end, out);
+ while (hasNext()) {
+ handleChar(readUtf8Char());
+ }
+ quoteStop(pos);
+ return out;
+ }
+
+ private void handleChar(int c) {
+ if (c == '\\') {
+ quoteStop(charStart);
+ handleEscapeSequence();
+ } else {
+ quoteStart();
+ }
+ }
+
+ private void handleEscapeSequence() {
+ ensureMin(1);
+ switch (readUtf8Char()) {
+ case 'b':
+ append('\b');
+ break;
+ case 'f':
+ append('\f');
+ break;
+ case 'n':
+ append('\n');
+ break;
+ case 'r':
+ append('\r');
+ break;
+ case 't':
+ append('\t');
+ break;
+ case 'u':
+ ensureMin(4);
+ int cp = readHex();
+ if (Character.isHighSurrogate((char)cp)) {
+ handleLowSurrogate((char)cp);
+ } else if (Character.isLowSurrogate((char)cp)) {
+ // low surrogate with no high surrogate
+ throw invalidUtf8();
+ } else {
+ writeUtf8Char(cp);
+ }
+ break;
+ default: // '\\', '"', '/'...
+ quoteStart();
+ }
+ }
+
+ private void handleLowSurrogate(char highSurrogate) {
+ surrogatePairStart = charStart;
+ ensureMin(1);
+ int lowSurrogate = readUtf8Char();
+
+ if (lowSurrogate == '\\') {
+ ensureMin(5);
+ if (readUtf8Char() != 'u') throw invalidUtf8();
+ lowSurrogate = readHex();
+ }
+
+ if (Character.isLowSurrogate((char)lowSurrogate)) {
+ writeUtf8Char(Character.toCodePoint(highSurrogate,
+ (char)lowSurrogate));
+ surrogatePairStart = -1;
+ } else {
+ throw invalidUtf8();
+ }
+ }
+
+ private void writeUtf8Char(int codePoint) {
+ if (codePoint < 0x80) {
+ append(codePoint);
+ } else if (codePoint < 0x800) {
+ aux[0] = (byte)(0xc0 | (codePoint >>> 6));
+ aux[1] = tailByte(codePoint & 0x3f);
+ append(aux, 0, 2);
+ } else if (codePoint < 0x10000) {
+ aux[0] = (byte)(0xe0 | (codePoint >>> 12));
+ aux[1] = tailByte(codePoint >>> 6);
+ aux[2] = tailByte(codePoint);
+ append(aux, 0, 3);
+ } else {
+ aux[0] = (byte)(0xf0 | codePoint >>> 18);
+ aux[1] = tailByte(codePoint >>> 12);
+ aux[2] = tailByte(codePoint >>> 6);
+ aux[3] = tailByte(codePoint);
+ append(aux, 0, 4);
+ }
+ }
+
+ private byte tailByte(int value) {
+ return (byte)(0x80 | (value & 0x3f));
+ }
+
+ /**
+ * Reads a 4-digit unsigned hexadecimal number from the source.
+ */
+ private int readHex() {
+ int numberStart = pos;
+ int result = 0;
+ int length = 4;
+ for (int i = 0; i < length; i++) {
+ int digit = readUtf8Char();
+ int digitValue;
+ if (digit >= '0' && digit <= '9') {
+ digitValue = digit - '0';
+ } else if (digit >= 'a' && digit <= 'f') {
+ digitValue = 10 + digit - 'a';
+ } else if (digit >= 'A' && digit <= 'F') {
+ digitValue = 10 + digit - 'A';
+ } else {
+ throw new NumberFormatException("Invalid base 16 number "
+ + src.subSequence(numberStart, numberStart + length));
+ }
+ result = result * 16 + digitValue;
+ }
+ return result;
+ }
+
+ @Override
+ protected RaiseException invalidUtf8() {
+ ByteList message = new ByteList(
+ ByteList.plain("partial character in source, " +
+ "but hit end near "));
+ int start = surrogatePairStart != -1 ? surrogatePairStart : charStart;
+ message.append(src, start, srcEnd - start);
+ return Utils.newException(context, Utils.M_PARSER_ERROR,
+ context.getRuntime().newString(message));
+ }
+}