started to build jruby extension with Rakefile

author: Florian Frank <flori@ping.de> 2010-09-22 22:21:02 +0200
committer: Florian Frank <flori@ping.de> 2010-09-23 01:16:01 +0200
commit: e3fe104e7d5ec184aac36128aed2d217cb655dfc (patch)
tree: 3a63dc0152effdb990defcd5c935e38209649a8f /java/src/json/ext/StringEncoder.java
parent: 2c0f8d2c9b15a33b8d10ffcb1959aef54d320b57 (diff)
download: json-e3fe104e7d5ec184aac36128aed2d217cb655dfc.tar.gz
1 files changed, 106 insertions, 0 deletions
diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java
new file mode 100644
index 0000000..57bd19b
--- /dev/null
+++ b/java/src/json/ext/StringEncoder.java
@@ -0,0 +1,106 @@
+package json.ext;
+
+import org.jruby.exceptions.RaiseException;
+import org.jruby.runtime.ThreadContext;
+import org.jruby.util.ByteList;
+
+/**
+ * An encoder that reads from the given source and outputs its representation
+ * to another ByteList. The source string is fully checked for UTF-8 validity,
+ * and a GeneratorError is raised if any problem is found.
+ */
+final class StringEncoder extends ByteListTranscoder {
+    private final boolean asciiOnly;
+
+    // Escape sequences are assembled in this reusable array, to avoid new
+    // allocations or appending them byte-by-byte
+    private final byte[] aux =
+        new byte[] {/* First unicode character */
+                    '\\', 'u', 0, 0, 0, 0,
+                    /* Second unicode character (for surrogate pairs) */
+                    '\\', 'u', 0, 0, 0, 0,
+                    /* "\X" characters */
+                    '\\', 0};
+    // Offsets into the array above
+    private static final int ESCAPE_UNI1_OFFSET = 0;
+    private static final int ESCAPE_UNI2_OFFSET = ESCAPE_UNI1_OFFSET + 6;
+    private static final int ESCAPE_CHAR_OFFSET = ESCAPE_UNI2_OFFSET + 6;
+    /** Array used to decompose a code point into its UTF-16 code units */
+    private final char[] utf16 = new char[2];
+
+    private static final byte[] HEX =
+            new byte[] {'0', '1', '2', '3', '4', '5', '6', '7',
+                        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
+
+    StringEncoder(ThreadContext context, boolean asciiOnly) {
+        super(context);
+        this.asciiOnly = asciiOnly;
+    }
+
+    void encode(ByteList src, ByteList out) {
+        init(src, out);
+        append('"');
+        while (hasNext()) {
+            handleChar(readUtf8Char());
+        }
+        quoteStop(pos);
+        append('"');
+    }
+
+    private void handleChar(int c) {
+        switch (c) {
+        case '"':
+        case '\\':
+            escapeChar((char)c);
+            break;
+        case '\n':
+            escapeChar('n');
+            break;
+        case '\r':
+            escapeChar('r');
+            break;
+        case '\t':
+            escapeChar('t');
+            break;
+        case '\f':
+            escapeChar('f');
+            break;
+        case '\b':
+            escapeChar('b');
+            break;
+        default:
+            if (c >= 0x20 && c <= 0x7f ||
+                    (c >= 0x80 && !asciiOnly)) {
+                quoteStart();
+            } else {
+                quoteStop(charStart);
+                escapeUtf8Char(c);
+            }
+        }
+    }
+
+    private void escapeChar(char c) {
+        quoteStop(charStart);
+        aux[ESCAPE_CHAR_OFFSET + 1] = (byte)c;
+        append(aux, ESCAPE_CHAR_OFFSET, 2);
+    }
+
+    private void escapeUtf8Char(int codePoint) {
+        int numChars = Character.toChars(codePoint, utf16, 0);
+        escapeCodeUnit(utf16[0], ESCAPE_UNI1_OFFSET + 2);
+        if (numChars > 1) escapeCodeUnit(utf16[1], ESCAPE_UNI2_OFFSET + 2);
+        append(aux, ESCAPE_UNI1_OFFSET, 6 * numChars);
+    }
+
+    private void escapeCodeUnit(char c, int auxOffset) {
+        for (int i = 0; i < 4; i++) {
+            aux[auxOffset + i] = HEX[(c >>> (12 - 4 * i)) & 0xf];
+        }
+    }
+
+    @Override
+    protected RaiseException invalidUtf8() {
+         return Utils.newException(context, Utils.M_GENERATOR_ERROR,
+                 "source sequence is illegal/malformed utf-8");
+    }
+}
author	Florian Frank <flori@ping.de>	2010-09-22 22:21:02 +0200
committer	Florian Frank <flori@ping.de>	2010-09-23 01:16:01 +0200
commit	e3fe104e7d5ec184aac36128aed2d217cb655dfc (patch)
tree	3a63dc0152effdb990defcd5c935e38209649a8f /java/src/json/ext/StringEncoder.java
parent	2c0f8d2c9b15a33b8d10ffcb1959aef54d320b57 (diff)
download	json-e3fe104e7d5ec184aac36128aed2d217cb655dfc.tar.gz