summary refs log tree commit diff
path: root/java/src/json/ext/StringEncoder.java
diff options
context:
space:
mode:
author Florian Frank <flori@ping.de> 2010-09-22 22:21:02 +0200
committer Florian Frank <flori@ping.de> 2010-09-23 01:16:01 +0200
commit e3fe104e7d5ec184aac36128aed2d217cb655dfc (patch)
tree 3a63dc0152effdb990defcd5c935e38209649a8f /java/src/json/ext/StringEncoder.java
parent 2c0f8d2c9b15a33b8d10ffcb1959aef54d320b57 (diff)
download json-e3fe104e7d5ec184aac36128aed2d217cb655dfc.tar.gz
started to build jruby extension with Rakefile
Diffstat (limited to 'java/src/json/ext/StringEncoder.java')
-rw-r--r-- java/src/json/ext/StringEncoder.java 106
1 files changed, 106 insertions, 0 deletions
diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java
new file mode 100644
index 0000000..57bd19b
--- /dev/null
+++ b/java/src/json/ext/StringEncoder.java
@@ -0,0 +1,106 @@
+package json.ext;
+
+import org.jruby.exceptions.RaiseException;
+import org.jruby.runtime.ThreadContext;
+import org.jruby.util.ByteList;
+
+/**
+ * Writes a source ByteList to an output ByteList as a double-quoted string,
+ * escaping characters on the way. The reading and appending primitives come
+ * from ByteListTranscoder; {@link #invalidUtf8()} supplies the GeneratorError.
+ */
+final class StringEncoder extends ByteListTranscoder {
+    private static final byte[] HEX =
+        new byte[] {'0', '1', '2', '3', '4', '5', '6', '7',
+                    '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};
+
+    // Offsets of the three escape templates inside aux
+    private static final int ESCAPE_UNI1_OFFSET = 0;
+    private static final int ESCAPE_UNI2_OFFSET = ESCAPE_UNI1_OFFSET + 6;
+    private static final int ESCAPE_CHAR_OFFSET = ESCAPE_UNI2_OFFSET + 6;
+
+    /** When true, code points of 0x80 and above are written as escapes. */
+    private final boolean asciiOnly;
+
+    // Scratch buffer reused by every escape: no allocation, one append call
+    private final byte[] aux =
+        new byte[] {/* first unicode escape slot */
+                    '\\', 'u', 0, 0, 0, 0,
+                    /* second unicode escape slot (surrogate pairs) */
+                    '\\', 'u', 0, 0, 0, 0,
+                    /* two-byte "\X" slot */
+                    '\\', 0};
+    /** Receives the UTF-16 code unit(s) of the code point being escaped */
+    private final char[] utf16 = new char[2];
+
+    StringEncoder(ThreadContext context, boolean asciiOnly) {
+        super(context);
+        this.asciiOnly = asciiOnly;
+    }
+
+    /** Appends src to out wrapped in double quotes, escaping where needed. */
+    void encode(ByteList src, ByteList out) {
+        init(src, out);
+        append('"');
+        while (hasNext()) {
+            int codePoint = readUtf8Char();
+            handleChar(codePoint);
+        }
+        quoteStop(pos);
+        append('"');
+    }
+
+    /** Routes one code point to a short escape, a unicode escape, or quoteStart(). */
+    private void handleChar(int c) {
+        final char shortForm;
+        switch (c) {
+        case '"':
+        case '\\': shortForm = (char)c; break;
+        case '\n': shortForm = 'n'; break;
+        case '\r': shortForm = 'r'; break;
+        case '\t': shortForm = 't'; break;
+        case '\f': shortForm = 'f'; break;
+        case '\b': shortForm = 'b'; break;
+        default:   shortForm = 0; break;
+        }
+        if (shortForm != 0) {
+            escapeChar(shortForm);
+        } else if (c < 0x20 || (c >= 0x80 && asciiOnly)) {
+            // control characters, and non-ASCII when asciiOnly is set
+            quoteStop(charStart);
+            escapeUtf8Char(c);
+        } else {
+            quoteStart();
+        }
+    }
+
+    /** Calls quoteStop(charStart), then appends a backslash followed by c. */
+    private void escapeChar(char c) {
+        quoteStop(charStart);
+        aux[ESCAPE_CHAR_OFFSET + 1] = (byte)c;
+        append(aux, ESCAPE_CHAR_OFFSET, 2);
+    }
+
+    /** Appends one unicode escape, or two when a surrogate pair is needed. */
+    private void escapeUtf8Char(int codePoint) {
+        int units = Character.toChars(codePoint, utf16, 0);
+        escapeCodeUnit(utf16[0], ESCAPE_UNI1_OFFSET + 2);
+        if (units > 1) {
+            escapeCodeUnit(utf16[1], ESCAPE_UNI2_OFFSET + 2);
+        }
+        append(aux, ESCAPE_UNI1_OFFSET, 6 * units);
+    }
+
+    /** Writes c as four lowercase hex digits into aux, starting at auxOffset. */
+    private void escapeCodeUnit(char c, int auxOffset) {
+        for (int shift = 12, at = auxOffset; shift >= 0; shift -= 4, at++) {
+            aux[at] = HEX[(c >>> shift) & 0xf];
+        }
+    }
+
+    @Override
+    protected RaiseException invalidUtf8() {
+        return Utils.newException(context, Utils.M_GENERATOR_ERROR,
+            "source sequence is illegal/malformed utf-8");
+    }
+}