diff options
author | Florian Frank <flori@ping.de> | 2010-09-22 22:21:02 +0200 |
---|---|---|
committer | Florian Frank <flori@ping.de> | 2010-09-23 01:16:01 +0200 |
commit | e3fe104e7d5ec184aac36128aed2d217cb655dfc (patch) | |
tree | 3a63dc0152effdb990defcd5c935e38209649a8f /java/src/json/ext/StringEncoder.java | |
parent | 2c0f8d2c9b15a33b8d10ffcb1959aef54d320b57 (diff) | |
download | json-e3fe104e7d5ec184aac36128aed2d217cb655dfc.tar.gz |
started to build jruby extension with Rakefile
Diffstat (limited to 'java/src/json/ext/StringEncoder.java')
-rw-r--r-- | java/src/json/ext/StringEncoder.java | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java new file mode 100644 index 0000000..57bd19b --- /dev/null +++ b/java/src/json/ext/StringEncoder.java @@ -0,0 +1,106 @@ +package json.ext; + +import org.jruby.exceptions.RaiseException; +import org.jruby.runtime.ThreadContext; +import org.jruby.util.ByteList; + +/** + * An encoder that reads from the given source and outputs its representation + * to another ByteList. The source string is fully checked for UTF-8 validity, + * and throws a GeneratorError if any problem is found. + */ +final class StringEncoder extends ByteListTranscoder { + private final boolean asciiOnly; + + // Escaped characters will reuse this array, to avoid new allocations + // or appending them byte-by-byte + private final byte[] aux = + new byte[] {/* First unicode character */ + '\\', 'u', 0, 0, 0, 0, + /* Second unicode character (for surrogate pairs) */ + '\\', 'u', 0, 0, 0, 0, + /* "\X" characters */ + '\\', 0}; + // offsets on the array above + private static final int ESCAPE_UNI1_OFFSET = 0; + private static final int ESCAPE_UNI2_OFFSET = ESCAPE_UNI1_OFFSET + 6; + private static final int ESCAPE_CHAR_OFFSET = ESCAPE_UNI2_OFFSET + 6; + /** Array used for code point decomposition in surrogates */ + private final char[] utf16 = new char[2]; + + private static final byte[] HEX = + new byte[] {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + + StringEncoder(ThreadContext context, boolean asciiOnly) { + super(context); + this.asciiOnly = asciiOnly; + } + + void encode(ByteList src, ByteList out) { + init(src, out); + append('"'); + while (hasNext()) { + handleChar(readUtf8Char()); + } + quoteStop(pos); + append('"'); + } + + private void handleChar(int c) { + switch (c) { + case '"': + case '\\': + escapeChar((char)c); + break; + case '\n': + escapeChar('n'); + break; + case '\r': + escapeChar('r'); + break; + case '\t': + escapeChar('t'); + break; + case '\f': + escapeChar('f'); + break; + case '\b': + escapeChar('b'); + break; + default: + if (c >= 0x20 && c <= 0x7f || + (c >= 0x80 && !asciiOnly)) { + quoteStart(); + } else { + quoteStop(charStart); + escapeUtf8Char(c); + } + } + } + + private void escapeChar(char c) { + quoteStop(charStart); + aux[ESCAPE_CHAR_OFFSET + 1] = (byte)c; + append(aux, ESCAPE_CHAR_OFFSET, 2); + } + + private void escapeUtf8Char(int codePoint) { + int numChars = Character.toChars(codePoint, utf16, 0); + escapeCodeUnit(utf16[0], ESCAPE_UNI1_OFFSET + 2); + if (numChars > 1) escapeCodeUnit(utf16[1], ESCAPE_UNI2_OFFSET + 2); + append(aux, ESCAPE_UNI1_OFFSET, 6 * numChars); + } + + private void escapeCodeUnit(char c, int auxOffset) { + for (int i = 0; i < 4; i++) { + aux[auxOffset + i] = HEX[(c >>> (12 - 4 * i)) & 0xf]; + } + } + + @Override + protected RaiseException invalidUtf8() { + return Utils.newException(context, Utils.M_GENERATOR_ERROR, + "source sequence is illegal/malformed utf-8"); + } +} |