diff options
author | Phongphan Phuttha <phongphan.p@gmail.com> | 2015-11-18 19:44:30 +0700 |
---|---|---|
committer | Nobuaki Sukegawa <nsuke@apache.org> | 2015-11-23 17:09:27 +0900 |
commit | 69826b21ec2e220254526063ae7fa2d3c118a6f4 (patch) | |
tree | 86388f87ce5509b09b6fa241a330e05af89cb767 /lib/javame | |
parent | 0ad6ee95e002f41dd628d4044f901468f43ffc32 (diff) | |
download | thrift-69826b21ec2e220254526063ae7fa2d3c118a6f4.tar.gz |
THRIFT-2410: Add UTF-16 to UTF-8 converter and use for convert the escaped unicode character to UTF-8 byte array.
Client: Java ME
Patch: Phongphan Phuttha
This closes #702
Diffstat (limited to 'lib/javame')
-rw-r--r-- | lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java | 88 |
1 files changed, 84 insertions, 4 deletions
diff --git a/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java b/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java index 99e3d2a47..d3916863b 100644 --- a/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java +++ b/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java @@ -19,6 +19,7 @@ package org.apache.thrift.protocol; +import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.Stack; @@ -348,6 +349,47 @@ public class TJSONProtocol extends TProtocol { } } + private static boolean isHighSurrogate(char c) { + return c >= '\uD800' && c <= '\uDBFF'; + } + + private static boolean isLowSurrogate(char c) { + return c >= '\uDC00' && c <= '\uDFFF'; + } + + private static byte[] toUTF8(int codepoint) { + final int[] FIRST_BYTE_MASK = { 0, 0xc0, 0xe0, 0xf0 }; + int length = 0; + if (codepoint <= 0x7f) length = 1; + else if (codepoint <= 0x7ff) length = 2; + else if (codepoint <= 0xffff) length = 3; + else if (codepoint <= 0x1fffff) length = 4; + else throw new RuntimeException("Code point over U+1FFFFF is not supported"); + + byte[] bytes = new byte[length]; + switch (length) { + case 4: + bytes[3] = (byte)((codepoint & 0x3f) | 0x80); + codepoint >>= 6; + case 3: + bytes[2] = (byte)((codepoint & 0x3f) | 0x80); + codepoint >>= 6; + case 2: + bytes[1] = (byte)((codepoint & 0x3f) | 0x80); + codepoint >>= 6; + case 1: + bytes[0] = (byte)(codepoint | FIRST_BYTE_MASK[length - 1]); + } + + return bytes; + } + + private static byte[] toUTF8(int high, int low) { + int codepoint = (1 << 16) + ((high & 0x3ff) << 10); + codepoint += low & 0x3ff; + return toUTF8(codepoint); + } + // Write the bytes in array buf as a JSON characters, escaping as needed private void writeJSONString(byte[] b) throws TException { context_.write(); @@ -596,6 +638,7 @@ public class TJSONProtocol extends TProtocol { private TByteArrayOutputStream readJSONString(boolean skipContext) throws TException { TByteArrayOutputStream arr = new TByteArrayOutputStream(DEF_STRING_SIZE); + int highSurrogate = 0; if (!skipContext) { context_.read(); } @@ -608,10 +651,42 @@ public class TJSONProtocol extends TProtocol { if (ch == ESCSEQ[0]) { ch = reader_.read(); if (ch == ESCSEQ[1]) { - readJSONSyntaxChar(ZERO); - readJSONSyntaxChar(ZERO); - trans_.readAll(tmpbuf_, 0, 2); - ch = (byte)((hexVal(tmpbuf_[0]) << 4) + hexVal(tmpbuf_[1])); + trans_.readAll(tmpbuf_, 0, 4); + short cu = (short)( + ((short)hexVal(tmpbuf_[0]) << 12) + + ((short)hexVal(tmpbuf_[1]) << 8) + + ((short)hexVal(tmpbuf_[2]) << 4) + + (short)hexVal(tmpbuf_[3])); + try { + if (isHighSurrogate((char)cu)) { + if (highSurrogate != 0) { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected low surrogate char"); + } + highSurrogate = cu; + } + else if (isLowSurrogate((char)cu)) { + if (highSurrogate == 0) { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected high surrogate char"); + } + + arr.write(toUTF8(highSurrogate, cu)); + highSurrogate = 0; + } + else { + arr.write(toUTF8(cu)); + } + continue; + } + catch (UnsupportedEncodingException ex) { + throw new TProtocolException(TProtocolException.NOT_IMPLEMENTED, + "JVM does not support UTF-8"); + } + catch (IOException ex) { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Invalid unicode sequence"); + } } else { int off = ESCAPE_CHARS.indexOf(ch); @@ -624,6 +699,11 @@ public class TJSONProtocol extends TProtocol { } arr.write(ch); } + + if (highSurrogate != 0) { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected low surrogate char"); + } return arr; } |