summaryrefslogtreecommitdiff
path: root/lib/javame
diff options
context:
space:
mode:
authorPhongphan Phuttha <phongphan.p@gmail.com>2015-11-18 19:44:30 +0700
committerNobuaki Sukegawa <nsuke@apache.org>2015-11-23 17:09:27 +0900
commit69826b21ec2e220254526063ae7fa2d3c118a6f4 (patch)
tree86388f87ce5509b09b6fa241a330e05af89cb767 /lib/javame
parent0ad6ee95e002f41dd628d4044f901468f43ffc32 (diff)
downloadthrift-69826b21ec2e220254526063ae7fa2d3c118a6f4.tar.gz
THRIFT-2410: Add a UTF-16 to UTF-8 converter and use it to convert escaped Unicode characters to a UTF-8 byte array.
Client: Java ME Patch: Phongphan Phuttha This closes #702
Diffstat (limited to 'lib/javame')
-rw-r--r--lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java88
1 files changed, 84 insertions, 4 deletions
diff --git a/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java b/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java
index 99e3d2a47..d3916863b 100644
--- a/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java
+++ b/lib/javame/src/org/apache/thrift/protocol/TJSONProtocol.java
@@ -19,6 +19,7 @@
package org.apache.thrift.protocol;
+import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Stack;
@@ -348,6 +349,47 @@ public class TJSONProtocol extends TProtocol {
}
}
+ private static boolean isHighSurrogate(char c) {
+ return c >= '\uD800' && c <= '\uDBFF';
+ }
+
+ private static boolean isLowSurrogate(char c) {
+ return c >= '\uDC00' && c <= '\uDFFF';
+ }
+
+  /**
+   * Encodes one Unicode code point as UTF-8.
+   *
+   * Surrogate values (U+D800..U+DFFF) are not rejected here; they are encoded
+   * as three bytes like any other value in that length class, so a caller that
+   * must not emit them has to filter them out or combine pairs beforehand.
+   *
+   * @param codepoint the code point to encode, in the range U+0000..U+10FFFF
+   * @return a newly allocated array of 1 to 4 bytes holding the encoding
+   * @throws IllegalArgumentException if codepoint is negative or above U+10FFFF
+   */
+  private static byte[] toUTF8(int codepoint) {
+    // A negative value would otherwise pass the "<= 0x7f" test and be emitted
+    // as a single meaningless byte; values above U+10FFFF are not Unicode.
+    if (codepoint < 0 || codepoint > 0x10ffff) {
+      throw new IllegalArgumentException(
+          "Code point outside U+0000..U+10FFFF: 0x" + Integer.toHexString(codepoint));
+    }
+
+    if (codepoint <= 0x7f) {
+      return new byte[] { (byte)codepoint };
+    }
+    if (codepoint <= 0x7ff) {
+      return new byte[] {
+        (byte)(0xc0 | (codepoint >> 6)),
+        (byte)(0x80 | (codepoint & 0x3f))
+      };
+    }
+    if (codepoint <= 0xffff) {
+      return new byte[] {
+        (byte)(0xe0 | (codepoint >> 12)),
+        (byte)(0x80 | ((codepoint >> 6) & 0x3f)),
+        (byte)(0x80 | (codepoint & 0x3f))
+      };
+    }
+    return new byte[] {
+      (byte)(0xf0 | (codepoint >> 18)),
+      (byte)(0x80 | ((codepoint >> 12) & 0x3f)),
+      (byte)(0x80 | ((codepoint >> 6) & 0x3f)),
+      (byte)(0x80 | (codepoint & 0x3f))
+    };
+  }
+
+  /**
+   * Encodes a single UTF-16 code unit that is held in a signed short.
+   *
+   * readJSONString keeps the parsed \\uXXXX value in a short, so code units
+   * U+8000..U+FFFF are negative there and would be sign-extended if widened
+   * straight to int. Masking to 16 bits restores the unsigned value before
+   * encoding.
+   *
+   * @param codeUnit the 16 bits of the code unit, possibly negative as a short
+   * @return a newly allocated array of 1 to 3 bytes holding the encoding
+   */
+  private static byte[] toUTF8(short codeUnit) {
+    return toUTF8(codeUnit & 0xffff);
+  }
+
+ private static byte[] toUTF8(int high, int low) {
+ int codepoint = (1 << 16) + ((high & 0x3ff) << 10);
+ codepoint += low & 0x3ff;
+ return toUTF8(codepoint);
+ }
+
// Write the bytes in array b as JSON characters, escaping as needed
private void writeJSONString(byte[] b) throws TException {
context_.write();
@@ -596,6 +638,7 @@ public class TJSONProtocol extends TProtocol {
private TByteArrayOutputStream readJSONString(boolean skipContext)
throws TException {
TByteArrayOutputStream arr = new TByteArrayOutputStream(DEF_STRING_SIZE);
+ int highSurrogate = 0;
if (!skipContext) {
context_.read();
}
@@ -608,10 +651,42 @@ public class TJSONProtocol extends TProtocol {
if (ch == ESCSEQ[0]) {
ch = reader_.read();
if (ch == ESCSEQ[1]) {
- readJSONSyntaxChar(ZERO);
- readJSONSyntaxChar(ZERO);
- trans_.readAll(tmpbuf_, 0, 2);
- ch = (byte)((hexVal(tmpbuf_[0]) << 4) + hexVal(tmpbuf_[1]));
+ trans_.readAll(tmpbuf_, 0, 4);
+ short cu = (short)(
+ ((short)hexVal(tmpbuf_[0]) << 12) +
+ ((short)hexVal(tmpbuf_[1]) << 8) +
+ ((short)hexVal(tmpbuf_[2]) << 4) +
+ (short)hexVal(tmpbuf_[3]));
+ try {
+ if (isHighSurrogate((char)cu)) {
+ if (highSurrogate != 0) {
+ throw new TProtocolException(TProtocolException.INVALID_DATA,
+ "Expected low surrogate char");
+ }
+ highSurrogate = cu;
+ }
+ else if (isLowSurrogate((char)cu)) {
+ if (highSurrogate == 0) {
+ throw new TProtocolException(TProtocolException.INVALID_DATA,
+ "Expected high surrogate char");
+ }
+
+ arr.write(toUTF8(highSurrogate, cu));
+ highSurrogate = 0;
+ }
+ else {
+ arr.write(toUTF8(cu));
+ }
+ continue;
+ }
+ catch (UnsupportedEncodingException ex) {
+ throw new TProtocolException(TProtocolException.NOT_IMPLEMENTED,
+ "JVM does not support UTF-8");
+ }
+ catch (IOException ex) {
+ throw new TProtocolException(TProtocolException.INVALID_DATA,
+ "Invalid unicode sequence");
+ }
}
else {
int off = ESCAPE_CHARS.indexOf(ch);
@@ -624,6 +699,11 @@ public class TJSONProtocol extends TProtocol {
}
arr.write(ch);
}
+
+ if (highSurrogate != 0) {
+ throw new TProtocolException(TProtocolException.INVALID_DATA,
+ "Expected low surrogate char");
+ }
return arr;
}