summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Bryan Duxbury <bryanduxbury@apache.org> 2010-05-01 13:45:38 +0000
committer Bryan Duxbury <bryanduxbury@apache.org> 2010-05-01 13:45:38 +0000
commit 06491d7bb591f8b55d4be6b116b246c56af66511 (patch)
tree be20e37391a27825bfdca6f29b42821943d744ea
parent adf5f19c541deeeb5fc9f695c61378e4490745a7 (diff)
download thrift-06491d7bb591f8b55d4be6b116b246c56af66511.tar.gz
THRIFT-765. java: Revert the changes applied by THRIFT-765, as they appear to be unstable
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@940013 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- lib/java/src/org/apache/thrift/Utf8Helper.java 124
-rw-r--r-- lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java 36
-rwxr-xr-x lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java 26
-rw-r--r-- lib/java/test/org/apache/thrift/BenchStringEncoding.java 67
-rw-r--r-- lib/java/test/org/apache/thrift/TestUtf8Helper.java 74
5 files changed, 41 insertions, 286 deletions
diff --git a/lib/java/src/org/apache/thrift/Utf8Helper.java b/lib/java/src/org/apache/thrift/Utf8Helper.java
deleted file mode 100644
index 2d3fd267b..000000000
--- a/lib/java/src/org/apache/thrift/Utf8Helper.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package org.apache.thrift;
-
-public final class Utf8Helper {
- private Utf8Helper() {}
-
- public static final int getByteLength(final String s) {
- int byteLength = 0;
- int codePoint;
- for (int i = 0; i < s.length(); i++) {
- codePoint = s.charAt(i);
- if (codePoint >= 0x07FF) {
- codePoint = s.codePointAt(i);
- if (Character.isSupplementaryCodePoint(codePoint)) {
- i++;
- }
- }
- if (codePoint >= 0 && codePoint <= 0x007F) {
- byteLength++;
- } else if (codePoint >= 0x80 && codePoint <= 0x07FF) {
- byteLength += 2;
- } else if ((codePoint >= 0x0800 && codePoint < 0xD800) || (codePoint > 0xDFFF && codePoint <= 0xFFFD)) {
- byteLength+=3;
- } else if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
- byteLength+=4;
- } else {
- throw new RuntimeException("Unknown unicode codepoint in string! "
- + Integer.toHexString(codePoint));
- }
- }
- return byteLength;
- }
-
- public static byte[] encode(String s) {
- byte[] buf = new byte[getByteLength(s)];
- encode(s, buf, 0);
- return buf;
- }
-
- public static void encode(final String s, final byte[] buf, final int offset) {
- int nextByte = 0;
- int codePoint;
- final int strLen = s.length();
- for (int i = 0; i < strLen; i++) {
- codePoint = s.charAt(i);
- if (codePoint >= 0x07FF) {
- codePoint = s.codePointAt(i);
- if (Character.isSupplementaryCodePoint(codePoint)) {
- i++;
- }
- }
- if (codePoint <= 0x007F) {
- buf[offset + nextByte] = (byte)codePoint;
- nextByte++;
- } else if (codePoint <= 0x7FF) {
- buf[offset + nextByte ] = (byte)(0xC0 | ((codePoint >> 6) & 0x1F));
- buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 0) & 0x3F));
- nextByte+=2;
- } else if ((codePoint < 0xD800) || (codePoint > 0xDFFF && codePoint <= 0xFFFD)) {
- buf[offset + nextByte ] = (byte)(0xE0 | ((codePoint >> 12) & 0x0F));
- buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
- buf[offset + nextByte + 2] = (byte)(0x80 | ((codePoint >> 0) & 0x3F));
- nextByte+=3;
- } else if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
- buf[offset + nextByte ] = (byte)(0xF0 | ((codePoint >> 18) & 0x07));
- buf[offset + nextByte + 1] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
- buf[offset + nextByte + 2] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
- buf[offset + nextByte + 3] = (byte)(0x80 | ((codePoint >> 0) & 0x3F));
- nextByte+=4;
- } else {
- throw new RuntimeException("Unknown unicode codepoint in string! "
- + Integer.toHexString(codePoint));
- }
- }
- }
-
- public static String decode(byte[] buf) {
- char[] charBuf = new char[buf.length];
- int charsDecoded = decode(buf, 0, buf.length, charBuf);
- return new String(charBuf, 0, charsDecoded);
- }
-
- public static final int UNI_SUR_HIGH_START = 0xD800;
- public static final int UNI_SUR_HIGH_END = 0xDBFF;
- public static final int UNI_SUR_LOW_START = 0xDC00;
- public static final int UNI_SUR_LOW_END = 0xDFFF;
- public static final int UNI_REPLACEMENT_CHAR = 0xFFFD;
-
- private static final int HALF_BASE = 0x0010000;
- private static final long HALF_SHIFT = 10;
- private static final long HALF_MASK = 0x3FFL;
-
- public static int decode(final byte[] buf, final int offset, final int byteLength, final char[] charBuf) {
- int curByteIdx = offset;
- int endByteIdx = offset + byteLength;
-
- int curCharIdx = 0;
-
- while (curByteIdx < endByteIdx) {
- final int b = buf[curByteIdx++]&0xff;
- final int ch;
-
- if (b < 0xC0) {
- ch = b;
- } else if (b < 0xE0) {
- ch = ((b & 0x1F) << 6) + (buf[curByteIdx++] & 0x3F);
- } else if (b < 0xf0) {
- ch = ((b & 0xF) << 12) + ((buf[curByteIdx++] & 0x3F) << 6) + (buf[curByteIdx++] & 0x3F);
- } else {
- ch = ((b & 0x7) << 18) + ((buf[curByteIdx++]& 0x3F) << 12) + ((buf[curByteIdx++] & 0x3F) << 6) + (buf[curByteIdx++] & 0x3F);
- }
-
- if (ch <= 0xFFFF) {
- // target is a character <= 0xFFFF
- charBuf[curCharIdx++] = (char) ch;
- } else {
- // target is a character in range 0xFFFF - 0x10FFFF
- final int chHalf = ch - HALF_BASE;
- charBuf[curCharIdx++] = (char) ((chHalf >> HALF_SHIFT) + UNI_SUR_HIGH_START);
- charBuf[curCharIdx++] = (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START);
- }
- }
- return curCharIdx;
- }
-}
diff --git a/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java b/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java
index 9e7634806..1cfa69dc2 100644
--- a/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java
+++ b/lib/java/src/org/apache/thrift/protocol/TBinaryProtocol.java
@@ -19,8 +19,9 @@
package org.apache.thrift.protocol;
+import java.io.UnsupportedEncodingException;
+
import org.apache.thrift.TException;
-import org.apache.thrift.Utf8Helper;
import org.apache.thrift.transport.TTransport;
/**
@@ -179,9 +180,13 @@ public class TBinaryProtocol extends TProtocol {
}
public void writeString(String str) throws TException {
- byte[] dat = Utf8Helper.encode(str);
- writeI32(dat.length);
- trans_.write(dat, 0, dat.length);
+ try {
+ byte[] dat = str.getBytes("UTF-8");
+ writeI32(dat.length);
+ trans_.write(dat, 0, dat.length);
+ } catch (UnsupportedEncodingException uex) {
+ throw new TException("JVM DOES NOT SUPPORT UTF-8");
+ }
}
public void writeBinary(byte[] bin) throws TException {
@@ -328,20 +333,27 @@ public class TBinaryProtocol extends TProtocol {
int size = readI32();
if (trans_.getBytesRemainingInBuffer() >= size) {
- char[] charBuf = new char[size];
- int charsDecoded = Utf8Helper.decode(trans_.getBuffer(), trans_.getBufferPosition(), size, charBuf);
- trans_.consumeBuffer(size);
- return new String(charBuf, 0, charsDecoded);
+ try {
+ String s = new String(trans_.getBuffer(), trans_.getBufferPosition(), size, "UTF-8");
+ trans_.consumeBuffer(size);
+ return s;
+ } catch (UnsupportedEncodingException e) {
+ throw new TException("JVM DOES NOT SUPPORT UTF-8");
+ }
}
return readStringBody(size);
}
public String readStringBody(int size) throws TException {
- checkReadLength(size);
- byte[] buf = new byte[size];
- trans_.readAll(buf, 0, size);
- return Utf8Helper.decode(buf);
+ try {
+ checkReadLength(size);
+ byte[] buf = new byte[size];
+ trans_.readAll(buf, 0, size);
+ return new String(buf, "UTF-8");
+ } catch (UnsupportedEncodingException uex) {
+ throw new TException("JVM DOES NOT SUPPORT UTF-8");
+ }
}
public byte[] readBinary() throws TException {
diff --git a/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
index e81ed828c..f4979423e 100755
--- a/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
+++ b/lib/java/src/org/apache/thrift/protocol/TCompactProtocol.java
@@ -20,9 +20,10 @@
package org.apache.thrift.protocol;
+import java.io.UnsupportedEncodingException;
+
import org.apache.thrift.ShortStack;
import org.apache.thrift.TException;
-import org.apache.thrift.Utf8Helper;
import org.apache.thrift.transport.TTransport;
/**
@@ -292,7 +293,11 @@ public final class TCompactProtocol extends TProtocol {
* Write a string to the wire with a varint size preceeding.
*/
public void writeString(String str) throws TException {
- writeBinary(Utf8Helper.encode(str));
+ try {
+ writeBinary(str.getBytes("UTF-8"));
+ } catch (UnsupportedEncodingException e) {
+ throw new TException("UTF-8 not supported!");
+ }
}
/**
@@ -605,13 +610,16 @@ public final class TCompactProtocol extends TProtocol {
return "";
}
- if (trans_.getBytesRemainingInBuffer() >= length) {
- char[] charBuf = new char[length];
- int charsDecoded = Utf8Helper.decode(trans_.getBuffer(), trans_.getBufferPosition(), length, charBuf);
- trans_.consumeBuffer(length);
- return new String(charBuf, 0, charsDecoded);
- } else {
- return Utf8Helper.decode(readBinary(length));
+ try {
+ if (trans_.getBytesRemainingInBuffer() >= length) {
+ String str = new String(trans_.getBuffer(), trans_.getBufferPosition(), length, "UTF-8");
+ trans_.consumeBuffer(length);
+ return str;
+ } else {
+ return new String(readBinary(length), "UTF-8");
+ }
+ } catch (UnsupportedEncodingException e) {
+ throw new TException("UTF-8 not supported!");
}
}
diff --git a/lib/java/test/org/apache/thrift/BenchStringEncoding.java b/lib/java/test/org/apache/thrift/BenchStringEncoding.java
deleted file mode 100644
index 3ae22c773..000000000
--- a/lib/java/test/org/apache/thrift/BenchStringEncoding.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.apache.thrift;
-
-import java.io.UnsupportedEncodingException;
-
-public class BenchStringEncoding {
- private static final String STRING = "a moderately long (but not overly long) string";
- private static final int HOW_MANY = 100000;
- private static final byte[] BYTES;
- static {
- try {
- BYTES = STRING.getBytes("UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
- }
-
- public static void main(String[] args) throws UnsupportedEncodingException {
- for (int trial = 0; trial < 5; trial++) {
- benchGetBytes();
- benchFromBytes();
- benchEncode();
- benchDecode();
- }
- }
-
- private static void benchDecode() {
- char[] charBuf = new char[256];
- long start = System.currentTimeMillis();
- for (int i = 0; i < HOW_MANY; i++) {
- Utf8Helper.decode(BYTES, 0, BYTES.length, charBuf);
- }
- long end = System.currentTimeMillis();
- System.out.println("decode: decode: " + (end-start) + "ms");
- }
-
- private static void benchFromBytes() {
- long start = System.currentTimeMillis();
- for (int i = 0; i < HOW_MANY; i++) {
- try {
- new String(BYTES, "UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
- }
- long end = System.currentTimeMillis();
- System.out.println("decode: fromBytes: " + (end-start) + "ms");
- }
-
- private static void benchEncode() {
- long start = System.currentTimeMillis();
- byte[] outbuf = new byte[256];
- for (int i = 0; i < HOW_MANY; i++) {
- Utf8Helper.encode(STRING, outbuf, 0);
- }
- long end = System.currentTimeMillis();
- System.out.println("encode: directEncode: " + (end-start) + "ms");
- }
-
- private static void benchGetBytes() throws UnsupportedEncodingException {
- long start = System.currentTimeMillis();
- for (int i = 0; i < HOW_MANY; i++) {
- STRING.getBytes("UTF-8");
- }
- long end = System.currentTimeMillis();
- System.out.println("encode: getBytes(UTF-8): " + (end-start) + "ms");
- }
-}
diff --git a/lib/java/test/org/apache/thrift/TestUtf8Helper.java b/lib/java/test/org/apache/thrift/TestUtf8Helper.java
deleted file mode 100644
index bdfd35a41..000000000
--- a/lib/java/test/org/apache/thrift/TestUtf8Helper.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package org.apache.thrift;
-
-import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
-
-import junit.framework.TestCase;
-
-public class TestUtf8Helper extends TestCase {
- private static final String NON_UNICODE_STRING = "here's some text";
-
- private static final byte[] kUnicodeBytes = {
- (byte)0xd3, (byte)0x80, (byte)0xe2, (byte)0x85, (byte)0xae, (byte)0xce,
- (byte)0x9d, (byte)0x20, (byte)0xd0, (byte)0x9d, (byte)0xce, (byte)0xbf,
- (byte)0xe2, (byte)0x85, (byte)0xbf, (byte)0xd0, (byte)0xbe, (byte)0xc9,
- (byte)0xa1, (byte)0xd0, (byte)0xb3, (byte)0xd0, (byte)0xb0, (byte)0xcf,
- (byte)0x81, (byte)0xe2, (byte)0x84, (byte)0x8e, (byte)0x20, (byte)0xce,
- (byte)0x91, (byte)0x74, (byte)0x74, (byte)0xce, (byte)0xb1, (byte)0xe2,
- (byte)0x85, (byte)0xbd, (byte)0xce, (byte)0xba, (byte)0x83, (byte)0xe2,
- (byte)0x80, (byte)0xbc
- };
-
- private static final String UNICODE_STRING = "abc\u5639\u563b";
- private static final byte[] UNICODE_STRING_BYTES;
-
- private static final String UNICODE_STRING_2;
- private static final byte[] UNICODE_STRING_BYTES_2;
-
- private static final String REALLY_WHACKY_ONE = "\uD841\uDC91";
- private static final byte[] REALLY_WHACKY_ONE_BYTES;
-
- private static final String TWO_CHAR_CHAR = "\uD801\uDC00";
- private static final byte[] TWO_CHAR_CHAR_BYTES;
-
- static {
- try {
- UNICODE_STRING_BYTES = UNICODE_STRING.getBytes("UTF-8");
- UNICODE_STRING_2 = new String(kUnicodeBytes, "UTF-8");
- UNICODE_STRING_BYTES_2 = UNICODE_STRING_2.getBytes("UTF-8");
- REALLY_WHACKY_ONE_BYTES = REALLY_WHACKY_ONE.getBytes("UTF-8");
- TWO_CHAR_CHAR_BYTES = TWO_CHAR_CHAR.getBytes("UTF-8");
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException(e);
- }
- }
-
-
- public void testEncode() throws Exception {
- byte[] bytes = NON_UNICODE_STRING.getBytes("UTF-8");
- byte[] otherBytes = Utf8Helper.encode(NON_UNICODE_STRING);
- assertTrue(Arrays.equals(bytes, otherBytes));
-
- otherBytes = Utf8Helper.encode(UNICODE_STRING);
- assertTrue(Arrays.equals(UNICODE_STRING_BYTES, otherBytes));
-
- otherBytes = Utf8Helper.encode(UNICODE_STRING_2);
- assertTrue(Arrays.equals(UNICODE_STRING_BYTES_2, otherBytes));
-
- otherBytes = Utf8Helper.encode(REALLY_WHACKY_ONE);
- assertTrue(Arrays.equals(REALLY_WHACKY_ONE_BYTES, otherBytes));
-
- otherBytes = Utf8Helper.encode(TWO_CHAR_CHAR);
- assertTrue(Arrays.equals(TWO_CHAR_CHAR_BYTES, otherBytes));
- }
-
- public void testDecode() throws Exception {
- byte[] bytes = NON_UNICODE_STRING.getBytes("UTF-8");
- assertEquals(NON_UNICODE_STRING, Utf8Helper.decode(bytes));
-
- assertEquals(UNICODE_STRING, Utf8Helper.decode(UNICODE_STRING_BYTES));
- assertEquals(UNICODE_STRING_2, Utf8Helper.decode(UNICODE_STRING_BYTES_2));
- assertEquals(REALLY_WHACKY_ONE, Utf8Helper.decode(REALLY_WHACKY_ONE_BYTES));
- assertEquals(TWO_CHAR_CHAR, Utf8Helper.decode(TWO_CHAR_CHAR_BYTES));
- }
-}