summary refs log tree commit diff
path: root/lib/d
diff options
context:
space:
mode:
author Jens Geyer <jensg@apache.org> 2015-10-14 00:17:28 +0200
committer Jens Geyer <jensg@apache.org> 2015-10-14 00:17:28 +0200
commit bb272dbcd5956f727a604f9643bc87b8fd5e1ed4 (patch)
tree 639afa485d35fa2198c57f4719ed08f216b7d536 /lib/d
parent 673c44bc11250cf5ed891d7855e915de5ed70ed8 (diff)
download thrift-bb272dbcd5956f727a604f9643bc87b8fd5e1ed4.tar.gz
THRIFT-2412 UTF-8 sent by PHP as JSON is not understood
Client: D Author: Phongphan Phuttha <phongphan@acm.org> This closes #650
Diffstat (limited to 'lib/d')
-rw-r--r-- lib/d/src/thrift/protocol/json.d 64
1 files changed, 59 insertions, 5 deletions
diff --git a/lib/d/src/thrift/protocol/json.d b/lib/d/src/thrift/protocol/json.d
index ed8f9c0b9..223d3a361 100644
--- a/lib/d/src/thrift/protocol/json.d
+++ b/lib/d/src/thrift/protocol/json.d
@@ -26,6 +26,7 @@ import std.range;
import std.string : format;
import std.traits : isIntegral;
import std.typetuple : allSatisfy, TypeTuple;
+import std.utf : toUTF8;
import thrift.protocol.base;
import thrift.transport.base;
@@ -492,12 +493,15 @@ private:
return readSyntaxChar(reader_, ch);
}
- ubyte readJsonEscapeChar() {
- readJsonSyntaxChar(ZERO_CHAR);
- readJsonSyntaxChar(ZERO_CHAR);
+ wchar readJsonEscapeChar() {
auto a = reader_.read();
auto b = reader_.read();
- return cast(ubyte)((hexVal(a[0]) << 4) + hexVal(b[0]));
+ auto c = reader_.read();
+ auto d = reader_.read();
+ return cast(ushort)(
+ (hexVal(a[0]) << 12) + (hexVal(b[0]) << 8) +
+ (hexVal(c[0]) << 4) + hexVal(d[0])
+ );
}
string readJsonString(bool skipContext = false) {
@@ -506,6 +510,7 @@ private:
readJsonSyntaxChar(STRING_DELIMITER);
auto buffer = appender!string();
+ wchar[] wchs;
int bytesRead;
while (true) {
auto ch = reader_.read();
@@ -521,7 +526,18 @@ private:
if (ch == BACKSLASH) {
ch = reader_.read();
if (ch == ESCAPE_CHAR) {
- ch = readJsonEscapeChar();
+ auto wch = readJsonEscapeChar();
+ if (wch >= 0xD800 && wch <= 0xDBFF) {
+ wchs ~= wch;
+ } else if (wch >= 0xDC00 && wch <= 0xDFFF && wchs.length == 0) {
+ throw new TProtocolException("Missing UTF-16 high surrogate.",
+ TProtocolException.Type.INVALID_DATA);
+ } else {
+ wchs ~= wch;
+ buffer.put(wchs.toUTF8);
+ wchs = [];
+ }
+ continue;
} else {
auto pos = countUntil(kEscapeChars[], ch[0]);
if (pos == -1) {
@@ -531,9 +547,17 @@ private:
ch = kEscapeCharVals[pos];
}
}
+ if (wchs.length != 0) {
+ throw new TProtocolException("Missing UTF-16 low surrogate.",
+ TProtocolException.Type.INVALID_DATA);
+ }
buffer.put(ch[0]);
}
+ if (wchs.length != 0) {
+ throw new TProtocolException("Missing UTF-16 low surrogate.",
+ TProtocolException.Type.INVALID_DATA);
+ }
return buffer.data;
}
@@ -772,6 +796,36 @@ unittest {
}
unittest {
+ import std.exception;
+ import thrift.transport.memory;
+
+ auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\ud835\\udd3e\"");
+ auto json = tJsonProtocol(buf);
+ auto str = json.readString();
+ enforce(str == "ก 𝔾");
+}
+
+unittest {
+ // Thrown if low surrogate is missing.
+ import std.exception;
+ import thrift.transport.memory;
+
+ auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\ud835\"");
+ auto json = tJsonProtocol(buf);
+ assertThrown!TProtocolException(json.readString());
+}
+
+unittest {
+ // Thrown if high surrogate is missing.
+ import std.exception;
+ import thrift.transport.memory;
+
+ auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\udd3e\"");
+ auto json = tJsonProtocol(buf);
+ assertThrown!TProtocolException(json.readString());
+}
+
+unittest {
import thrift.internal.test.protocol;
testContainerSizeLimit!(TJsonProtocol!())();
testStringSizeLimit!(TJsonProtocol!())();