diff options
author | Jens Geyer <jensg@apache.org> | 2015-10-14 00:17:28 +0200 |
---|---|---|
committer | Jens Geyer <jensg@apache.org> | 2015-10-14 00:17:28 +0200 |
commit | bb272dbcd5956f727a604f9643bc87b8fd5e1ed4 (patch) | |
tree | 639afa485d35fa2198c57f4719ed08f216b7d536 /lib/d | |
parent | 673c44bc11250cf5ed891d7855e915de5ed70ed8 (diff) | |
download | thrift-bb272dbcd5956f727a604f9643bc87b8fd5e1ed4.tar.gz |
THRIFT-2412 UTF-8 sent by PHP as JSON is not understood
Client: D
Author: Phongphan Phuttha <phongphan@acm.org>
This closes #650
Diffstat (limited to 'lib/d')
-rw-r--r-- | lib/d/src/thrift/protocol/json.d | 64 |
1 file changed, 59 insertions, 5 deletions
diff --git a/lib/d/src/thrift/protocol/json.d b/lib/d/src/thrift/protocol/json.d index ed8f9c0b9..223d3a361 100644 --- a/lib/d/src/thrift/protocol/json.d +++ b/lib/d/src/thrift/protocol/json.d @@ -26,6 +26,7 @@ import std.range; import std.string : format; import std.traits : isIntegral; import std.typetuple : allSatisfy, TypeTuple; +import std.utf : toUTF8; import thrift.protocol.base; import thrift.transport.base; @@ -492,12 +493,15 @@ private: return readSyntaxChar(reader_, ch); } - ubyte readJsonEscapeChar() { - readJsonSyntaxChar(ZERO_CHAR); - readJsonSyntaxChar(ZERO_CHAR); + wchar readJsonEscapeChar() { auto a = reader_.read(); auto b = reader_.read(); - return cast(ubyte)((hexVal(a[0]) << 4) + hexVal(b[0])); + auto c = reader_.read(); + auto d = reader_.read(); + return cast(ushort)( + (hexVal(a[0]) << 12) + (hexVal(b[0]) << 8) + + (hexVal(c[0]) << 4) + hexVal(d[0]) + ); } string readJsonString(bool skipContext = false) { @@ -506,6 +510,7 @@ private: readJsonSyntaxChar(STRING_DELIMITER); auto buffer = appender!string(); + wchar[] wchs; int bytesRead; while (true) { auto ch = reader_.read(); @@ -521,7 +526,18 @@ private: if (ch == BACKSLASH) { ch = reader_.read(); if (ch == ESCAPE_CHAR) { - ch = readJsonEscapeChar(); + auto wch = readJsonEscapeChar(); + if (wch >= 0xD800 && wch <= 0xDBFF) { + wchs ~= wch; + } else if (wch >= 0xDC00 && wch <= 0xDFFF && wchs.length == 0) { + throw new TProtocolException("Missing UTF-16 high surrogate.", + TProtocolException.Type.INVALID_DATA); + } else { + wchs ~= wch; + buffer.put(wchs.toUTF8); + wchs = []; + } + continue; } else { auto pos = countUntil(kEscapeChars[], ch[0]); if (pos == -1) { @@ -531,9 +547,17 @@ private: ch = kEscapeCharVals[pos]; } } + if (wchs.length != 0) { + throw new TProtocolException("Missing UTF-16 low surrogate.", + TProtocolException.Type.INVALID_DATA); + } buffer.put(ch[0]); } + if (wchs.length != 0) { + throw new TProtocolException("Missing UTF-16 low surrogate.", + 
TProtocolException.Type.INVALID_DATA); + } return buffer.data; } @@ -772,6 +796,36 @@ unittest { } unittest { + import std.exception; + import thrift.transport.memory; + + auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\ud835\\udd3e\""); + auto json = tJsonProtocol(buf); + auto str = json.readString(); + enforce(str == "ก 𝔾"); +} + +unittest { + // Thrown if low surrogate is missing. + import std.exception; + import thrift.transport.memory; + + auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\ud835\""); + auto json = tJsonProtocol(buf); + assertThrown!TProtocolException(json.readString()); +} + +unittest { + // Thrown if high surrogate is missing. + import std.exception; + import thrift.transport.memory; + + auto buf = new TMemoryBuffer(cast(ubyte[])"\"\\u0e01 \\udd3e\""); + auto json = tJsonProtocol(buf); + assertThrown!TProtocolException(json.readString()); +} + +unittest { import thrift.internal.test.protocol; testContainerSizeLimit!(TJsonProtocol!())(); testStringSizeLimit!(TJsonProtocol!())(); |