summaryrefslogtreecommitdiff
path: root/ext/json
diff options
context:
space:
mode:
author: Scott MacVicar <scottmac@php.net> 2009-01-02 03:02:22 +0000
committer: Scott MacVicar <scottmac@php.net> 2009-01-02 03:02:22 +0000
commit: 0bdbc4e356d81fd2fd7e1f2391e24e76c2f3b8f6 (patch)
tree: b62b4615b40808b430f3b0c8f69fbe5aa416c3b3 /ext/json
parent: 8d62f3dd0201094f589fa8c64ab337e5edcf43e6 (diff)
download: php-git-0bdbc4e356d81fd2fd7e1f2391e24e76c2f3b8f6.tar.gz
MFH Fix bug #46944 - UTF-8 characters outside the BMP aren't encoded correctly.
Diffstat (limited to 'ext/json')
-rw-r--r-- ext/json/tests/bug46944.phpt 32
-rw-r--r-- ext/json/utf8_decode.c 2
-rw-r--r-- ext/json/utf8_to_utf16.c 2
3 files changed, 34 insertions, 2 deletions
diff --git a/ext/json/tests/bug46944.phpt b/ext/json/tests/bug46944.phpt
new file mode 100644
index 0000000000..735de04435
--- /dev/null
+++ b/ext/json/tests/bug46944.phpt
@@ -0,0 +1,32 @@
+--TEST--
+Bug #46944 (json_encode() doesn't handle 3 byte utf8 correctly)
+--SKIPIF--
+<?php if (!extension_loaded('json')) print 'skip'; ?>
+--FILE--
+<?php
+// Emit 16 four-byte UTF-8 sequences (lead byte 0xF0..0xF4); chr() turns each computed value into one raw byte.
+for ($i = 1; $i <= 16; $i++) {
+ echo json_encode(b"aa" . chr(0xf0|($i >> 2)) . chr(0x8f|($i & 3) << 4) . "\xbf\xbdzz") . "\n";
+}
+
+
+echo "Done\n";
+?>
+--EXPECT--
+"aa\ud83f\udffdzz"
+"aa\ud87f\udffdzz"
+"aa\ud8bf\udffdzz"
+"aa\ud8ff\udffdzz"
+"aa\ud93f\udffdzz"
+"aa\ud97f\udffdzz"
+"aa\ud9bf\udffdzz"
+"aa\ud9ff\udffdzz"
+"aa\uda3f\udffdzz"
+"aa\uda7f\udffdzz"
+"aa\udabf\udffdzz"
+"aa\udaff\udffdzz"
+"aa\udb3f\udffdzz"
+"aa\udb7f\udffdzz"
+"aa\udbbf\udffdzz"
+"aa\udbff\udffdzz"
+Done
diff --git a/ext/json/utf8_decode.c b/ext/json/utf8_decode.c
index cea1f8cec8..2d0422bedb 100644
--- a/ext/json/utf8_decode.c
+++ b/ext/json/utf8_decode.c
@@ -165,7 +165,7 @@ utf8_decode_next(json_utf8_decode *utf8)
/*
Three continuation (65536 to 1114111)
*/
- if ((c & 0xF1) == 0xF0) {
+ if ((c & 0xF8) == 0xF0) {
int c1 = cont(utf8);
int c2 = cont(utf8);
int c3 = cont(utf8);
diff --git a/ext/json/utf8_to_utf16.c b/ext/json/utf8_to_utf16.c
index 42ea9e5d8e..599f0e13b4 100644
--- a/ext/json/utf8_to_utf16.c
+++ b/ext/json/utf8_to_utf16.c
@@ -46,7 +46,7 @@ utf8_to_utf16(unsigned short w[], char p[], int length)
w[the_index] = (unsigned short)c;
the_index += 1;
} else {
- c &= 0xFFFF;
+ c -= 0x10000;
w[the_index] = (unsigned short)(0xD800 | (c >> 10));
the_index += 1;
w[the_index] = (unsigned short)(0xDC00 | (c & 0x3FF));