summary | refs | log | tree | commit | diff
path: root/ext/json/json_encoder.c
diff options
context:
space:
mode:
author: Dmitry Stogov <dmitry@zend.com> 2018-06-13 18:26:48 +0300
committer: Dmitry Stogov <dmitry@zend.com> 2018-06-13 18:26:48 +0300
commit: c1ce43d1d2e500aeb0143c7c0f051e40f09ce4db (patch)
tree: 0532d5708acccddacc542176f8ab8c4447e83a98 /ext/json/json_encoder.c
parent: 52f92b51aa2c838c7bc3a55b092bc9570a581367 (diff)
download: php-git-c1ce43d1d2e500aeb0143c7c0f051e40f09ce4db.tar.gz
php_json_escape_string() optimization
Diffstat (limited to 'ext/json/json_encoder.c')
-rw-r--r-- ext/json/json_encoder.c 95
1 files changed, 60 insertions, 35 deletions
diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c
index b3d0b57a7e..c79e694f26 100644
--- a/ext/json/json_encoder.c
+++ b/ext/json/json_encoder.c
@@ -33,7 +33,7 @@
static const char digits[] = "0123456789abcdef";
static int php_json_escape_string(
- smart_str *buf, char *s, size_t len,
+ smart_str *buf, const char *s, size_t len,
int options, php_json_encoder *encoder);
static int php_json_determine_array_type(zval *val) /* {{{ */
@@ -250,12 +250,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso
/* }}} */
static int php_json_escape_string(
- smart_str *buf, char *s, size_t len,
+ smart_str *buf, const char *s, size_t len,
int options, php_json_encoder *encoder) /* {{{ */
{
int status;
unsigned int us;
size_t pos, checkpoint;
+ char *dst;
if (len == 0) {
smart_str_appendl(buf, "\"\"", 2);
@@ -287,72 +288,89 @@ static int php_json_escape_string(
do {
us = (unsigned char)s[pos];
- if (us >= 0x80) {
- int utf8_sub = 0;
- size_t prev_pos = pos;
-
+ if (UNEXPECTED(us >= 0x80)) {
+ if (pos) {
+ smart_str_appendl(buf, s, pos);
+ s += pos;
+ pos = 0;
+ }
us = php_next_utf8_char((unsigned char *)s, len, &pos, &status);
+ len -= pos;
/* check whether UTF8 character is correct */
- if (status != SUCCESS) {
+ if (UNEXPECTED(status != SUCCESS)) {
+ s += pos;
+ pos = 0;
if (options & PHP_JSON_INVALID_UTF8_IGNORE) {
/* ignore invalid UTF8 character */
continue;
} else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) {
/* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */
- us = 0xfffd;
- utf8_sub = 1;
- } else {
- if (buf->s) {
- ZSTR_LEN(buf->s) = checkpoint;
+ if (options & PHP_JSON_UNESCAPED_UNICODE) {
+ smart_str_appendl(buf, "\xef\xbf\xbd", 3);
+ } else {
+ smart_str_appendl(buf, "\\ufffd", 6);
}
+ continue;
+ } else {
+ ZSTR_LEN(buf->s) = checkpoint;
encoder->error_code = PHP_JSON_ERROR_UTF8;
if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) {
smart_str_appendl(buf, "null", 4);
}
return FAILURE;
}
- }
/* Escape U+2028/U+2029 line terminators, UNLESS both
JSON_UNESCAPED_UNICODE and
JSON_UNESCAPED_LINE_TERMINATORS were provided */
- if ((options & PHP_JSON_UNESCAPED_UNICODE)
+ } else if ((options & PHP_JSON_UNESCAPED_UNICODE)
&& ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
|| us < 0x2028 || us > 0x2029)) {
- if (utf8_sub) {
- smart_str_appendl(buf, "\xef\xbf\xbd", 3);
- } else {
- smart_str_appendl(buf, s + prev_pos, pos - prev_pos);
- }
+ smart_str_appendl(buf, s, pos);
+ s += pos;
+ pos = 0;
continue;
}
/* From http://en.wikipedia.org/wiki/UTF16 */
if (us >= 0x10000) {
unsigned int next_us;
+
us -= 0x10000;
next_us = (unsigned short)((us & 0x3ff) | 0xdc00);
us = (unsigned short)((us >> 10) | 0xd800);
- smart_str_appendl(buf, "\\u", 2);
- smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
- smart_str_appendc(buf, digits[(us & 0xf00) >> 8]);
- smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
- smart_str_appendc(buf, digits[(us & 0xf)]);
+ dst = smart_str_extend(buf, 6);
+ dst[0] = '\\';
+ dst[1] = 'u';
+ dst[2] = digits[(us >> 12) & 0xf];
+ dst[3] = digits[(us >> 8) & 0xf];
+ dst[4] = digits[(us >> 4) & 0xf];
+ dst[5] = digits[us & 0xf];
us = next_us;
}
- smart_str_appendl(buf, "\\u", 2);
- smart_str_appendc(buf, digits[(us & 0xf000) >> 12]);
- smart_str_appendc(buf, digits[(us & 0xf00) >> 8]);
- smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
- smart_str_appendc(buf, digits[(us & 0xf)]);
+ dst = smart_str_extend(buf, 6);
+ dst[0] = '\\';
+ dst[1] = 'u';
+ dst[2] = digits[(us >> 12) & 0xf];
+ dst[3] = digits[(us >> 8) & 0xf];
+ dst[4] = digits[(us >> 4) & 0xf];
+ dst[5] = digits[us & 0xf];
+ s += pos;
+ pos = 0;
} else {
static const uint32_t charmap[4] = {
0xffffffff, 0x500080c4, 0x10000000, 0x00000000};
- pos++;
+ len--;
if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) {
- smart_str_appendc(buf, (unsigned char) us);
+ pos++;
} else {
+ if (pos) {
+ smart_str_appendl(buf, s, pos);
+ s += pos;
+ pos = 0;
+ }
+ s++;
switch (us) {
case '"':
if (options & PHP_JSON_HEX_QUOT) {
@@ -428,15 +446,22 @@ static int php_json_escape_string(
default:
ZEND_ASSERT(us < ' ');
- smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1);
- smart_str_appendc(buf, digits[(us & 0xf0) >> 4]);
- smart_str_appendc(buf, digits[(us & 0xf)]);
+ dst = smart_str_extend(buf, 6);
+ dst[0] = '\\';
+ dst[1] = 'u';
+ dst[2] = '0';
+ dst[3] = '0';
+ dst[4] = digits[(us >> 4) & 0xf];
+ dst[5] = digits[us & 0xf];
break;
}
}
}
- } while (pos < len);
+ } while (len);
+ if (EXPECTED(pos)) {
+ smart_str_appendl(buf, s, pos);
+ }
smart_str_appendc(buf, '"');
return SUCCESS;