diff options
author | Dmitry Stogov <dmitry@zend.com> | 2018-06-13 18:26:48 +0300 |
---|---|---|
committer | Dmitry Stogov <dmitry@zend.com> | 2018-06-13 18:26:48 +0300 |
commit | c1ce43d1d2e500aeb0143c7c0f051e40f09ce4db (patch) | |
tree | 0532d5708acccddacc542176f8ab8c4447e83a98 /ext/json/json_encoder.c | |
parent | 52f92b51aa2c838c7bc3a55b092bc9570a581367 (diff) | |
download | php-git-c1ce43d1d2e500aeb0143c7c0f051e40f09ce4db.tar.gz |
php_json_escape_string() optimization
Diffstat (limited to 'ext/json/json_encoder.c')
-rw-r--r-- | ext/json/json_encoder.c | 95 |
1 file changed, 60 insertions, 35 deletions
diff --git a/ext/json/json_encoder.c b/ext/json/json_encoder.c index b3d0b57a7e..c79e694f26 100644 --- a/ext/json/json_encoder.c +++ b/ext/json/json_encoder.c @@ -33,7 +33,7 @@ static const char digits[] = "0123456789abcdef"; static int php_json_escape_string( - smart_str *buf, char *s, size_t len, + smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder); static int php_json_determine_array_type(zval *val) /* {{{ */ @@ -250,12 +250,13 @@ static int php_json_encode_array(smart_str *buf, zval *val, int options, php_jso /* }}} */ static int php_json_escape_string( - smart_str *buf, char *s, size_t len, + smart_str *buf, const char *s, size_t len, int options, php_json_encoder *encoder) /* {{{ */ { int status; unsigned int us; size_t pos, checkpoint; + char *dst; if (len == 0) { smart_str_appendl(buf, "\"\"", 2); @@ -287,72 +288,89 @@ static int php_json_escape_string( do { us = (unsigned char)s[pos]; - if (us >= 0x80) { - int utf8_sub = 0; - size_t prev_pos = pos; - + if (UNEXPECTED(us >= 0x80)) { + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; + } us = php_next_utf8_char((unsigned char *)s, len, &pos, &status); + len -= pos; /* check whether UTF8 character is correct */ - if (status != SUCCESS) { + if (UNEXPECTED(status != SUCCESS)) { + s += pos; + pos = 0; if (options & PHP_JSON_INVALID_UTF8_IGNORE) { /* ignore invalid UTF8 character */ continue; } else if (options & PHP_JSON_INVALID_UTF8_SUBSTITUTE) { /* Use Unicode character 'REPLACEMENT CHARACTER' (U+FFFD) */ - us = 0xfffd; - utf8_sub = 1; - } else { - if (buf->s) { - ZSTR_LEN(buf->s) = checkpoint; + if (options & PHP_JSON_UNESCAPED_UNICODE) { + smart_str_appendl(buf, "\xef\xbf\xbd", 3); + } else { + smart_str_appendl(buf, "\\ufffd", 6); } + continue; + } else { + ZSTR_LEN(buf->s) = checkpoint; encoder->error_code = PHP_JSON_ERROR_UTF8; if (options & PHP_JSON_PARTIAL_OUTPUT_ON_ERROR) { smart_str_appendl(buf, "null", 4); } return FAILURE; } - } /* Escape 
U+2028/U+2029 line terminators, UNLESS both JSON_UNESCAPED_UNICODE and JSON_UNESCAPED_LINE_TERMINATORS were provided */ - if ((options & PHP_JSON_UNESCAPED_UNICODE) + } else if ((options & PHP_JSON_UNESCAPED_UNICODE) && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS) || us < 0x2028 || us > 0x2029)) { - if (utf8_sub) { - smart_str_appendl(buf, "\xef\xbf\xbd", 3); - } else { - smart_str_appendl(buf, s + prev_pos, pos - prev_pos); - } + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; continue; } /* From http://en.wikipedia.org/wiki/UTF16 */ if (us >= 0x10000) { unsigned int next_us; + us -= 0x10000; next_us = (unsigned short)((us & 0x3ff) | 0xdc00); us = (unsigned short)((us >> 10) | 0xd800); - smart_str_appendl(buf, "\\u", 2); - smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); - smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); - smart_str_appendc(buf, digits[(us & 0xf)]); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = digits[(us >> 12) & 0xf]; + dst[3] = digits[(us >> 8) & 0xf]; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; us = next_us; } - smart_str_appendl(buf, "\\u", 2); - smart_str_appendc(buf, digits[(us & 0xf000) >> 12]); - smart_str_appendc(buf, digits[(us & 0xf00) >> 8]); - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); - smart_str_appendc(buf, digits[(us & 0xf)]); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = digits[(us >> 12) & 0xf]; + dst[3] = digits[(us >> 8) & 0xf]; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; + s += pos; + pos = 0; } else { static const uint32_t charmap[4] = { 0xffffffff, 0x500080c4, 0x10000000, 0x00000000}; - pos++; + len--; if (EXPECTED(!ZEND_BIT_TEST(charmap, us))) { - smart_str_appendc(buf, (unsigned char) us); + pos++; } else { + if (pos) { + smart_str_appendl(buf, s, pos); + s += pos; + pos = 0; + } + s++; switch (us) { case '"': if (options & PHP_JSON_HEX_QUOT) 
{ @@ -428,15 +446,22 @@ static int php_json_escape_string( default: ZEND_ASSERT(us < ' '); - smart_str_appendl(buf, "\\u00", sizeof("\\u00")-1); - smart_str_appendc(buf, digits[(us & 0xf0) >> 4]); - smart_str_appendc(buf, digits[(us & 0xf)]); + dst = smart_str_extend(buf, 6); + dst[0] = '\\'; + dst[1] = 'u'; + dst[2] = '0'; + dst[3] = '0'; + dst[4] = digits[(us >> 4) & 0xf]; + dst[5] = digits[us & 0xf]; break; } } } - } while (pos < len); + } while (len); + if (EXPECTED(pos)) { + smart_str_appendl(buf, s, pos); + } smart_str_appendc(buf, '"'); return SUCCESS; |