summary | refs | log | tree | commit | diff
path: root/ext/mbstring/php_unicode.c
diff options
context:
space:
mode:
author: Nikita Popov <nikita.ppv@gmail.com> 2017-07-19 22:36:53 +0200
committer: Nikita Popov <nikita.ppv@gmail.com> 2017-07-19 23:59:42 +0200
commit: dead4f0b1b9a555bbea970f5399c01142414db85 (patch)
tree: fbc00ea52cc9766818b242a5fe7d185642e6be9e /ext/mbstring/php_unicode.c
parent: a34e597ea8df82cc0c1563daae4207a55b161504 (diff)
download: php-git-dead4f0b1b9a555bbea970f5399c01142414db85.tar.gz
Avoid unnecessary encoding lookups in mbstring
Extract part of php_mb_convert_encoding that does the actual work and use it whenever we already know the encoding.
Diffstat (limited to 'ext/mbstring/php_unicode.c')
-rw-r--r-- ext/mbstring/php_unicode.c 25
1 files changed, 15 insertions, 10 deletions
diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c
index 8b6a52156a..5584b2a3a4 100644
--- a/ext/mbstring/php_unicode.c
+++ b/ext/mbstring/php_unicode.c
@@ -43,6 +43,7 @@
#include "mbstring.h"
#include "php_unicode.h"
#include "unicode_data.h"
+#include "libmbfl/filters/mbfilter_ucs4.h"
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
@@ -268,20 +269,23 @@ MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_
}
MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
- const char *src_encoding)
+ const char *src_encoding_name)
{
char *unicode, *newstr;
size_t unicode_len;
unsigned char *unicode_ptr;
size_t i;
- enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
+ enum mbfl_no_encoding src_no_encoding;
- if (_src_encoding == mbfl_no_encoding_invalid) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", src_encoding);
+ const mbfl_encoding *src_encoding = mbfl_name2encoding(src_encoding_name);
+ if (!src_encoding) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", src_encoding_name);
return NULL;
}
- unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len);
+ src_no_encoding = src_encoding->no_encoding;
+
+ unicode = php_mb_convert_encoding_ex(srcstr, srclen, &mbfl_encoding_ucs4be, src_encoding, &unicode_len);
if (unicode == NULL)
return NULL;
@@ -291,14 +295,14 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, s
case PHP_UNICODE_CASE_UPPER:
for (i = 0; i < unicode_len; i+=4) {
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_toupper(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
}
break;
case PHP_UNICODE_CASE_LOWER:
for (i = 0; i < unicode_len; i+=4) {
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
}
break;
@@ -312,7 +316,7 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, s
if (mode) {
if (res) {
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_tolower(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
} else {
mode = 0;
}
@@ -320,7 +324,7 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, s
if (res) {
mode = 1;
UINT32_TO_BE_ARY(&unicode_ptr[i],
- php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), _src_encoding));
+ php_unicode_totitle(BE_ARY_TO_UINT32(&unicode_ptr[i]), src_no_encoding));
}
}
}
@@ -328,7 +332,8 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, s
}
- newstr = php_mb_convert_encoding(unicode, unicode_len, src_encoding, "UCS-4BE", ret_len);
+ newstr = php_mb_convert_encoding_ex(
+ unicode, unicode_len, src_encoding, &mbfl_encoding_ucs4be, ret_len);
efree(unicode);
return newstr;