summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nikita Popov <nikita.ppv@gmail.com> 2017-07-19 23:50:14 +0200
committer Nikita Popov <nikita.ppv@gmail.com> 2017-07-19 23:59:42 +0200
commit 9c73be898d4e5aa2e64b21da14797ec9ad202134 (patch)
tree 0a476d521fa9a57a9e9c023e77224b1c838e4d4b
parent 4128746b949355f588143ef18ad98fdfda089873 (diff)
download php-git-9c73be898d4e5aa2e64b21da14797ec9ad202134.tar.gz
Directly accept encoding in php_unicode_convert_case()
As a side effect, mb_strtolower() and mb_strtoupper() now correctly handle a NULL encoding parameter by using the internal encoding. This is why the expected output of the two tests changed.
-rw-r--r-- ext/mbstring/mbstring.c 55
-rw-r--r-- ext/mbstring/php_unicode.c 15
-rw-r--r-- ext/mbstring/php_unicode.h 6
-rw-r--r-- ext/mbstring/tests/mb_strtolower_variation2.phpt 18
-rw-r--r-- ext/mbstring/tests/mb_strtoupper_variation2.phpt 18
5 files changed, 55 insertions, 57 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index 9c64092c2e..1ce966610c 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -3514,12 +3514,13 @@ PHP_FUNCTION(mb_convert_encoding)
Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)
{
- const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ const char *from_encoding = NULL;
char *str;
size_t str_len, from_encoding_len;
zend_long case_mode = 0;
char *newstr;
size_t ret_len;
+ const mbfl_encoding *enc;
RETVAL_FALSE;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "sl|s!", &str, &str_len,
@@ -3527,7 +3528,12 @@ PHP_FUNCTION(mb_convert_case)
return;
}
- newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding);
+ enc = php_mb_get_encoding(from_encoding);
+ if (!enc) {
+ return;
+ }
+
+ newstr = php_unicode_convert_case(case_mode, str, str_len, &ret_len, enc);
if (newstr) {
// TODO: avoid reallocation ???
@@ -3542,17 +3548,24 @@ PHP_FUNCTION(mb_convert_case)
*/
PHP_FUNCTION(mb_strtoupper)
{
- const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ const char *from_encoding = NULL;
char *str;
size_t str_len, from_encoding_len;
char *newstr;
size_t ret_len;
+ const mbfl_encoding *enc;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
&from_encoding, &from_encoding_len) == FAILURE) {
return;
}
- newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding);
+
+ enc = php_mb_get_encoding(from_encoding);
+ if (!enc) {
+ RETURN_FALSE;
+ }
+
+ newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
if (newstr) {
// TODO: avoid reallocation ???
@@ -3569,17 +3582,24 @@ PHP_FUNCTION(mb_strtoupper)
*/
PHP_FUNCTION(mb_strtolower)
{
- const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
+ const char *from_encoding = NULL;
char *str;
size_t str_len, from_encoding_len;
char *newstr;
size_t ret_len;
+ const mbfl_encoding *enc;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s!", &str, &str_len,
&from_encoding, &from_encoding_len) == FAILURE) {
return;
}
- newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding);
+
+ enc = php_mb_get_encoding(from_encoding);
+ if (!enc) {
+ RETURN_FALSE;
+ }
+
+ newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
if (newstr) {
// TODO: avoid reallocation ???
@@ -5566,20 +5586,25 @@ MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nby
*/
MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding)
{
- int n;
+ int n = -1;
mbfl_string haystack, needle;
- n = -1;
+ const mbfl_encoding *enc;
+
+ enc = php_mb_get_encoding(from_encoding);
+ if (!enc) {
+ return -1;
+ }
mbfl_string_init(&haystack);
mbfl_string_init(&needle);
haystack.no_language = MBSTRG(language);
- haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
+ haystack.no_encoding = enc->no_encoding;
needle.no_language = MBSTRG(language);
- needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
+ needle.no_encoding = enc->no_encoding;
do {
size_t len = 0;
- haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding);
+ haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, enc);
haystack.len = len;
if (!haystack.val) {
@@ -5590,7 +5615,7 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
break;
}
- needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding);
+ needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, enc);
needle.len = len;
if (!needle.val) {
@@ -5601,12 +5626,6 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
break;
}
- haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
- if (haystack.no_encoding == mbfl_no_encoding_invalid) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
- break;
- }
-
{
int haystack_char_len = mbfl_strlen(&haystack);
diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c
index 490a985f90..5d6ccbfdd5 100644
--- a/ext/mbstring/php_unicode.c
+++ b/ext/mbstring/php_unicode.c
@@ -274,22 +274,15 @@ MBSTRING_API unsigned long php_unicode_totitle(unsigned long code, enum mbfl_no_
((unsigned char*)(ptr))[3] = (v ) & 0xff;\
}
-MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
- const char *src_encoding_name)
+MBSTRING_API char *php_unicode_convert_case(
+ int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
+ const mbfl_encoding *src_encoding)
{
char *unicode, *newstr;
size_t unicode_len;
unsigned char *unicode_ptr;
size_t i;
- enum mbfl_no_encoding src_no_encoding;
-
- const mbfl_encoding *src_encoding = mbfl_name2encoding(src_encoding_name);
- if (!src_encoding) {
- php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", src_encoding_name);
- return NULL;
- }
-
- src_no_encoding = src_encoding->no_encoding;
+ enum mbfl_no_encoding src_no_encoding = src_encoding->no_encoding;
unicode = php_mb_convert_encoding_ex(srcstr, srclen, &mbfl_encoding_ucs4be, src_encoding, &unicode_len);
if (unicode == NULL)
diff --git a/ext/mbstring/php_unicode.h b/ext/mbstring/php_unicode.h
index ac6dd6e57a..3a6c75ce86 100644
--- a/ext/mbstring/php_unicode.h
+++ b/ext/mbstring/php_unicode.h
@@ -94,8 +94,10 @@
MBSTRING_API int php_unicode_is_prop(unsigned long code, ...);
MBSTRING_API int php_unicode_is_prop1(unsigned long code, int prop);
-MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, size_t srclen, size_t *retlen,
- const char *src_encoding);
+
+MBSTRING_API char *php_unicode_convert_case(
+ int case_mode, const char *srcstr, size_t srclen, size_t *retlen,
+ const mbfl_encoding *src_encoding);
#define PHP_UNICODE_CASE_UPPER 0
#define PHP_UNICODE_CASE_LOWER 1
diff --git a/ext/mbstring/tests/mb_strtolower_variation2.phpt b/ext/mbstring/tests/mb_strtolower_variation2.phpt
index 43d0f53728..ef69eb00af 100644
--- a/ext/mbstring/tests/mb_strtolower_variation2.phpt
+++ b/ext/mbstring/tests/mb_strtolower_variation2.phpt
@@ -157,14 +157,10 @@ Warning: mb_strtolower(): Unknown encoding "0.5" in %s on line %d
bool(false)
-- Iteration 10 --
-
-Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "68656c6c6f2c20776f726c64"
-- Iteration 11 --
-
-Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "68656c6c6f2c20776f726c64"
-- Iteration 12 --
@@ -209,17 +205,13 @@ string(24) "68656c6c6f2c20776f726c64"
string(24) "68656c6c6f2c20776f726c64"
-- Iteration 22 --
-
-Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "68656c6c6f2c20776f726c64"
-- Iteration 23 --
-
-Warning: mb_strtolower(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "68656c6c6f2c20776f726c64"
-- Iteration 24 --
Warning: mb_strtolower() expects parameter 2 to be string, resource given in %s on line %d
NULL
-Done \ No newline at end of file
+Done
diff --git a/ext/mbstring/tests/mb_strtoupper_variation2.phpt b/ext/mbstring/tests/mb_strtoupper_variation2.phpt
index 52beb3d741..25b9a53e91 100644
--- a/ext/mbstring/tests/mb_strtoupper_variation2.phpt
+++ b/ext/mbstring/tests/mb_strtoupper_variation2.phpt
@@ -158,14 +158,10 @@ Warning: mb_strtoupper(): Unknown encoding "0.5" in %s on line %d
bool(false)
-- Iteration 10 --
-
-Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "48454c4c4f2c20574f524c44"
-- Iteration 11 --
-
-Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "48454c4c4f2c20574f524c44"
-- Iteration 12 --
@@ -210,17 +206,13 @@ string(24) "48454c4c4f2c20574f524c44"
string(24) "48454c4c4f2c20574f524c44"
-- Iteration 22 --
-
-Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "48454c4c4f2c20574f524c44"
-- Iteration 23 --
-
-Warning: mb_strtoupper(): Unknown encoding "(null)" in %s on line %d
-bool(false)
+string(24) "48454c4c4f2c20574f524c44"
-- Iteration 24 --
Warning: mb_strtoupper() expects parameter 2 to be string, resource given in %s on line %d
NULL
-Done \ No newline at end of file
+Done