diff options
author | Nikita Popov <nikita.ppv@gmail.com> | 2019-04-16 16:35:35 +0200 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2019-04-17 14:05:53 +0200 |
commit | f73f190c3f97479f735b97c22a8e4885344edca1 (patch) | |
tree | 625e71a2a24f578560492d93291df1dbaad28869 /ext/mbstring | |
parent | 3ccd3aba90b4c0c95f1e0a457dbbc33c4524a4ed (diff) | |
download | php-git-f73f190c3f97479f735b97c22a8e4885344edca1.tar.gz |
Fix internal_encoding fallback in mbstring
By introducing a hook that is called whenever one of
internal_encoding / input_encoding / output_encoding changes, so
that mbstring can adjust its internal state.
This also makes internal_encoding work with zend multibyte.
Diffstat (limited to 'ext/mbstring')
-rw-r--r-- | ext/mbstring/mb_gpc.c | 5 | ||||
-rw-r--r-- | ext/mbstring/mbstring.c | 156 | ||||
-rw-r--r-- | ext/mbstring/mbstring.h | 7 | ||||
-rw-r--r-- | ext/mbstring/tests/ini_encoding2.phpt | 2 | ||||
-rw-r--r-- | ext/mbstring/tests/internal_encoding.phpt | 66 | ||||
-rw-r--r-- | ext/mbstring/tests/mb_internal_encoding_basic2.phpt | 2 |
6 files changed, 145 insertions, 93 deletions
diff --git a/ext/mbstring/mb_gpc.c b/ext/mbstring/mb_gpc.c index 2a6a9b23bb..c57e2b6be1 100644 --- a/ext/mbstring/mb_gpc.c +++ b/ext/mbstring/mb_gpc.c @@ -58,11 +58,6 @@ MBSTRING_API SAPI_TREAT_DATA_FUNC(mbstr_treat_data) const mbfl_encoding *detected; php_mb_encoding_handler_info_t info; - if (arg != PARSE_STRING) { - char *value = MBSTRG(internal_encoding_name); - _php_mb_ini_mbstring_internal_encoding_set(value, value ? strlen(value): 0); - } - if (!MBSTRG(encoding_translation)) { php_default_treat_data(arg, str, destArray); return; diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index ee65b4107b..61e47630ef 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -615,34 +615,6 @@ ZEND_TSRMLS_CACHE_DEFINE() ZEND_GET_MODULE(mbstring) #endif -static char *get_internal_encoding(void) { - if (PG(internal_encoding) && PG(internal_encoding)[0]) { - return PG(internal_encoding); - } else if (SG(default_charset)) { - return SG(default_charset); - } - return ""; -} - -static char *get_input_encoding(void) { - if (PG(input_encoding) && PG(input_encoding)[0]) { - return PG(input_encoding); - } else if (SG(default_charset)) { - return SG(default_charset); - } - return ""; -} - -static char *get_output_encoding(void) { - if (PG(output_encoding) && PG(output_encoding)[0]) { - return PG(output_encoding); - } else if (SG(default_charset)) { - return SG(default_charset); - } - return ""; -} - - /* {{{ allocators */ static void *_php_mb_allocators_malloc(size_t sz) { @@ -1289,77 +1261,70 @@ static PHP_INI_MH(OnUpdate_mbstring_detect_order) } /* }}} */ -/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ -static PHP_INI_MH(OnUpdate_mbstring_http_input) -{ +static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) { const mbfl_encoding **list; size_t size; - - if (!new_value || !ZSTR_VAL(new_value)) { - if (MBSTRG(http_input_list)) { - pefree(MBSTRG(http_input_list), 1); - } - if (SUCCESS == 
php_mb_parse_encoding_list(get_input_encoding(), strlen(get_input_encoding())+1, &list, &size, 1)) { - MBSTRG(http_input_list) = list; - MBSTRG(http_input_list_size) = size; - return SUCCESS; - } - MBSTRG(http_input_list) = NULL; - MBSTRG(http_input_list_size) = 0; - return SUCCESS; - } - - if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, 1)) { + if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) { return FAILURE; } - if (MBSTRG(http_input_list)) { pefree(MBSTRG(http_input_list), 1); } MBSTRG(http_input_list) = list; MBSTRG(http_input_list_size) = size; + return SUCCESS; +} +/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */ +static PHP_INI_MH(OnUpdate_mbstring_http_input) +{ if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) { php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated"); } - return SUCCESS; + if (!new_value || !ZSTR_VAL(new_value)) { + const char *encoding = php_get_input_encoding(); + MBSTRG(http_input_set) = 0; + _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding)); + return SUCCESS; + } + + MBSTRG(http_input_set) = 1; + return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value)); } /* }}} */ -/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ -static PHP_INI_MH(OnUpdate_mbstring_http_output) -{ - const mbfl_encoding *encoding; - - if (new_value == NULL || ZSTR_LEN(new_value) == 0) { - encoding = mbfl_name2encoding(get_output_encoding()); - if (!encoding) { - MBSTRG(http_output_encoding) = &mbfl_encoding_pass; - MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; - return SUCCESS; - } - } else { - encoding = mbfl_name2encoding(ZSTR_VAL(new_value)); - if (!encoding) { - MBSTRG(http_output_encoding) = &mbfl_encoding_pass; - MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass; - return FAILURE; - } +static int _php_mb_ini_mbstring_http_output_set(const char 
*new_value) { + const mbfl_encoding *encoding = mbfl_name2encoding(new_value); + if (!encoding) { + return FAILURE; } + MBSTRG(http_output_encoding) = encoding; MBSTRG(current_http_output_encoding) = encoding; + return SUCCESS; +} +/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */ +static PHP_INI_MH(OnUpdate_mbstring_http_output) +{ if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) { php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated"); } - return SUCCESS; + if (new_value == NULL || ZSTR_LEN(new_value) == 0) { + MBSTRG(http_output_set) = 0; + _php_mb_ini_mbstring_http_output_set(php_get_output_encoding()); + return SUCCESS; + } + + MBSTRG(http_output_set) = 1; + return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value)); } /* }}} */ /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */ -int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length) +static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length) { const mbfl_encoding *encoding; @@ -1395,20 +1360,13 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) return FAILURE; } - if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) { - if (new_value && ZSTR_LEN(new_value)) { - return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value)); - } else { - return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(), strlen(get_internal_encoding())+1); - } + if (new_value && ZSTR_LEN(new_value)) { + MBSTRG(internal_encoding_set) = 1; + return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value)); } else { - /* the corresponding mbstring globals needs to be set according to the - * ini value in the later stage because it never falls back to the - * default value if 1. no value for mbstring.internal_encoding is given, - * 2. 
mbstring.language directive is processed in per-dir or runtime - * context and 3. call to the handler for mbstring.language is done - * after mbstring.internal_encoding is handled. */ - return SUCCESS; + const char *encoding = php_get_internal_encoding(); + MBSTRG(internal_encoding_set) = 0; + return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding)); } } /* }}} */ @@ -1532,6 +1490,24 @@ PHP_INI_BEGIN() PHP_INI_END() /* }}} */ +static void mbstring_internal_encoding_changed_hook() { + /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */ + if (!MBSTRG(internal_encoding_set)) { + const char *encoding = php_get_internal_encoding(); + _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding)); + } + + if (!MBSTRG(http_output_set)) { + const char *encoding = php_get_output_encoding(); + _php_mb_ini_mbstring_http_output_set(encoding); + } + + if (!MBSTRG(http_input_set)) { + const char *encoding = php_get_input_encoding(); + _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding)); + } +} + /* {{{ module global initialize handler */ static PHP_GINIT_FUNCTION(mbstring) { @@ -1572,6 +1548,9 @@ ZEND_TSRMLS_CACHE_UPDATE(); #endif mbstring_globals->last_used_encoding_name = NULL; mbstring_globals->last_used_encoding = NULL; + mbstring_globals->internal_encoding_set = 0; + mbstring_globals->http_output_set = 0; + mbstring_globals->http_input_set = 0; } /* }}} */ @@ -1603,6 +1582,11 @@ ZEND_TSRMLS_CACHE_UPDATE(); REGISTER_INI_ENTRIES(); + /* We assume that we're the only user of the hook. */ + ZEND_ASSERT(php_internal_encoding_changed == NULL); + php_internal_encoding_changed = mbstring_internal_encoding_changed_hook; + mbstring_internal_encoding_changed_hook(); + /* This is a global handler. Should not be set in a per-request handler. 
*/ sapi_register_treat_data(mbstr_treat_data); @@ -1763,6 +1747,10 @@ PHP_RSHUTDOWN_FUNCTION(mbstring) MBSTRG(last_used_encoding_name) = NULL; } + MBSTRG(internal_encoding_set) = 0; + MBSTRG(http_output_set) = 0; + MBSTRG(http_input_set) = 0; + #if HAVE_MBREGEX PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif @@ -1846,6 +1834,7 @@ PHP_FUNCTION(mb_internal_encoding) RETURN_FALSE; } else { MBSTRG(current_internal_encoding) = encoding; + MBSTRG(internal_encoding_set) = 1; RETURN_TRUE; } } @@ -1969,6 +1958,7 @@ PHP_FUNCTION(mb_http_output) php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", name); RETURN_FALSE; } else { + MBSTRG(http_output_set) = 1; MBSTRG(current_http_output_encoding) = encoding; RETURN_TRUE; } diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index cd882c1c03..5a713e5496 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -130,9 +130,6 @@ MBSTRING_API size_t php_mb_mbchar_bytes(const char *s); MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, zend_string *from_encoding); MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc); -/* internal use only */ -int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length); - ZEND_BEGIN_MODULE_GLOBALS(mbstring) char *internal_encoding_name; const mbfl_encoding *internal_encoding; @@ -169,6 +166,10 @@ ZEND_BEGIN_MODULE_GLOBALS(mbstring) #endif zend_string *last_used_encoding_name; const mbfl_encoding *last_used_encoding; + /* Whether an explicit internal_encoding / http_output / http_input encoding was set. 
*/ + zend_bool internal_encoding_set; + zend_bool http_output_set; + zend_bool http_input_set; ZEND_END_MODULE_GLOBALS(mbstring) #define MB_OVERLOAD_MAIL 1 diff --git a/ext/mbstring/tests/ini_encoding2.phpt b/ext/mbstring/tests/ini_encoding2.phpt index f3728486ae..64cc23ec91 100644 --- a/ext/mbstring/tests/ini_encoding2.phpt +++ b/ext/mbstring/tests/ini_encoding2.phpt @@ -50,7 +50,7 @@ string(6) "EUC-JP" string(0) "" string(0) "" string(0) "" -string(5) "UTF-8" +string(6) "EUC-JP" string(0) "" string(0) "" Setting INI diff --git a/ext/mbstring/tests/internal_encoding.phpt b/ext/mbstring/tests/internal_encoding.phpt new file mode 100644 index 0000000000..3d097ac79c --- /dev/null +++ b/ext/mbstring/tests/internal_encoding.phpt @@ -0,0 +1,66 @@ +--TEST-- +Check that "internal_encoding" ini is picked up by mbstring +--INI-- +internal_encoding=iso-8859-1 +--FILE-- +<?php + +var_dump(mb_internal_encoding()); +var_dump(mb_strlen("\xc3\xb6")); + +ini_set('mbstring.internal_encoding', 'utf-8'); + +var_dump(mb_internal_encoding()); +var_dump(mb_strlen("\xc3\xb6")); + +// mbstring.internal_encoding is set, this has no effect +ini_set('internal_encoding', 'iso-8859-2'); + +var_dump(mb_internal_encoding()); +var_dump(mb_strlen("\xc3\xb6")); + +// mbstring.internal_encoding is unset, pick up internal_encoding again +ini_set('mbstring.internal_encoding', ''); + +var_dump(mb_internal_encoding()); +var_dump(mb_strlen("\xc3\xb6")); + +mb_internal_encoding('utf-8'); + +var_dump(mb_internal_encoding()); +var_dump(mb_strlen("\xc3\xb6")); + +// mb_internal_encoding() is set, this has no effect +ini_set('internal_encoding', 'iso-8859-3'); + +var_dump(mb_internal_encoding()); +var_dump(mb_strlen("\xc3\xb6")); + +// mbstring.internal_encoding is unset, pick up internal_encoding again +ini_set('mbstring.internal_encoding', ''); + +var_dump(mb_internal_encoding()); +var_dump(mb_strlen("\xc3\xb6")); + +?> +--EXPECTF-- +string(10) "ISO-8859-1" +int(2) + +Deprecated: ini_set(): Use of 
mbstring.internal_encoding is deprecated in %s on line %d +string(5) "UTF-8" +int(1) +string(5) "UTF-8" +int(1) + +Deprecated: ini_set(): Use of mbstring.internal_encoding is deprecated in %s on line %d +string(10) "ISO-8859-2" +int(2) +string(5) "UTF-8" +int(1) +string(5) "UTF-8" +int(1) + +Deprecated: ini_set(): Use of mbstring.internal_encoding is deprecated in %s on line %d +string(10) "ISO-8859-3" +int(2) diff --git a/ext/mbstring/tests/mb_internal_encoding_basic2.phpt b/ext/mbstring/tests/mb_internal_encoding_basic2.phpt index 8090b47be5..99b92027e9 100644 --- a/ext/mbstring/tests/mb_internal_encoding_basic2.phpt +++ b/ext/mbstring/tests/mb_internal_encoding_basic2.phpt @@ -47,7 +47,7 @@ string(10) "ISO-8859-1" string(0) "" string(0) "" string(0) "" -string(5) "UTF-8" +string(10) "ISO-8859-1" bool(true) string(5) "UTF-8" Done |