diff options
author | Andrea Faulds <ajf@ajf.me> | 2016-10-13 23:33:33 +0100 |
---|---|---|
committer | Andrea Faulds <ajf@ajf.me> | 2016-10-17 15:39:02 +0100 |
commit | 1a512eed449128334edf0329b72e53c5caaaa95b (patch) | |
tree | c095b8ccdb00a6d364a1b1f642c5a5a218a675d6 | |
parent | a5251f78f8c3204c04d93c02b63d2e3967d3e1c1 (diff) | |
download | php-git-1a512eed449128334edf0329b72e53c5caaaa95b.tar.gz |
Move utf8_encode and utf8_decode to ext/standard
-rw-r--r-- | ext/standard/basic_functions.c | 10 | ||||
-rw-r--r-- | ext/standard/php_string.h | 2 | ||||
-rw-r--r-- | ext/standard/string.c | 94 | ||||
-rw-r--r-- | ext/standard/tests/strings/bug43957.phpt (renamed from ext/xml/tests/bug43957.phpt) | 5 | ||||
-rw-r--r-- | ext/standard/tests/strings/bug49687.phpt (renamed from ext/xml/tests/bug49687.phpt) | 5 | ||||
-rw-r--r-- | ext/standard/tests/strings/utf8.phpt (renamed from ext/xml/tests/xml006.phpt) | 2 | ||||
-rw-r--r-- | ext/standard/tests/strings/utf8_decode_error.phpt (renamed from ext/xml/tests/utf8_decode_error.phpt) | 8 | ||||
-rw-r--r-- | ext/standard/tests/strings/utf8_decode_variation1.phpt (renamed from ext/xml/tests/utf8_decode_variation1.phpt) | 8 | ||||
-rw-r--r-- | ext/standard/tests/strings/utf8_encode_error.phpt (renamed from ext/xml/tests/utf8_encode_error.phpt) | 8 | ||||
-rw-r--r-- | ext/standard/tests/strings/utf8_encode_variation1.phpt (renamed from ext/xml/tests/utf8_encode_variation1.phpt) | 8 | ||||
-rw-r--r-- | ext/xml/xml.c | 50 |
11 files changed, 110 insertions, 90 deletions
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c index 13e8a4e6eb..d528e51908 100644 --- a/ext/standard/basic_functions.c +++ b/ext/standard/basic_functions.c @@ -2465,6 +2465,14 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3) ZEND_ARG_INFO(0, length) ZEND_ARG_INFO(0, case_sensitivity) ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1) + ZEND_ARG_INFO(0, data) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1) + ZEND_ARG_INFO(0, data) +ZEND_END_ARG_INFO() /* }}} */ /* {{{ syslog.c */ #ifdef HAVE_SYSLOG_H @@ -2764,6 +2772,8 @@ const zend_function_entry basic_functions[] = { /* {{{ */ PHP_FE(str_split, arginfo_str_split) PHP_FE(strpbrk, arginfo_strpbrk) PHP_FE(substr_compare, arginfo_substr_compare) + PHP_FE(utf8_encode, arginfo_utf8_encode) + PHP_FE(utf8_decode, arginfo_utf8_decode) #ifdef HAVE_STRCOLL PHP_FE(strcoll, arginfo_strcoll) diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h index 14b66e7e13..6fc7587121 100644 --- a/ext/standard/php_string.h +++ b/ext/standard/php_string.h @@ -93,6 +93,8 @@ PHP_FUNCTION(str_word_count); PHP_FUNCTION(str_split); PHP_FUNCTION(strpbrk); PHP_FUNCTION(substr_compare); +PHP_FUNCTION(utf8_encode); +PHP_FUNCTION(utf8_decode); #ifdef HAVE_STRCOLL PHP_FUNCTION(strcoll); #endif diff --git a/ext/standard/string.c b/ext/standard/string.c index fa59ddd06f..4389e10702 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -64,6 +64,8 @@ /* For str_getcsv() support */ #include "ext/standard/file.h" +/* For php_next_utf8_char() */ +#include "ext/standard/html.h" #define STR_PAD_LEFT 0 #define STR_PAD_RIGHT 1 @@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare) } /* }}} */ +/* {{{ */ +static zend_string *php_utf8_encode(const char *s, size_t len) +{ + size_t pos = len; + zend_string *str; + unsigned char c; + + str = zend_string_safe_alloc(len, 2, 0, 0); + ZSTR_LEN(str) = 0; + while (pos > 0) { + /* The lower 256 codepoints of Unicode are identical to Latin-1, + * so we don't need to do any mapping here. */ + c = (unsigned char)(*s); + if (c < 0x80) { + ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c; + /* We only account for the single-byte and two-byte cases because + * we're only dealing with the first 256 Unicode codepoints. */ + } else { + ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6)); + ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f)); + } + pos--; + s++; + } + ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; + str = zend_string_truncate(str, ZSTR_LEN(str), 0); + return str; +} +/* }}} */ + +/* {{{ */ +static zend_string *php_utf8_decode(const char *s, size_t len) +{ + size_t pos = 0; + unsigned int c; + zend_string *str; + + str = zend_string_alloc(len, 0); + ZSTR_LEN(str) = 0; + while (pos < len) { + int status = FAILURE; + c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status); + + /* The lower 256 codepoints of Unicode are identical to Latin-1, + * so we don't need to do any mapping here beyond replacing non-Latin-1 + * characters. */ + if (status == FAILURE || c > 0xFFU) { + c = '?'; + } + + ZSTR_VAL(str)[ZSTR_LEN(str)++] = c; + } + ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; + if (ZSTR_LEN(str) < len) { + str = zend_string_truncate(str, ZSTR_LEN(str), 0); + } + + return str; +} +/* }}} */ + + +/* {{{ proto string utf8_encode(string data) + Encodes an ISO-8859-1 string to UTF-8 */ +PHP_FUNCTION(utf8_encode) +{ + char *arg; + size_t arg_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { + return; + } + + RETURN_STR(php_utf8_encode(arg, arg_len)); +} +/* }}} */ + +/* {{{ proto string utf8_decode(string data) + Converts a UTF-8 encoded string to ISO-8859-1 */ +PHP_FUNCTION(utf8_decode) +{ + char *arg; + size_t arg_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { + return; + } + + RETURN_STR(php_utf8_decode(arg, arg_len)); +} +/* }}} */ + /* * Local variables: * tab-width: 4 diff --git a/ext/xml/tests/bug43957.phpt b/ext/standard/tests/strings/bug43957.phpt index f11d15627b..0380787b73 100644 --- a/ext/xml/tests/bug43957.phpt +++ b/ext/standard/tests/strings/bug43957.phpt @@ -1,10 +1,5 @@ --TEST-- Bug #43957 (utf8_decode() bogus conversion on multibyte indicator near end of string) ---SKIPIF-- -<?php -require_once("skipif.inc"); -if (!extension_loaded('xml')) die ("skip xml extension not available"); -?> --FILE-- <?php echo utf8_decode('abc'.chr(0xe0)); diff --git a/ext/xml/tests/bug49687.phpt b/ext/standard/tests/strings/bug49687.phpt index 3ff19cee7e..99e8dc3ec6 100644 --- a/ext/xml/tests/bug49687.phpt +++ b/ext/standard/tests/strings/bug49687.phpt @@ -1,10 +1,5 @@ --TEST--
Bug #49687 Several utf8_decode deficiencies and vulnerabilities
---SKIPIF--
-<?php
-require_once("skipif.inc");
-if (!extension_loaded('xml')) die ("skip xml extension not available");
-?>
--FILE--
<?php
diff --git a/ext/xml/tests/xml006.phpt b/ext/standard/tests/strings/utf8.phpt index c714e85913..aea04fdecd 100644 --- a/ext/xml/tests/xml006.phpt +++ b/ext/standard/tests/strings/utf8.phpt @@ -1,7 +1,5 @@ --TEST-- UTF-8<->ISO Latin 1 encoding/decoding test ---SKIPIF-- -<?php include("skipif.inc"); ?> --FILE-- <?php printf("%s -> %s\n", urlencode("æ"), urlencode(utf8_encode("æ"))); diff --git a/ext/xml/tests/utf8_decode_error.phpt b/ext/standard/tests/strings/utf8_decode_error.phpt index 8735fd82f6..911cc15cfc 100644 --- a/ext/xml/tests/utf8_decode_error.phpt +++ b/ext/standard/tests/strings/utf8_decode_error.phpt @@ -1,16 +1,10 @@ --TEST-- Test utf8_decode() function : error conditions ---SKIPIF-- -<?php -if (!extension_loaded("xml")) { - print "skip - XML extension not loaded"; -} -?> --FILE-- <?php /* Prototype : proto string utf8_decode(string data) * Description: Converts a UTF-8 encoded string to ISO-8859-1 - * Source code: ext/xml/xml.c + * Source code: ext/standard/string.c * Alias to functions: */ diff --git a/ext/xml/tests/utf8_decode_variation1.phpt b/ext/standard/tests/strings/utf8_decode_variation1.phpt index 4b9679a895..f564b87da0 100644 --- a/ext/xml/tests/utf8_decode_variation1.phpt +++ b/ext/standard/tests/strings/utf8_decode_variation1.phpt @@ -1,16 +1,10 @@ --TEST-- Test utf8_decode() function : usage variations - different types for data ---SKIPIF-- -<?php -if (!extension_loaded("xml")) { - print "skip - XML extension not loaded"; -} -?> --FILE-- <?php /* Prototype : proto string utf8_decode(string data) * Description: Converts a UTF-8 encoded string to ISO-8859-1 - * Source code: ext/xml/xml.c + * Source code: ext/standard/string.c * Alias to functions: */ diff --git a/ext/xml/tests/utf8_encode_error.phpt b/ext/standard/tests/strings/utf8_encode_error.phpt index a82f98ff3b..e12f0978b6 100644 --- a/ext/xml/tests/utf8_encode_error.phpt +++ b/ext/standard/tests/strings/utf8_encode_error.phpt @@ -1,16 +1,10 @@ --TEST-- Test utf8_encode() function : error conditions ---SKIPIF-- -<?php -if (!extension_loaded("xml")) { - print "skip - XML extension not loaded"; -} -?> --FILE-- <?php /* Prototype : proto string utf8_encode(string data) * Description: Encodes an ISO-8859-1 string to UTF-8 - * Source code: ext/xml/xml.c + * Source code: ext/standard/string.c * Alias to functions: */ diff --git a/ext/xml/tests/utf8_encode_variation1.phpt b/ext/standard/tests/strings/utf8_encode_variation1.phpt index 04b956c422..fa4b79976e 100644 --- a/ext/xml/tests/utf8_encode_variation1.phpt +++ b/ext/standard/tests/strings/utf8_encode_variation1.phpt @@ -1,16 +1,10 @@ --TEST-- Test utf8_encode() function : usage variations - <type here specifics of this variation> ---SKIPIF-- -<?php -if (!extension_loaded("xml")) { - print "skip - XML extension not loaded"; -} -?> --FILE-- <?php /* Prototype : proto string utf8_encode(string data) * Description: Encodes an ISO-8859-1 string to UTF-8 - * Source code: ext/xml/xml.c + * Source code: ext/standard/string.c * Alias to functions: */ diff --git a/ext/xml/xml.c b/ext/xml/xml.c index f0da47dc5b..f8d72523a0 100644 --- a/ext/xml/xml.c +++ b/ext/xml/xml.c @@ -212,14 +212,6 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_get_option, 0, 0, 2) ZEND_ARG_INFO(0, option) ZEND_END_ARG_INFO() -ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1) - ZEND_ARG_INFO(0, data) -ZEND_END_ARG_INFO() - -ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1) - ZEND_ARG_INFO(0, data) -ZEND_END_ARG_INFO() - const zend_function_entry xml_functions[] = { PHP_FE(xml_parser_create, arginfo_xml_parser_create) PHP_FE(xml_parser_create_ns, arginfo_xml_parser_create_ns) @@ -243,8 +235,6 @@ const zend_function_entry xml_functions[] = { PHP_FE(xml_parser_free, arginfo_xml_parser_free) PHP_FE(xml_parser_set_option, arginfo_xml_parser_set_option) PHP_FE(xml_parser_get_option, arginfo_xml_parser_get_option) - PHP_FE(utf8_encode, arginfo_utf8_encode) - PHP_FE(utf8_decode, arginfo_utf8_decode) PHP_FE_END }; @@ -1667,46 +1657,6 @@ PHP_FUNCTION(xml_parser_get_option) } /* }}} */ -/* {{{ proto string utf8_encode(string data) - Encodes an ISO-8859-1 string to UTF-8 */ -PHP_FUNCTION(utf8_encode) -{ - char *arg; - size_t arg_len; - zend_string *encoded; - - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { - return; - } - - encoded = xml_utf8_encode(arg, arg_len, (XML_Char*)"ISO-8859-1"); - if (encoded == NULL) { - RETURN_FALSE; - } - RETURN_STR(encoded); -} -/* }}} */ - -/* {{{ proto string utf8_decode(string data) - Converts a UTF-8 encoded string to ISO-8859-1 */ -PHP_FUNCTION(utf8_decode) -{ - char *arg; - size_t arg_len; - zend_string *decoded; - - if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { - return; - } - - decoded = xml_utf8_decode((XML_Char*)arg, arg_len, (XML_Char*)"ISO-8859-1"); - if (decoded == NULL) { - RETURN_FALSE; - } - RETURN_STR(decoded); -} -/* }}} */ - #endif /* |