summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrea Faulds <ajf@ajf.me>2016-10-13 23:33:33 +0100
committerAndrea Faulds <ajf@ajf.me>2016-10-17 15:39:02 +0100
commit1a512eed449128334edf0329b72e53c5caaaa95b (patch)
treec095b8ccdb00a6d364a1b1f642c5a5a218a675d6
parenta5251f78f8c3204c04d93c02b63d2e3967d3e1c1 (diff)
downloadphp-git-1a512eed449128334edf0329b72e53c5caaaa95b.tar.gz
Move utf8_encode and utf8_decode to ext/standard
-rw-r--r--ext/standard/basic_functions.c10
-rw-r--r--ext/standard/php_string.h2
-rw-r--r--ext/standard/string.c94
-rw-r--r--ext/standard/tests/strings/bug43957.phpt (renamed from ext/xml/tests/bug43957.phpt)5
-rw-r--r--ext/standard/tests/strings/bug49687.phpt (renamed from ext/xml/tests/bug49687.phpt)5
-rw-r--r--ext/standard/tests/strings/utf8.phpt (renamed from ext/xml/tests/xml006.phpt)2
-rw-r--r--ext/standard/tests/strings/utf8_decode_error.phpt (renamed from ext/xml/tests/utf8_decode_error.phpt)8
-rw-r--r--ext/standard/tests/strings/utf8_decode_variation1.phpt (renamed from ext/xml/tests/utf8_decode_variation1.phpt)8
-rw-r--r--ext/standard/tests/strings/utf8_encode_error.phpt (renamed from ext/xml/tests/utf8_encode_error.phpt)8
-rw-r--r--ext/standard/tests/strings/utf8_encode_variation1.phpt (renamed from ext/xml/tests/utf8_encode_variation1.phpt)8
-rw-r--r--ext/xml/xml.c50
11 files changed, 110 insertions, 90 deletions
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c
index 13e8a4e6eb..d528e51908 100644
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@@ -2465,6 +2465,14 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_substr_compare, 0, 0, 3)
ZEND_ARG_INFO(0, length)
ZEND_ARG_INFO(0, case_sensitivity)
ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1) /* argument metadata registered with utf8_encode(): one parameter, named data */
+ ZEND_ARG_INFO(0, data)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1) /* argument metadata registered with utf8_decode(): one parameter, named data */
+ ZEND_ARG_INFO(0, data)
+ZEND_END_ARG_INFO()
/* }}} */
/* {{{ syslog.c */
#ifdef HAVE_SYSLOG_H
@@ -2764,6 +2772,8 @@ const zend_function_entry basic_functions[] = { /* {{{ */
PHP_FE(str_split, arginfo_str_split)
PHP_FE(strpbrk, arginfo_strpbrk)
PHP_FE(substr_compare, arginfo_substr_compare)
+ PHP_FE(utf8_encode, arginfo_utf8_encode)
+ PHP_FE(utf8_decode, arginfo_utf8_decode)
#ifdef HAVE_STRCOLL
PHP_FE(strcoll, arginfo_strcoll)
diff --git a/ext/standard/php_string.h b/ext/standard/php_string.h
index 14b66e7e13..6fc7587121 100644
--- a/ext/standard/php_string.h
+++ b/ext/standard/php_string.h
@@ -93,6 +93,8 @@ PHP_FUNCTION(str_word_count);
PHP_FUNCTION(str_split);
PHP_FUNCTION(strpbrk);
PHP_FUNCTION(substr_compare);
+PHP_FUNCTION(utf8_encode);
+PHP_FUNCTION(utf8_decode);
#ifdef HAVE_STRCOLL
PHP_FUNCTION(strcoll);
#endif
diff --git a/ext/standard/string.c b/ext/standard/string.c
index fa59ddd06f..4389e10702 100644
--- a/ext/standard/string.c
+++ b/ext/standard/string.c
@@ -64,6 +64,8 @@
/* For str_getcsv() support */
#include "ext/standard/file.h"
+/* For php_next_utf8_char() */
+#include "ext/standard/html.h"
#define STR_PAD_LEFT 0
#define STR_PAD_RIGHT 1
@@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare)
}
/* }}} */
+/* {{{ php_utf8_encode: re-encodes the len bytes at s, read as Latin-1, into a newly allocated UTF-8 zend_string */
+static zend_string *php_utf8_encode(const char *s, size_t len)
+{
+ size_t pos = len; /* number of input bytes still to be consumed */
+ zend_string *str;
+ unsigned char c;
+
+ str = zend_string_safe_alloc(len, 2, 0, 0); /* presumably reserves len * 2 bytes, i.e. two output bytes per input byte at worst - confirm against zend_string_safe_alloc() */
+ ZSTR_LEN(str) = 0; /* the length field doubles as the write cursor while the buffer is filled */
+ while (pos > 0) {
+ /* The lower 256 codepoints of Unicode are identical to Latin-1,
+ * so we don't need to do any mapping here. */
+ c = (unsigned char)(*s);
+ if (c < 0x80) { /* 7-bit value: copied through as a single byte */
+ ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c;
+ /* We only account for the single-byte and two-byte cases because
+ * we're only dealing with the first 256 Unicode codepoints. */
+ } else {
+ ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6)); /* lead byte: 0xc0 marker plus the top two bits of c */
+ ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f)); /* continuation byte: 0x80 marker plus the low six bits of c */
+ }
+ pos--;
+ s++;
+ }
+ ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; /* terminate right after the last byte written */
+ str = zend_string_truncate(str, ZSTR_LEN(str), 0); /* presumably trims the allocation down to the bytes actually written */
+ return str;
+}
+/* }}} */
+
+/* {{{ php_utf8_decode: converts the len bytes of UTF-8 at s into a newly allocated Latin-1 zend_string; undecodable input and codepoints above 0xFF become '?' */
+static zend_string *php_utf8_decode(const char *s, size_t len)
+{
+ size_t pos = 0; /* read offset into s; passed by address to php_next_utf8_char(), which is expected to advance it */
+ unsigned int c;
+ zend_string *str;
+
+ str = zend_string_alloc(len, 0); /* one output byte is written per loop iteration, so len bytes suffice provided every decode step consumes at least one input byte */
+ ZSTR_LEN(str) = 0; /* the length field doubles as the write cursor while the buffer is filled */
+ while (pos < len) {
+ int status = FAILURE; /* reset before every decode call */
+ c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status);
+
+ /* The lower 256 codepoints of Unicode are identical to Latin-1,
+ * so we don't need to do any mapping here beyond replacing non-Latin-1
+ * characters. */
+ if (status == FAILURE || c > 0xFFU) {
+ c = '?';
+ }
+
+ ZSTR_VAL(str)[ZSTR_LEN(str)++] = c; /* c is at most 0xFF at this point, so it fits in one output byte */
+ }
+ ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; /* terminate right after the last byte written */
+ if (ZSTR_LEN(str) < len) { /* only resize when the output came out shorter than the input */
+ str = zend_string_truncate(str, ZSTR_LEN(str), 0);
+ }
+
+ return str;
+}
+/* }}} */
+
+
+/* {{{ proto string utf8_encode(string data)
+ Encodes an ISO-8859-1 string to UTF-8 */
+PHP_FUNCTION(utf8_encode)
+{
+ char *arg; /* raw bytes of the data argument */
+ size_t arg_len; /* byte length of arg */
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { /* accepts exactly one string argument */
+ return; /* parsing failed: leave without assigning a result */
+ }
+
+ RETURN_STR(php_utf8_encode(arg, arg_len)); /* the string built by php_utf8_encode() becomes the function result */
+}
+/* }}} */
+
+/* {{{ proto string utf8_decode(string data)
+ Converts a UTF-8 encoded string to ISO-8859-1 */
+PHP_FUNCTION(utf8_decode)
+{
+ char *arg; /* raw bytes of the data argument */
+ size_t arg_len; /* byte length of arg */
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { /* accepts exactly one string argument */
+ return; /* parsing failed: leave without assigning a result */
+ }
+
+ RETURN_STR(php_utf8_decode(arg, arg_len)); /* the string built by php_utf8_decode() becomes the function result */
+}
+/* }}} */
+
/*
* Local variables:
* tab-width: 4
diff --git a/ext/xml/tests/bug43957.phpt b/ext/standard/tests/strings/bug43957.phpt
index f11d15627b..0380787b73 100644
--- a/ext/xml/tests/bug43957.phpt
+++ b/ext/standard/tests/strings/bug43957.phpt
@@ -1,10 +1,5 @@
--TEST--
Bug #43957 (utf8_decode() bogus conversion on multibyte indicator near end of string)
---SKIPIF--
-<?php
-require_once("skipif.inc");
-if (!extension_loaded('xml')) die ("skip xml extension not available");
-?>
--FILE--
<?php
echo utf8_decode('abc'.chr(0xe0));
diff --git a/ext/xml/tests/bug49687.phpt b/ext/standard/tests/strings/bug49687.phpt
index 3ff19cee7e..99e8dc3ec6 100644
--- a/ext/xml/tests/bug49687.phpt
+++ b/ext/standard/tests/strings/bug49687.phpt
@@ -1,10 +1,5 @@
--TEST--
Bug #49687 Several utf8_decode deficiencies and vulnerabilities
---SKIPIF--
-<?php
-require_once("skipif.inc");
-if (!extension_loaded('xml')) die ("skip xml extension not available");
-?>
--FILE--
<?php
diff --git a/ext/xml/tests/xml006.phpt b/ext/standard/tests/strings/utf8.phpt
index c714e85913..aea04fdecd 100644
--- a/ext/xml/tests/xml006.phpt
+++ b/ext/standard/tests/strings/utf8.phpt
@@ -1,7 +1,5 @@
--TEST--
UTF-8<->ISO Latin 1 encoding/decoding test
---SKIPIF--
-<?php include("skipif.inc"); ?>
--FILE--
<?php
printf("%s -> %s\n", urlencode("æ"), urlencode(utf8_encode("æ")));
diff --git a/ext/xml/tests/utf8_decode_error.phpt b/ext/standard/tests/strings/utf8_decode_error.phpt
index 8735fd82f6..911cc15cfc 100644
--- a/ext/xml/tests/utf8_decode_error.phpt
+++ b/ext/standard/tests/strings/utf8_decode_error.phpt
@@ -1,16 +1,10 @@
--TEST--
Test utf8_decode() function : error conditions
---SKIPIF--
-<?php
-if (!extension_loaded("xml")) {
- print "skip - XML extension not loaded";
-}
-?>
--FILE--
<?php
/* Prototype : proto string utf8_decode(string data)
* Description: Converts a UTF-8 encoded string to ISO-8859-1
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
* Alias to functions:
*/
diff --git a/ext/xml/tests/utf8_decode_variation1.phpt b/ext/standard/tests/strings/utf8_decode_variation1.phpt
index 4b9679a895..f564b87da0 100644
--- a/ext/xml/tests/utf8_decode_variation1.phpt
+++ b/ext/standard/tests/strings/utf8_decode_variation1.phpt
@@ -1,16 +1,10 @@
--TEST--
Test utf8_decode() function : usage variations - different types for data
---SKIPIF--
-<?php
-if (!extension_loaded("xml")) {
- print "skip - XML extension not loaded";
-}
-?>
--FILE--
<?php
/* Prototype : proto string utf8_decode(string data)
* Description: Converts a UTF-8 encoded string to ISO-8859-1
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
* Alias to functions:
*/
diff --git a/ext/xml/tests/utf8_encode_error.phpt b/ext/standard/tests/strings/utf8_encode_error.phpt
index a82f98ff3b..e12f0978b6 100644
--- a/ext/xml/tests/utf8_encode_error.phpt
+++ b/ext/standard/tests/strings/utf8_encode_error.phpt
@@ -1,16 +1,10 @@
--TEST--
Test utf8_encode() function : error conditions
---SKIPIF--
-<?php
-if (!extension_loaded("xml")) {
- print "skip - XML extension not loaded";
-}
-?>
--FILE--
<?php
/* Prototype : proto string utf8_encode(string data)
* Description: Encodes an ISO-8859-1 string to UTF-8
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
* Alias to functions:
*/
diff --git a/ext/xml/tests/utf8_encode_variation1.phpt b/ext/standard/tests/strings/utf8_encode_variation1.phpt
index 04b956c422..fa4b79976e 100644
--- a/ext/xml/tests/utf8_encode_variation1.phpt
+++ b/ext/standard/tests/strings/utf8_encode_variation1.phpt
@@ -1,16 +1,10 @@
--TEST--
Test utf8_encode() function : usage variations - <type here specifics of this variation>
---SKIPIF--
-<?php
-if (!extension_loaded("xml")) {
- print "skip - XML extension not loaded";
-}
-?>
--FILE--
<?php
/* Prototype : proto string utf8_encode(string data)
* Description: Encodes an ISO-8859-1 string to UTF-8
- * Source code: ext/xml/xml.c
+ * Source code: ext/standard/string.c
* Alias to functions:
*/
diff --git a/ext/xml/xml.c b/ext/xml/xml.c
index f0da47dc5b..f8d72523a0 100644
--- a/ext/xml/xml.c
+++ b/ext/xml/xml.c
@@ -212,14 +212,6 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_xml_parser_get_option, 0, 0, 2)
ZEND_ARG_INFO(0, option)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_encode, 0, 0, 1)
- ZEND_ARG_INFO(0, data)
-ZEND_END_ARG_INFO()
-
-ZEND_BEGIN_ARG_INFO_EX(arginfo_utf8_decode, 0, 0, 1)
- ZEND_ARG_INFO(0, data)
-ZEND_END_ARG_INFO()
-
const zend_function_entry xml_functions[] = {
PHP_FE(xml_parser_create, arginfo_xml_parser_create)
PHP_FE(xml_parser_create_ns, arginfo_xml_parser_create_ns)
@@ -243,8 +235,6 @@ const zend_function_entry xml_functions[] = {
PHP_FE(xml_parser_free, arginfo_xml_parser_free)
PHP_FE(xml_parser_set_option, arginfo_xml_parser_set_option)
PHP_FE(xml_parser_get_option, arginfo_xml_parser_get_option)
- PHP_FE(utf8_encode, arginfo_utf8_encode)
- PHP_FE(utf8_decode, arginfo_utf8_decode)
PHP_FE_END
};
@@ -1667,46 +1657,6 @@ PHP_FUNCTION(xml_parser_get_option)
}
/* }}} */
-/* {{{ proto string utf8_encode(string data)
- Encodes an ISO-8859-1 string to UTF-8 */
-PHP_FUNCTION(utf8_encode)
-{
- char *arg;
- size_t arg_len;
- zend_string *encoded;
-
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
- return;
- }
-
- encoded = xml_utf8_encode(arg, arg_len, (XML_Char*)"ISO-8859-1");
- if (encoded == NULL) {
- RETURN_FALSE;
- }
- RETURN_STR(encoded);
-}
-/* }}} */
-
-/* {{{ proto string utf8_decode(string data)
- Converts a UTF-8 encoded string to ISO-8859-1 */
-PHP_FUNCTION(utf8_decode)
-{
- char *arg;
- size_t arg_len;
- zend_string *decoded;
-
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) {
- return;
- }
-
- decoded = xml_utf8_decode((XML_Char*)arg, arg_len, (XML_Char*)"ISO-8859-1");
- if (decoded == NULL) {
- RETURN_FALSE;
- }
- RETURN_STR(decoded);
-}
-/* }}} */
-
#endif
/*