diff options
Diffstat (limited to 'ext/standard/string.c')
-rw-r--r-- | ext/standard/string.c | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/ext/standard/string.c b/ext/standard/string.c index fa59ddd06f..4389e10702 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -64,6 +64,8 @@ /* For str_getcsv() support */ #include "ext/standard/file.h" +/* For php_next_utf8_char() */ +#include "ext/standard/html.h" #define STR_PAD_LEFT 0 #define STR_PAD_RIGHT 1 @@ -5653,6 +5655,98 @@ PHP_FUNCTION(substr_compare) } /* }}} */ +/* {{{ */ +static zend_string *php_utf8_encode(const char *s, size_t len) +{ + size_t pos = len; + zend_string *str; + unsigned char c; + + str = zend_string_safe_alloc(len, 2, 0, 0); + ZSTR_LEN(str) = 0; + while (pos > 0) { + /* The lower 256 codepoints of Unicode are identical to Latin-1, + * so we don't need to do any mapping here. */ + c = (unsigned char)(*s); + if (c < 0x80) { + ZSTR_VAL(str)[ZSTR_LEN(str)++] = (char) c; + /* We only account for the single-byte and two-byte cases because + * we're only dealing with the first 256 Unicode codepoints. */ + } else { + ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0xc0 | (c >> 6)); + ZSTR_VAL(str)[ZSTR_LEN(str)++] = (0x80 | (c & 0x3f)); + } + pos--; + s++; + } + ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; + str = zend_string_truncate(str, ZSTR_LEN(str), 0); + return str; +} +/* }}} */ + +/* {{{ */ +static zend_string *php_utf8_decode(const char *s, size_t len) +{ + size_t pos = 0; + unsigned int c; + zend_string *str; + + str = zend_string_alloc(len, 0); + ZSTR_LEN(str) = 0; + while (pos < len) { + int status = FAILURE; + c = php_next_utf8_char((const unsigned char*)s, (size_t) len, &pos, &status); + + /* The lower 256 codepoints of Unicode are identical to Latin-1, + * so we don't need to do any mapping here beyond replacing non-Latin-1 + * characters. */ + if (status == FAILURE || c > 0xFFU) { + c = '?'; + } + + ZSTR_VAL(str)[ZSTR_LEN(str)++] = c; + } + ZSTR_VAL(str)[ZSTR_LEN(str)] = '\0'; + if (ZSTR_LEN(str) < len) { + str = zend_string_truncate(str, ZSTR_LEN(str), 0); + } + + return str; +} +/* }}} */ + + +/* {{{ proto string utf8_encode(string data) + Encodes an ISO-8859-1 string to UTF-8 */ +PHP_FUNCTION(utf8_encode) +{ + char *arg; + size_t arg_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { + return; + } + + RETURN_STR(php_utf8_encode(arg, arg_len)); +} +/* }}} */ + +/* {{{ proto string utf8_decode(string data) + Converts a UTF-8 encoded string to ISO-8859-1 */ +PHP_FUNCTION(utf8_decode) +{ + char *arg; + size_t arg_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &arg, &arg_len) == FAILURE) { + return; + } + + RETURN_STR(php_utf8_decode(arg, arg_len)); +} +/* }}} */ + /* * Local variables: * tab-width: 4 |