summaryrefslogtreecommitdiff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r--ext/mbstring/mbstring.c709
1 files changed, 652 insertions, 57 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index f5ae57c719..d6d7cfc432 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -430,6 +430,21 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1)
+ ZEND_ARG_INFO(0, str)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1)
+ ZEND_ARG_INFO(0, str)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
+ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1)
+ ZEND_ARG_INFO(0, cp)
+ ZEND_ARG_INFO(0, encoding)
+ZEND_END_ARG_INFO()
+
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
ZEND_ARG_INFO(0, encoding)
ZEND_END_ARG_INFO()
@@ -555,6 +570,9 @@ const zend_function_entry mbstring_functions[] = {
PHP_FE(mb_send_mail, arginfo_mb_send_mail)
PHP_FE(mb_get_info, arginfo_mb_get_info)
PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
+ PHP_FE(mb_ord, arginfo_mb_ord)
+ PHP_FE(mb_chr, arginfo_mb_chr)
+ PHP_FE(mb_scrub, arginfo_mb_scrub)
#if HAVE_MBREGEX
PHP_MBREGEX_FUNCTION_ENTRIES
#endif
@@ -3137,8 +3155,33 @@ PHP_FUNCTION(mb_strimwidth)
}
/* }}} */
+
+/* See mbfl_no_encoding definition for list of unsupported encodings */
+static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
+{
+ return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
+ || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
+ || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
+ || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
+}
+
+
+/* See mbfl_no_encoding definition for list of unicode encodings */
+static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc)
+{
+ return (no_enc >= mbfl_no_encoding_ucs4 && no_enc <= mbfl_no_encoding_utf8_sb);
+}
+
+
+/* See mbfl_no_encoding definition for list of UTF-8 encodings */
+static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
+{
+ return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
+}
+
+
/* {{{ MBSTRING_API char *php_mb_convert_encoding() */
-MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
+MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len)
{
mbfl_string string, result, *ret;
const mbfl_encoding *from_encoding, *to_encoding;
@@ -3207,7 +3250,28 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
return NULL;
}
mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
- mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+
+ if (string.no_encoding == MBSTRG(current_internal_encoding)->no_encoding) {
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ } else if (php_mb_is_no_encoding_unicode(string.no_encoding) && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
+
+ if (php_mb_is_no_encoding_utf8(string.no_encoding)) {
+
+ if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff &&
+ 0xe000 > MBSTRG(current_filter_illegal_substchar)
+ ) {
+ mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
+ } else {
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ }
+
+ } else {
+ mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
+ }
+
+ } else {
+ mbfl_buffer_converter_illegal_substchar(convd, 0x3f);
+ }
/* do it */
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
@@ -3224,12 +3288,83 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co
}
/* }}} */
+MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings)
+{
+ HashTable *output, *chash;
+ zend_long idx;
+ zend_string *key, *key_tmp;
+ zval *entry, entry_tmp;
+ size_t ckey_len, cval_len;
+ char *ckey, *cval;
+
+ if (!input) {
+ return NULL;
+ }
+
+ if (input->u.v.nApplyCount++ > 1) {
+ input->u.v.nApplyCount--;
+ php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
+ return NULL;
+ }
+ output = (HashTable *)emalloc(sizeof(HashTable));
+ zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0);
+ ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
+ /* convert key */
+ if (key) {
+ ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len);
+ key_tmp = zend_string_init(ckey, ckey_len, 0);
+ }
+ /* convert value */
+ ZEND_ASSERT(entry);
+ switch(Z_TYPE_P(entry)) {
+ case IS_STRING:
+ cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len);
+ ZVAL_STRINGL(&entry_tmp, cval, cval_len);
+ efree(cval);
+ break;
+ case IS_NULL:
+ case IS_TRUE:
+ case IS_FALSE:
+ case IS_LONG:
+ case IS_DOUBLE:
+ ZVAL_COPY(&entry_tmp, entry);
+ break;
+ case IS_ARRAY:
+ chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings);
+ if (!chash) {
+ chash = (HashTable *)emalloc(sizeof(HashTable));
+ zend_hash_init(chash, 0, NULL, ZVAL_PTR_DTOR, 0);
+ }
+ ZVAL_ARR(&entry_tmp, chash);
+ break;
+ case IS_OBJECT:
+ default:
+ if (key) {
+ efree(key_tmp);
+ }
+ php_error_docref(NULL, E_WARNING, "Object is not supported");
+ continue;
+ }
+ if (key) {
+ zend_hash_add(output, key_tmp, &entry_tmp);
+ } else {
+ zend_hash_index_add(output, idx, &entry_tmp);
+ }
+ } ZEND_HASH_FOREACH_END();
+ input->u.v.nApplyCount--;
+
+ return output;
+}
+/* }}} */
+
+
/* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)
{
- char *arg_str, *arg_new;
- size_t str_len, new_len;
+ zval *input;
+ char *arg_new;
+ size_t new_len;
zval *arg_old = NULL;
size_t size, l, n;
char *_from_encodings = NULL, *ret, *s_free = NULL;
@@ -3237,10 +3372,14 @@ PHP_FUNCTION(mb_convert_encoding)
zval *hash_entry;
HashTable *target_hash;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) {
return;
}
+ if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) {
+ convert_to_string(input);
+ }
+
if (arg_old) {
switch (Z_TYPE_P(arg_old)) {
case IS_ARRAY:
@@ -3275,19 +3414,26 @@ PHP_FUNCTION(mb_convert_encoding)
}
}
- /* new encoding */
- ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size);
- if (ret != NULL) {
- // TODO: avoid reallocation ???
- RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
- efree(ret);
+ if (Z_TYPE_P(input) == IS_STRING) {
+ /* new encoding */
+ ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size);
+ if (ret != NULL) {
+ // TODO: avoid reallocation ???
+ RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
+ efree(ret);
+ } else {
+ RETVAL_FALSE;
+ }
+ if (s_free) {
+ efree(s_free);
+ }
} else {
- RETVAL_FALSE;
+ HashTable *tmp;
+ tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings);
+ RETURN_ARR(tmp);
}
- if ( s_free) {
- efree(s_free);
- }
+ return;
}
/* }}} */
@@ -4257,11 +4403,11 @@ PHP_FUNCTION(mb_send_mail)
size_t to_len;
char *message = NULL;
size_t message_len;
- char *headers = NULL;
- size_t headers_len;
char *subject = NULL;
- zend_string *extra_cmd = NULL;
size_t subject_len;
+ zval *headers = NULL;
+ zend_string *extra_cmd = NULL;
+ zend_string *str_headers=NULL, *tmp_headers;
int i;
char *to_r = NULL;
char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
@@ -4301,7 +4447,7 @@ PHP_FUNCTION(mb_send_mail)
body_enc = lang->mail_body_encoding;
}
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) {
return;
}
@@ -4310,7 +4456,20 @@ PHP_FUNCTION(mb_send_mail)
MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
if (headers) {
- MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
+ switch(Z_TYPE_P(headers)) {
+ case IS_STRING:
+ tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0);
+ MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers));
+ str_headers = php_trim(tmp_headers, NULL, 0, 2);
+ zend_string_release(tmp_headers);
+ break;
+ case IS_ARRAY:
+ str_headers = php_mail_build_headers(headers);
+ break;
+ default:
+ php_error_docref(NULL, E_WARNING, "headers parameter must be string or array");
+ RETURN_FALSE;
+ }
}
if (extra_cmd) {
MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd));
@@ -4318,8 +4477,8 @@ PHP_FUNCTION(mb_send_mail)
zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
- if (headers != NULL) {
- _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
+ if (str_headers != NULL) {
+ _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
}
if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
@@ -4462,10 +4621,11 @@ PHP_FUNCTION(mb_send_mail)
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
- if (headers != NULL) {
- p = headers;
- n = headers_len;
+ if (str_headers != NULL) {
+ p = ZSTR_VAL(str_headers);
+ n = ZSTR_LEN(str_headers);
mbfl_memory_device_strncat(&device, p, n);
+ zend_string_release(str_headers);
if (n > 0 && p[n - 1] != '\n') {
mbfl_memory_device_strncat(&device, "\n", 1);
}
@@ -4498,7 +4658,7 @@ PHP_FUNCTION(mb_send_mail)
mbfl_memory_device_unput(&device);
mbfl_memory_device_output('\0', &device);
- headers = (char *)device.buffer;
+ str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
if (force_extra_parameters) {
extra_cmd = php_escape_shell_cmd(force_extra_parameters);
@@ -4506,7 +4666,7 @@ PHP_FUNCTION(mb_send_mail)
extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
}
- if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
+ if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
RETVAL_TRUE;
} else {
RETVAL_FALSE;
@@ -4527,6 +4687,9 @@ PHP_FUNCTION(mb_send_mail)
}
mbfl_memory_device_clear(&device);
zend_hash_destroy(&ht_headers);
+ if (str_headers) {
+ zend_string_release(str_headers);
+ }
}
#undef SKIP_LONG_HEADER_SEP_MBSTRING
@@ -4715,13 +4878,51 @@ PHP_FUNCTION(mb_get_info)
}
/* }}} */
-MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
+
+static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
{
- const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
mbfl_buffer_converter *convd;
+
+ convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
+ if (convd == NULL) {
+ return NULL;
+ }
+ mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
+ mbfl_buffer_converter_illegal_substchar(convd, 0);
+ return convd;
+}
+
+
+static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
mbfl_string string, result, *ret = NULL;
long illegalchars = 0;
+ /* initialize string */
+ mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
+ mbfl_string_init(&result);
+
+ string.val = (unsigned char *) input;
+ string.len = length;
+
+ ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
+ illegalchars = mbfl_buffer_illegalchars(convd);
+
+ if (ret != NULL) {
+ if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
+ mbfl_string_clear(&result);
+ return 1;
+ }
+ mbfl_string_clear(&result);
+ }
+ return 0;
+}
+
+
+MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc)
+{
+ const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
+ mbfl_buffer_converter *convd;
+
if (input == NULL) {
return MBSTRG(illegalchars) == 0;
}
@@ -4734,60 +4935,454 @@ MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const c
}
}
- convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
-
+ convd = php_mb_init_convd(encoding);
if (convd == NULL) {
php_error_docref(NULL, E_WARNING, "Unable to create converter");
return 0;
}
- mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
- mbfl_buffer_converter_illegal_substchar(convd, 0);
+ if (php_mb_check_encoding_impl(convd, input, length, encoding)) {
+ mbfl_buffer_converter_delete(convd);
+ return 1;
+ }
+ mbfl_buffer_converter_delete(convd);
+ return 0;
+}
- /* initialize string */
- mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
- mbfl_string_init(&result);
- string.val = (unsigned char *) input;
- string.len = length;
+MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc)
+{
+ const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
+ mbfl_buffer_converter *convd;
+ zend_long idx;
+ zend_string *key;
+ zval *entry;
+ int valid = 1;
- ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- illegalchars = mbfl_buffer_illegalchars(convd);
- mbfl_buffer_converter_delete(convd);
+ (void)(idx);
- if (ret != NULL) {
- if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
- mbfl_string_clear(&result);
- return 1;
+ if (enc != NULL) {
+ encoding = mbfl_name2encoding(ZSTR_VAL(enc));
+ if (!encoding || encoding == &mbfl_encoding_pass) {
+ php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc));
+ return 0;
}
+ }
- mbfl_string_clear(&result);
+ convd = php_mb_init_convd(encoding);
+ if (convd == NULL) {
+ php_error_docref(NULL, E_WARNING, "Unable to create converter");
+ return 0;
}
- return 0;
+ if (vars->u.v.nApplyCount++ > 1) {
+ vars->u.v.nApplyCount--;
+ mbfl_buffer_converter_delete(convd);
+ php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
+ return 0;
+ }
+ ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
+ ZVAL_DEREF(entry);
+ if (key) {
+ if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
+ valid = 0;
+ break;
+ }
+ }
+ switch (Z_TYPE_P(entry)) {
+ case IS_STRING:
+ if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
+ valid = 0;
+ break;
+ }
+ break;
+ case IS_ARRAY:
+ if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) {
+ valid = 0;
+ break;
+ }
+ break;
+ case IS_LONG:
+ case IS_DOUBLE:
+ case IS_NULL:
+ case IS_TRUE:
+ case IS_FALSE:
+ break;
+ default:
+ /* Other types are error. */
+ valid = 0;
+ break;
+ }
+ } ZEND_HASH_FOREACH_END();
+ vars->u.v.nApplyCount--;
+ mbfl_buffer_converter_delete(convd);
+ return valid;
}
-/* {{{ proto bool mb_check_encoding([string var[, string encoding]])
+
+/* {{{ proto bool mb_check_encoding([mixed var[, string encoding]])
Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)
{
- char *var = NULL;
- size_t var_len;
- char *enc = NULL;
- size_t enc_len;
+ zval *input = NULL;
+ zend_string *enc = NULL;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) {
return;
- }
+ }
- RETVAL_FALSE;
+ /* FIXME: Actually check all inputs, except $_FILES file content. */
+ if (input == NULL) {
+ if (MBSTRG(illegalchars) == 0) {
+ RETURN_TRUE;
+ }
+ RETURN_FALSE;
+ }
- if (php_mb_check_encoding(var, var_len, enc)) {
- RETVAL_TRUE;
+ switch(Z_TYPE_P(input)) {
+ case IS_LONG:
+ case IS_DOUBLE:
+ case IS_NULL:
+ case IS_TRUE:
+ case IS_FALSE:
+ RETURN_TRUE;
+ break;
+ case IS_STRING:
+ if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) {
+ RETURN_FALSE;
+ }
+ break;
+ case IS_ARRAY:
+ if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) {
+ RETURN_FALSE;
+ }
+ break;
+ default:
+ php_error_docref(NULL, E_WARNING, "Input is something other than scalar or array");
+ RETURN_FALSE;
+ }
+ RETURN_TRUE;
+}
+/* }}} */
+
+
+static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc)
+{
+ enum mbfl_no_encoding no_enc;
+ char* ret;
+ size_t ret_len;
+ const mbfl_encoding *encoding;
+ unsigned char char_len;
+ zend_long cp;
+
+ if (enc == NULL) {
+ no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+ } else {
+ no_enc = mbfl_name2no_encoding(enc);
+
+ if (no_enc == mbfl_no_encoding_invalid) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ return -1;
+ }
+ }
+
+ if (php_mb_is_no_encoding_unicode(no_enc)) {
+
+ ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len);
+
+ if (ret == NULL) {
+ return -1;
+ }
+
+ cp = (unsigned char) ret[0] << 24 | \
+ (unsigned char) ret[1] << 16 | \
+ (unsigned char) ret[2] << 8 | \
+ (unsigned char) ret[3];
+
+ efree(ret);
+
+ return cp;
+
+ } else if (php_mb_is_unsupported_no_encoding(no_enc)) {
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
+ return -1;
+ }
+
+ ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len);
+
+ if (ret == NULL) {
+ return -1;
+ }
+
+ encoding = mbfl_no2encoding(no_enc);
+ char_len = php_mb_mbchar_bytes_ex(ret, encoding);
+
+ if (char_len == 1) {
+ cp = (unsigned char) ret[0];
+ } else if (char_len == 2) {
+ cp = ((unsigned char) ret[0] << 8) | \
+ (unsigned char) ret[1];
+ } else if (char_len == 3) {
+ cp = ((unsigned char) ret[0] << 16) | \
+ ((unsigned char) ret[1] << 8) | \
+ (unsigned char) ret[2];
+ } else {
+ cp = ((unsigned char) ret[0] << 24) | \
+ ((unsigned char) ret[1] << 16) | \
+ ((unsigned char) ret[2] << 8) | \
+ (unsigned char) ret[3];
}
+
+ efree(ret);
+
+ return cp;
+}
+
+
+/* {{{ proto bool mb_ord([string str[, string encoding]]) */
+PHP_FUNCTION(mb_ord)
+{
+ char* str;
+ size_t str_len;
+ char* enc = NULL;
+ size_t enc_len;
+ zend_long cp;
+
+ ZEND_PARSE_PARAMETERS_START(1, 2)
+ Z_PARAM_STRING(str, str_len)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_STRING(enc, enc_len)
+ ZEND_PARSE_PARAMETERS_END();
+
+ cp = php_mb_ord(str, str_len, enc);
+
+ if (0 > cp) {
+ RETURN_FALSE;
+ }
+
+ RETURN_LONG(cp);
}
/* }}} */
+
+static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len)
+{
+ enum mbfl_no_encoding no_enc;
+ char* buf;
+ size_t buf_len;
+ char* ret;
+ size_t ret_len;
+
+ if (enc == NULL) {
+ no_enc = MBSTRG(current_internal_encoding)->no_encoding;
+ } else {
+ no_enc = mbfl_name2no_encoding(enc);
+ if (no_enc == mbfl_no_encoding_invalid) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ return NULL;
+ }
+ }
+
+ if (php_mb_is_no_encoding_utf8(no_enc)) {
+
+ if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) {
+ if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
+ if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else {
+ cp = 0x3f;
+ }
+ } else {
+ cp = 0x3f;
+ }
+ }
+
+ if (cp < 0x80) {
+ ret_len = 1;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = cp;
+ ret[1] = 0;
+ } else if (cp < 0x800) {
+ ret_len = 2;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = 0xc0 | (cp >> 6);
+ ret[1] = 0x80 | (cp & 0x3f);
+ ret[2] = 0;
+ } else if (cp < 0x10000) {
+ ret_len = 3;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = 0xe0 | (cp >> 12);
+ ret[1] = 0x80 | ((cp >> 6) & 0x3f);
+ ret[2] = 0x80 | (cp & 0x3f);
+ ret[3] = 0;
+ } else {
+ ret_len = 4;
+ ret = (char *) safe_emalloc(ret_len, 1, 1);
+ ret[0] = 0xf0 | (cp >> 18);
+ ret[1] = 0x80 | ((cp >> 12) & 0x3f);
+ ret[2] = 0x80 | ((cp >> 6) & 0x3f);
+ ret[3] = 0x80 | (cp & 0x3f);
+ ret[4] = 0;
+ }
+
+ if (output_len) {
+ *output_len = ret_len;
+ }
+
+ return ret;
+
+ } else if (php_mb_is_no_encoding_unicode(no_enc)) {
+
+ if (0 > cp || 0x10ffff < cp) {
+
+ if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else {
+ cp = 0x3f;
+ }
+
+ }
+
+ buf_len = 4;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = (cp >> 24) & 0xff;
+ buf[1] = (cp >> 16) & 0xff;
+ buf[2] = (cp >> 8) & 0xff;
+ buf[3] = cp & 0xff;
+ buf[4] = 0;
+
+ ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len);
+ efree(buf);
+
+ if (output_len) {
+ *output_len = ret_len;
+ }
+
+ return ret;
+
+ } else if (php_mb_is_unsupported_no_encoding(no_enc)) {
+ php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc);
+ return NULL;
+ }
+
+ if (0 > cp || cp > 0x100000000) {
+ if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) {
+ cp = MBSTRG(current_filter_illegal_substchar);
+ } else {
+ cp = 0x3f;
+ }
+ }
+
+ if (cp < 0x100) {
+ buf_len = 1;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp;
+ buf[1] = 0;
+ } else if (cp < 0x10000) {
+ buf_len = 2;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp >> 8;
+ buf[1] = cp & 0xff;
+ buf[2] = 0;
+ } else if (cp < 0x1000000) {
+ buf_len = 3;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp >> 16;
+ buf[1] = (cp >> 8) & 0xff;
+ buf[2] = cp & 0xff;
+ buf[3] = 0;
+ } else {
+ buf_len = 4;
+ buf = (char *) safe_emalloc(buf_len, 1, 1);
+ buf[0] = cp >> 24;
+ buf[1] = (cp >> 16) & 0xff;
+ buf[2] = (cp >> 8) & 0xff;
+ buf[3] = cp & 0xff;
+ buf[4] = 0;
+ }
+
+ ret = php_mb_convert_encoding(buf, buf_len, enc, enc, &ret_len);
+ efree(buf);
+
+ if (output_len) {
+ *output_len = ret_len;
+ }
+
+ return ret;
+}
+
+
+/* {{{ proto bool mb_ord([int cp[, string encoding]]) */
+PHP_FUNCTION(mb_chr)
+{
+ zend_long cp;
+ char* enc = NULL;
+ size_t enc_len;
+ char* ret;
+ size_t ret_len;
+
+ ZEND_PARSE_PARAMETERS_START(1, 2)
+ Z_PARAM_LONG(cp)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_STRING(enc, enc_len)
+ ZEND_PARSE_PARAMETERS_END();
+
+ ret = php_mb_chr(cp, enc, &ret_len);
+
+ if (ret == NULL) {
+ RETURN_FALSE;
+ }
+
+ RETVAL_STRING(ret);
+ efree(ret);
+}
+/* }}} */
+
+
+static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc)
+{
+ size_t ret_len;
+
+ return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len);
+}
+
+
+/* {{{ proto bool mb_scrub([string str[, string encoding]]) */
+PHP_FUNCTION(mb_scrub)
+{
+ char* str;
+ size_t str_len;
+ char *enc = NULL;
+ size_t enc_len;
+ char *ret;
+
+ ZEND_PARSE_PARAMETERS_START(1, 2)
+ Z_PARAM_STRING(str, str_len)
+ Z_PARAM_OPTIONAL
+ Z_PARAM_STRING(enc, enc_len)
+ ZEND_PARSE_PARAMETERS_END();
+
+ if (enc == NULL) {
+ enc = (char *) MBSTRG(current_internal_encoding)->name;
+ } else if (!mbfl_is_support_encoding(enc)) {
+ php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc);
+ RETURN_FALSE;
+ }
+
+ ret = php_mb_scrub(str, str_len, enc);
+
+ if (ret == NULL) {
+ RETURN_FALSE;
+ }
+
+ RETVAL_STRING(ret);
+ efree(ret);
+}
+/* }}} */
+
+
/* {{{ php_mb_populate_current_detect_order_list */
static void php_mb_populate_current_detect_order_list(void)
{