diff options
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r-- | ext/mbstring/mbstring.c | 709 |
1 files changed, 652 insertions, 57 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index f5ae57c719..d6d7cfc432 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -430,6 +430,21 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_scrub, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1) + ZEND_ARG_INFO(0, cp) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -555,6 +570,9 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_send_mail, arginfo_mb_send_mail) PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) + PHP_FE(mb_ord, arginfo_mb_ord) + PHP_FE(mb_chr, arginfo_mb_chr) + PHP_FE(mb_scrub, arginfo_mb_scrub) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -3137,8 +3155,33 @@ PHP_FUNCTION(mb_strimwidth) } /* }}} */ + +/* See mbfl_no_encoding definition for list of unsupported encodings */ +static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) +{ + return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint) + || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap) + || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms) + || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222)); +} + + +/* See mbfl_no_encoding definition for list of unicode encodings */ +static inline zend_bool php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc) +{ + return (no_enc >= mbfl_no_encoding_ucs4 && no_enc <= mbfl_no_encoding_utf8_sb); +} + + +/* See mbfl_no_encoding definition for list of UTF-8 encodings */ +static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) +{ + return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb); +} + + /* {{{ MBSTRING_API char *php_mb_convert_encoding() */ -MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len) +MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len) { mbfl_string string, result, *ret; const mbfl_encoding *from_encoding, *to_encoding; @@ -3207,7 +3250,28 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co return NULL; } mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode)); - mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + + if (string.no_encoding == MBSTRG(current_internal_encoding)->no_encoding) { + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + } else if (php_mb_is_no_encoding_unicode(string.no_encoding) && php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + + if (php_mb_is_no_encoding_utf8(string.no_encoding)) { + + if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && + 0xe000 > MBSTRG(current_filter_illegal_substchar) + ) { + mbfl_buffer_converter_illegal_substchar(convd, 0x3f); + } else { + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + } + + } else { + mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar)); + } + + } else { + mbfl_buffer_converter_illegal_substchar(convd, 0x3f); + } /* do it */ ret = mbfl_buffer_converter_feed_result(convd, &string, &result); @@ -3224,12 +3288,83 @@ MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, co } /* }}} */ +MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const char *_to_encoding, const char *_from_encodings) +{ + HashTable *output, *chash; + zend_long idx; + zend_string *key, *key_tmp; + zval *entry, entry_tmp; + size_t ckey_len, cval_len; + char *ckey, *cval; + + if (!input) { + return NULL; + } + + if (input->u.v.nApplyCount++ > 1) { + input->u.v.nApplyCount--; + php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values"); + return NULL; + } + output = (HashTable *)emalloc(sizeof(HashTable)); + zend_hash_init(output, zend_hash_num_elements(input), NULL, ZVAL_PTR_DTOR, 0); + ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) { + /* convert key */ + if (key) { + ckey = php_mb_convert_encoding(ZSTR_VAL(key), ZSTR_LEN(key), _to_encoding, _from_encodings, &ckey_len); + key_tmp = zend_string_init(ckey, ckey_len, 0); + } + /* convert value */ + ZEND_ASSERT(entry); + switch(Z_TYPE_P(entry)) { + case IS_STRING: + cval = php_mb_convert_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), _to_encoding, _from_encodings, &cval_len); + ZVAL_STRINGL(&entry_tmp, cval, cval_len); + efree(cval); + break; + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + case IS_LONG: + case IS_DOUBLE: + ZVAL_COPY(&entry_tmp, entry); + break; + case IS_ARRAY: + chash = php_mb_convert_encoding_recursive(HASH_OF(entry), _to_encoding, _from_encodings); + if (!chash) { + chash = (HashTable *)emalloc(sizeof(HashTable)); + zend_hash_init(chash, 0, NULL, ZVAL_PTR_DTOR, 0); + } + ZVAL_ARR(&entry_tmp, chash); + break; + case IS_OBJECT: + default: + if (key) { + efree(key_tmp); + } + php_error_docref(NULL, E_WARNING, "Object is not supported"); + continue; + } + if (key) { + zend_hash_add(output, key_tmp, &entry_tmp); + } else { + zend_hash_index_add(output, idx, &entry_tmp); + } + } ZEND_HASH_FOREACH_END(); + input->u.v.nApplyCount--; + + return output; +} +/* }}} */ + + /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding]) Returns converted string in desired encoding */ PHP_FUNCTION(mb_convert_encoding) { - char *arg_str, *arg_new; - size_t str_len, new_len; + zval *input; + char *arg_new; + size_t new_len; zval *arg_old = NULL; size_t size, l, n; char *_from_encodings = NULL, *ret, *s_free = NULL; @@ -3237,10 +3372,14 @@ PHP_FUNCTION(mb_convert_encoding) zval *hash_entry; HashTable *target_hash; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "zs|z", &input, &arg_new, &new_len, &arg_old) == FAILURE) { return; } + if (Z_TYPE_P(input) != IS_STRING && Z_TYPE_P(input) != IS_ARRAY) { + convert_to_string(input); + } + if (arg_old) { switch (Z_TYPE_P(arg_old)) { case IS_ARRAY: @@ -3275,19 +3414,26 @@ PHP_FUNCTION(mb_convert_encoding) } } - /* new encoding */ - ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size); - if (ret != NULL) { - // TODO: avoid reallocation ??? - RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */ - efree(ret); + if (Z_TYPE_P(input) == IS_STRING) { + /* new encoding */ + ret = php_mb_convert_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), arg_new, _from_encodings, &size); + if (ret != NULL) { + // TODO: avoid reallocation ??? + RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */ + efree(ret); + } else { + RETVAL_FALSE; + } + if (s_free) { + efree(s_free); + } } else { - RETVAL_FALSE; + HashTable *tmp; + tmp = php_mb_convert_encoding_recursive(HASH_OF(input), arg_new, _from_encodings); + RETURN_ARR(tmp); } - if ( s_free) { - efree(s_free); - } + return; } /* }}} */ @@ -4257,11 +4403,11 @@ PHP_FUNCTION(mb_send_mail) size_t to_len; char *message = NULL; size_t message_len; - char *headers = NULL; - size_t headers_len; char *subject = NULL; - zend_string *extra_cmd = NULL; size_t subject_len; + zval *headers = NULL; + zend_string *extra_cmd = NULL; + zend_string *str_headers=NULL, *tmp_headers; int i; char *to_r = NULL; char *force_extra_parameters = INI_STR("mail.force_extra_parameters"); @@ -4301,7 +4447,7 @@ PHP_FUNCTION(mb_send_mail) body_enc = lang->mail_body_encoding; } - if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|sS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "sss|zS", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &extra_cmd) == FAILURE) { return; } @@ -4310,7 +4456,20 @@ PHP_FUNCTION(mb_send_mail) MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len); MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len); if (headers) { - MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len); + switch(Z_TYPE_P(headers)) { + case IS_STRING: + tmp_headers = zend_string_init(Z_STRVAL_P(headers), Z_STRLEN_P(headers), 0); + MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(tmp_headers), ZSTR_LEN(tmp_headers)); + str_headers = php_trim(tmp_headers, NULL, 0, 2); + zend_string_release(tmp_headers); + break; + case IS_ARRAY: + str_headers = php_mail_build_headers(headers); + break; + default: + php_error_docref(NULL, E_WARNING, "headers parameter must be string or array"); + RETURN_FALSE; + } } if (extra_cmd) { MAIL_ASCIIZ_CHECK_MBSTRING(ZSTR_VAL(extra_cmd), ZSTR_LEN(extra_cmd)); @@ -4318,8 +4477,8 @@ PHP_FUNCTION(mb_send_mail) zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0); - if (headers != NULL) { - _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len); + if (str_headers != NULL) { + _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers)); } if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) { @@ -4462,10 +4621,11 @@ PHP_FUNCTION(mb_send_mail) #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain" #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset=" #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: " - if (headers != NULL) { - p = headers; - n = headers_len; + if (str_headers != NULL) { + p = ZSTR_VAL(str_headers); + n = ZSTR_LEN(str_headers); mbfl_memory_device_strncat(&device, p, n); + zend_string_release(str_headers); if (n > 0 && p[n - 1] != '\n') { mbfl_memory_device_strncat(&device, "\n", 1); } @@ -4498,7 +4658,7 @@ PHP_FUNCTION(mb_send_mail) mbfl_memory_device_unput(&device); mbfl_memory_device_output('\0', &device); - headers = (char *)device.buffer; + str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0); if (force_extra_parameters) { extra_cmd = php_escape_shell_cmd(force_extra_parameters); @@ -4506,7 +4666,7 @@ PHP_FUNCTION(mb_send_mail) extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd)); } - if (!err && php_mail(to_r, subject, message, headers, extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) { + if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) { RETVAL_TRUE; } else { RETVAL_FALSE; @@ -4527,6 +4687,9 @@ PHP_FUNCTION(mb_send_mail) } mbfl_memory_device_clear(&device); zend_hash_destroy(&ht_headers); + if (str_headers) { + zend_string_release(str_headers); + } } #undef SKIP_LONG_HEADER_SEP_MBSTRING @@ -4715,13 +4878,51 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc) + +static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding) { - const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_buffer_converter *convd; + + convd = mbfl_buffer_converter_new2(encoding, encoding, 0); + if (convd == NULL) { + return NULL; + } + mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); + mbfl_buffer_converter_illegal_substchar(convd, 0); + return convd; +} + + +static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) { mbfl_string string, result, *ret = NULL; long illegalchars = 0; + /* initialize string */ + mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); + mbfl_string_init(&result); + + string.val = (unsigned char *) input; + string.len = length; + + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); + illegalchars = mbfl_buffer_illegalchars(convd); + + if (ret != NULL) { + if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { + mbfl_string_clear(&result); + return 1; + } + mbfl_string_clear(&result); + } + return 0; +} + + +MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const char *enc) +{ + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; + if (input == NULL) { return MBSTRG(illegalchars) == 0; } @@ -4734,60 +4935,454 @@ MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const c } } - convd = mbfl_buffer_converter_new2(encoding, encoding, 0); - + convd = php_mb_init_convd(encoding); if (convd == NULL) { php_error_docref(NULL, E_WARNING, "Unable to create converter"); return 0; } - mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); - mbfl_buffer_converter_illegal_substchar(convd, 0); + if (php_mb_check_encoding_impl(convd, input, length, encoding)) { + mbfl_buffer_converter_delete(convd); + return 1; + } + mbfl_buffer_converter_delete(convd); + return 0; +} - /* initialize string */ - mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); - mbfl_string_init(&result); - string.val = (unsigned char *) input; - string.len = length; +MBSTRING_API int php_mb_check_encoding_recursive(HashTable *vars, const zend_string *enc) +{ + const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; + zend_long idx; + zend_string *key; + zval *entry; + int valid = 1; - ret = mbfl_buffer_converter_feed_result(convd, &string, &result); - illegalchars = mbfl_buffer_illegalchars(convd); - mbfl_buffer_converter_delete(convd); + (void)(idx); - if (ret != NULL) { - if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { - mbfl_string_clear(&result); - return 1; + if (enc != NULL) { + encoding = mbfl_name2encoding(ZSTR_VAL(enc)); + if (!encoding || encoding == &mbfl_encoding_pass) { + php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", ZSTR_VAL(enc)); + return 0; } + } - mbfl_string_clear(&result); + convd = php_mb_init_convd(encoding); + if (convd == NULL) { + php_error_docref(NULL, E_WARNING, "Unable to create converter"); + return 0; } - return 0; + if (vars->u.v.nApplyCount++ > 1) { + vars->u.v.nApplyCount--; + mbfl_buffer_converter_delete(convd); + php_error_docref(NULL, E_WARNING, "Cannot not handle circular references"); + return 0; + } + ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) { + ZVAL_DEREF(entry); + if (key) { + if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) { + valid = 0; + break; + } + } + switch (Z_TYPE_P(entry)) { + case IS_STRING: + if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) { + valid = 0; + break; + } + break; + case IS_ARRAY: + if (!php_mb_check_encoding_recursive(HASH_OF(entry), enc)) { + valid = 0; + break; + } + break; + case IS_LONG: + case IS_DOUBLE: + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + break; + default: + /* Other types are error. */ + valid = 0; + break; + } + } ZEND_HASH_FOREACH_END(); + vars->u.v.nApplyCount--; + mbfl_buffer_converter_delete(convd); + return valid; } -/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) + +/* {{{ proto bool mb_check_encoding([mixed var[, string encoding]]) Check if the string is valid for the specified encoding */ PHP_FUNCTION(mb_check_encoding) { - char *var = NULL; - size_t var_len; - char *enc = NULL; - size_t enc_len; + zval *input = NULL; + zend_string *enc = NULL; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|zS", &input, &enc) == FAILURE) { return; - } + } - RETVAL_FALSE; + /* FIXME: Actually check all inputs, except $_FILES file content. */ + if (input == NULL) { + if (MBSTRG(illegalchars) == 0) { + RETURN_TRUE; + } + RETURN_FALSE; + } - if (php_mb_check_encoding(var, var_len, enc)) { - RETVAL_TRUE; + switch(Z_TYPE_P(input)) { + case IS_LONG: + case IS_DOUBLE: + case IS_NULL: + case IS_TRUE: + case IS_FALSE: + RETURN_TRUE; + break; + case IS_STRING: + if (!php_mb_check_encoding(Z_STRVAL_P(input), Z_STRLEN_P(input), enc ? ZSTR_VAL(enc): NULL)) { + RETURN_FALSE; + } + break; + case IS_ARRAY: + if (!php_mb_check_encoding_recursive(HASH_OF(input), enc)) { + RETURN_FALSE; + } + break; + default: + php_error_docref(NULL, E_WARNING, "Input is something other than scalar or array"); + RETURN_FALSE; + } + RETURN_TRUE; +} +/* }}} */ + + +static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc) +{ + enum mbfl_no_encoding no_enc; + char* ret; + size_t ret_len; + const mbfl_encoding *encoding; + unsigned char char_len; + zend_long cp; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return -1; + } + } + + if (php_mb_is_no_encoding_unicode(no_enc)) { + + ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + cp = (unsigned char) ret[0] << 24 | \ + (unsigned char) ret[1] << 16 | \ + (unsigned char) ret[2] << 8 | \ + (unsigned char) ret[3]; + + efree(ret); + + return cp; + + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return -1; + } + + ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + encoding = mbfl_no2encoding(no_enc); + char_len = php_mb_mbchar_bytes_ex(ret, encoding); + + if (char_len == 1) { + cp = (unsigned char) ret[0]; + } else if (char_len == 2) { + cp = ((unsigned char) ret[0] << 8) | \ + (unsigned char) ret[1]; + } else if (char_len == 3) { + cp = ((unsigned char) ret[0] << 16) | \ + ((unsigned char) ret[1] << 8) | \ + (unsigned char) ret[2]; + } else { + cp = ((unsigned char) ret[0] << 24) | \ + ((unsigned char) ret[1] << 16) | \ + ((unsigned char) ret[2] << 8) | \ + (unsigned char) ret[3]; } + + efree(ret); + + return cp; +} + + +/* {{{ proto bool mb_ord([string str[, string encoding]]) */ +PHP_FUNCTION(mb_ord) +{ + char* str; + size_t str_len; + char* enc = NULL; + size_t enc_len; + zend_long cp; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); + + cp = php_mb_ord(str, str_len, enc); + + if (0 > cp) { + RETURN_FALSE; + } + + RETURN_LONG(cp); } /* }}} */ + +static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len) +{ + enum mbfl_no_encoding no_enc; + char* buf; + size_t buf_len; + char* ret; + size_t ret_len; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return NULL; + } + } + + if (php_mb_is_no_encoding_utf8(no_enc)) { + + if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) { + if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) { + cp = MBSTRG(current_filter_illegal_substchar); + } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + } else { + cp = 0x3f; + } + } + + if (cp < 0x80) { + ret_len = 1; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = cp; + ret[1] = 0; + } else if (cp < 0x800) { + ret_len = 2; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xc0 | (cp >> 6); + ret[1] = 0x80 | (cp & 0x3f); + ret[2] = 0; + } else if (cp < 0x10000) { + ret_len = 3; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xe0 | (cp >> 12); + ret[1] = 0x80 | ((cp >> 6) & 0x3f); + ret[2] = 0x80 | (cp & 0x3f); + ret[3] = 0; + } else { + ret_len = 4; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xf0 | (cp >> 18); + ret[1] = 0x80 | ((cp >> 12) & 0x3f); + ret[2] = 0x80 | ((cp >> 6) & 0x3f); + ret[3] = 0x80 | (cp & 0x3f); + ret[4] = 0; + } + + if (output_len) { + *output_len = ret_len; + } + + return ret; + + } else if (php_mb_is_no_encoding_unicode(no_enc)) { + + if (0 > cp || 0x10ffff < cp) { + + if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + + } + + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = (cp >> 24) & 0xff; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; + + ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len); + efree(buf); + + if (output_len) { + *output_len = ret_len; + } + + return ret; + + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; + } + + if (0 > cp || cp > 0x100000000) { + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + } + + if (cp < 0x100) { + buf_len = 1; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp; + buf[1] = 0; + } else if (cp < 0x10000) { + buf_len = 2; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 8; + buf[1] = cp & 0xff; + buf[2] = 0; + } else if (cp < 0x1000000) { + buf_len = 3; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 16; + buf[1] = (cp >> 8) & 0xff; + buf[2] = cp & 0xff; + buf[3] = 0; + } else { + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 24; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; + } + + ret = php_mb_convert_encoding(buf, buf_len, enc, enc, &ret_len); + efree(buf); + + if (output_len) { + *output_len = ret_len; + } + + return ret; +} + + +/* {{{ proto bool mb_ord([int cp[, string encoding]]) */ +PHP_FUNCTION(mb_chr) +{ + zend_long cp; + char* enc = NULL; + size_t enc_len; + char* ret; + size_t ret_len; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_LONG(cp) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); + + ret = php_mb_chr(cp, enc, &ret_len); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETVAL_STRING(ret); + efree(ret); +} +/* }}} */ + + +static inline char* php_mb_scrub(const char* str, size_t str_len, const char* enc) +{ + size_t ret_len; + + return php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); +} + + +/* {{{ proto bool mb_scrub([string str[, string encoding]]) */ +PHP_FUNCTION(mb_scrub) +{ + char* str; + size_t str_len; + char *enc = NULL; + size_t enc_len; + char *ret; + + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); + + if (enc == NULL) { + enc = (char *) MBSTRG(current_internal_encoding)->name; + } else if (!mbfl_is_support_encoding(enc)) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + RETURN_FALSE; + } + + ret = php_mb_scrub(str, str_len, enc); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETVAL_STRING(ret); + efree(ret); +} +/* }}} */ + + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { |