summary | refs | log | tree | commit | diff
path: root/ext/mbstring/mbstring.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/mbstring/mbstring.c')
-rw-r--r-- ext/mbstring/mbstring.c 133
1 files changed, 47 insertions, 86 deletions
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index b6395aecdd..416dc9a6af 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -34,12 +34,12 @@
#include "libmbfl/mbfl/mbfilter_8bit.h"
#include "libmbfl/mbfl/mbfilter_pass.h"
#include "libmbfl/mbfl/mbfilter_wchar.h"
-#include "libmbfl/filters/mbfilter_ascii.h"
#include "libmbfl/filters/mbfilter_base64.h"
#include "libmbfl/filters/mbfilter_qprint.h"
#include "libmbfl/filters/mbfilter_ucs4.h"
#include "libmbfl/filters/mbfilter_utf8.h"
#include "libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h"
+#include "libmbfl/filters/mbfilter_singlebyte.h"
#include "php_variables.h"
#include "php_globals.h"
@@ -73,9 +73,9 @@ static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *lis
static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);
-static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
+static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);
-static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
+static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
/* }}} */
/* {{{ php_mb_default_identify_list */
@@ -252,14 +252,14 @@ static size_t count_commas(const char *p, const char *end) {
*/
static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
- zend_bool allow_pass_encoding)
+ bool allow_pass_encoding)
{
if (value == NULL || value_length == 0) {
*return_list = NULL;
*return_size = 0;
return SUCCESS;
} else {
- zend_bool included_auto;
+ bool included_auto;
size_t n, size;
char *p1, *endp, *tmpstr;
const mbfl_encoding **entry, **list;
@@ -347,7 +347,7 @@ static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encodi
size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
const mbfl_encoding **entry = list;
- zend_bool included_auto = 0;
+ bool included_auto = 0;
size_t n = 0;
zval *hash_entry;
ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
@@ -403,13 +403,7 @@ static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encodin
/* Predicate over a zend_encoding, which is treated as an mbfl_encoding (see the
 * cast below) and judged solely by its `flag` bits.
 *
 * Removed (-) logic: returned 1 when MBFL_ENCTYPE_SBCS was set, or when
 * MBFL_ENCTYPE_MBCS was set and MBFL_ENCTYPE_GL_UNSAFE was not; 0 otherwise.
 * Added (+) logic: returns true exactly when MBFL_ENCTYPE_GL_UNSAFE is not set.
 *
 * NOTE(review): the two forms disagree for an encoding with neither SBCS nor
 * MBCS and no GL_UNSAFE (old: 0, new: true) and for SBCS together with
 * GL_UNSAFE (old: 1, new: false) -- confirm no encoding relies on the old result. */
static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
{
const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
- if (encoding->flag & MBFL_ENCTYPE_SBCS) {
- return 1;
- }
- if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
- return 1;
- }
- return 0;
+ return !(encoding->flag & MBFL_ENCTYPE_GL_UNSAFE);
}
static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
@@ -1421,7 +1415,7 @@ PHP_FUNCTION(mb_substitute_character)
{
zend_string *substitute_character = NULL;
zend_long substitute_codepoint;
- zend_bool substitute_is_null = 1;
+ bool substitute_is_null = 1;
ZEND_PARSE_PARAMETERS_START(0, 1)
Z_PARAM_OPTIONAL
@@ -1497,9 +1491,6 @@ PHP_FUNCTION(mb_preferred_mime_name)
}
/* }}} */
-#define IS_SJIS1(c) ((((c)>=0x81 && (c)<=0x9f) || ((c)>=0xe0 && (c)<=0xf5)) ? 1 : 0)
-#define IS_SJIS2(c) ((((c)>=0x40 && (c)<=0x7e) || ((c)>=0x80 && (c)<=0xfc)) ? 1 : 0)
-
/* {{{ Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)
{
@@ -1717,10 +1708,10 @@ PHP_FUNCTION(mb_str_split)
if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
mb_len = string.len;
chunk_len = (size_t)split_length; /* chunk length in bytes */
- } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */
+ } else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS2) { /* 2 bytes */
mb_len = string.len / 2;
chunk_len = split_length * 2;
- } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
+ } else if (mbfl_encoding->flag & MBFL_ENCTYPE_WCS4) { /* 4 bytes */
mb_len = string.len / 4;
chunk_len = split_length * 4;
} else if (mbfl_encoding->mblen_table != NULL) {
@@ -2003,7 +1994,7 @@ static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int va
char *haystack_val, *needle_val;
mbfl_string haystack, needle, result, *ret = NULL;
zend_string *encoding_name = NULL;
- zend_bool part = 0;
+ bool part = 0;
ZEND_PARSE_PARAMETERS_START(2, 4)
Z_PARAM_STRING(haystack_val, haystack.len)
@@ -2127,7 +2118,7 @@ PHP_FUNCTION(mb_substr)
zend_long from, len;
size_t real_from, real_len;
size_t str_len;
- zend_bool len_is_null = 1;
+ bool len_is_null = 1;
mbfl_string string, result, *ret;
ZEND_PARSE_PARAMETERS_START(2, 4)
@@ -2191,7 +2182,7 @@ PHP_FUNCTION(mb_strcut)
zend_string *encoding = NULL;
char *string_val;
zend_long from, len;
- zend_bool len_is_null = 1;
+ bool len_is_null = 1;
mbfl_string string, result, *ret;
ZEND_PARSE_PARAMETERS_START(2, 4)
@@ -2335,7 +2326,7 @@ PHP_FUNCTION(mb_strimwidth)
/* See mbfl_no_encoding definition for list of unsupported encodings */
-static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
+static inline bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
{
return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
|| (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
@@ -2345,7 +2336,7 @@ static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding
/* See mbfl_no_encoding definition for list of UTF-8 encodings */
-static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
+static inline bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
{
/* Inclusive range test on the enum value: true iff
 * mbfl_no_encoding_utf8 <= no_enc <= mbfl_no_encoding_utf8_sb.
 * Presumably the UTF-8 variants are contiguous in enum mbfl_no_encoding;
 * verify against the enum definition before reordering it. */
return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
}
@@ -2504,7 +2495,7 @@ PHP_FUNCTION(mb_convert_encoding)
HashTable *input_ht, *from_encodings_ht = NULL;
const mbfl_encoding **from_encodings;
size_t num_from_encodings;
- zend_bool free_from_encodings;
+ bool free_from_encodings;
ZEND_PARSE_PARAMETERS_START(2, 3)
Z_PARAM_ARRAY_HT_OR_STR(input_ht, input_str)
@@ -2676,13 +2667,13 @@ PHP_FUNCTION(mb_detect_encoding)
size_t str_len;
zend_string *encoding_str = NULL;
HashTable *encoding_ht = NULL;
- zend_bool strict = 0;
+ bool strict = 0;
mbfl_string string;
const mbfl_encoding *ret;
const mbfl_encoding **elist;
size_t size;
- zend_bool free_elist;
+ bool free_elist;
ZEND_PARSE_PARAMETERS_START(1, 3)
Z_PARAM_STRING(str, str_len)
@@ -2764,8 +2755,7 @@ PHP_FUNCTION(mb_encoding_aliases)
array_init(return_value);
if (encoding->aliases != NULL) {
- const char **alias;
- for (alias = *encoding->aliases; *alias; ++alias) {
+ for (const char **alias = encoding->aliases; *alias; ++alias) {
add_next_index_string(return_value, (char *)*alias);
}
}
@@ -3188,7 +3178,7 @@ PHP_FUNCTION(mb_encode_numericentity)
zend_string *encoding = NULL;
int mapsize;
HashTable *target_hash;
- zend_bool is_hex = 0;
+ bool is_hex = 0;
mbfl_string string, result, *ret;
ZEND_PARSE_PARAMETERS_START(2, 4)
@@ -3649,7 +3639,7 @@ PHP_FUNCTION(mb_send_mail)
if (!suppressed_hdrs.cnt_type) {
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
- p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
+ p = (char *)mbfl_encoding_preferred_mime_name(tran_cs);
if (p != NULL) {
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
mbfl_memory_device_strcat(&device, p);
@@ -3658,7 +3648,7 @@ PHP_FUNCTION(mb_send_mail)
}
if (!suppressed_hdrs.cnt_trans_enc) {
mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
- p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
+ p = (char *)mbfl_encoding_preferred_mime_name(body_enc);
if (p == NULL) {
p = "7bit";
}
@@ -3859,70 +3849,43 @@ PHP_FUNCTION(mb_get_info)
}
/* }}} */
-
/* This hunk removes php_mb_init_convd() (which built an encoding -> same-encoding
 * buffer converter with illegal-mode NONE and substitute char 0) and adds
 * mbfl_filt_check_errors() in its place.
 *
 * mbfl_filt_check_errors(c, data):
 *   - `data` is treated as a pointer to an mbfl_convert_filter pointer.
 *   - If `c` has any MBFL_WCSGROUP_THROUGH bit set, that filter's
 *     num_illegalchar counter is incremented.
 *   - Always returns `c` unchanged.
 * php_mb_check_encoding() passes this function, together with &filter, to
 * mbfl_convert_filter_new(). */
-static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
+static int mbfl_filt_check_errors(int c, void* data)
{
- mbfl_buffer_converter *convd;
-
- convd = mbfl_buffer_converter_new(encoding, encoding, 0);
- if (convd == NULL) {
- return NULL;
+ if (c & MBFL_WCSGROUP_THROUGH) {
+ (*((mbfl_convert_filter**)data))->num_illegalchar++;
}
- mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
- mbfl_buffer_converter_illegal_substchar(convd, 0);
- return convd;
+ return c;
}
/* php_mb_check_encoding(input, length, encoding): returns 1 when the `length`
 * bytes at `input` are accepted for `encoding`, 0 otherwise.
 *
 * Added (+) implementation:
 *   - Creates a convert filter from `encoding` to mbfl_encoding_wchar, with
 *     mbfl_filt_check_errors as its callback and &filter as the callback data.
 *   - Feeds the input one byte at a time through filter->filter_function and
 *     returns 0 as soon as filter->num_illegalchar is nonzero.
 *   - After the last byte, calls filter->filter_flush and returns 1 only if
 *     num_illegalchar is still zero.
 *   - The filter is deleted on both exit paths.
 *
 * Removed (-) implementation: php_mb_check_encoding_impl() ran the string
 * through a buffer converter and required zero illegal characters plus output
 * byte-identical to the input; the old wrapper created and destroyed that
 * converter around the call.
 *
 * NOTE(review): the result of mbfl_convert_filter_new() is dereferenced without
 * a NULL check, whereas the removed wrapper had ZEND_ASSERT(convd != NULL) --
 * confirm it cannot return NULL for any encoding reaching this function. */
+MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding)
+{
+ mbfl_convert_filter *filter = mbfl_convert_filter_new(encoding, &mbfl_encoding_wchar, mbfl_filt_check_errors, NULL, &filter);
-static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
- mbfl_string string, result;
-
- mbfl_string_init_set(&string, encoding);
- mbfl_string_init(&result);
-
- string.val = (unsigned char *) input;
- string.len = length;
-
- mbfl_string *ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
- size_t illegalchars = mbfl_buffer_illegalchars(convd);
-
- if (ret != NULL) {
- if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
- mbfl_string_clear(&result);
- return 1;
+ while (length--) {
+ unsigned char c = *input++;
+ (filter->filter_function)(c, filter);
+ if (filter->num_illegalchar) {
+ mbfl_convert_filter_delete(filter);
+ return 0;
}
- mbfl_string_clear(&result);
}
- return 0;
-}
-
-MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding)
-{
- mbfl_buffer_converter *convd = php_mb_init_convd(encoding);
- /* If this assertion fails this means some memory allocation failure which is a bug */
- ZEND_ASSERT(convd != NULL);
- int result = php_mb_check_encoding_impl(convd, input, length, encoding);
- mbfl_buffer_converter_delete(convd);
+ (filter->filter_flush)(filter);
+ int result = !filter->num_illegalchar;
+ mbfl_convert_filter_delete(filter);
return result;
}
static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding)
{
- mbfl_buffer_converter *convd;
zend_long idx;
zend_string *key;
zval *entry;
int valid = 1;
- (void)(idx);
-
- convd = php_mb_init_convd(encoding);
- /* If this assertion fails this means some memory allocation failure which is a bug */
- ZEND_ASSERT(convd != NULL);
+ (void)(idx); /* Suppress spurious compiler warning that `idx` is not used */
if (GC_IS_RECURSIVE(vars)) {
- mbfl_buffer_converter_delete(convd);
php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
return 0;
}
@@ -3930,14 +3893,14 @@ static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding
ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
ZVAL_DEREF(entry);
if (key) {
- if (!php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
+ if (!php_mb_check_encoding(ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
valid = 0;
break;
}
}
switch (Z_TYPE_P(entry)) {
case IS_STRING:
- if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
+ if (!php_mb_check_encoding(Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
valid = 0;
break;
}
@@ -3961,11 +3924,9 @@ static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding
}
} ZEND_HASH_FOREACH_END();
GC_TRY_UNPROTECT_RECURSION(vars);
- mbfl_buffer_converter_delete(convd);
return valid;
}
-
/* {{{ Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)
{
@@ -4243,14 +4204,14 @@ static int php_mb_encoding_translation(void)
/* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
{
- if (enc != NULL) {
- if (enc->flag & MBFL_ENCTYPE_MBCS) {
- if (enc->mblen_table != NULL) {
- if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
+ if (enc) {
+ if (enc->mblen_table) {
+ if (s) {
+ return enc->mblen_table[*(unsigned char *)s];
}
- } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
+ } else if (enc->flag & MBFL_ENCTYPE_WCS2) {
return 2;
- } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
+ } else if (enc->flag & MBFL_ENCTYPE_WCS4) {
return 4;
}
}