diff options
Diffstat (limited to 'ext/standard/html.c')
-rw-r--r-- | ext/standard/html.c | 183 |
1 files changed, 108 insertions, 75 deletions
diff --git a/ext/standard/html.c b/ext/standard/html.c index b4d9ba109d..dfe30d9b3e 100644 --- a/ext/standard/html.c +++ b/ext/standard/html.c @@ -1,8 +1,8 @@ /* +----------------------------------------------------------------------+ - | PHP Version 5 | + | PHP Version 7 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2013 The PHP Group | + | Copyright (c) 1997-2014 The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -84,6 +84,18 @@ #define sjis_lead(c) ((c) != 0x80 && (c) != 0xA0 && (c) < 0xFD) #define sjis_trail(c) ((c) >= 0x40 && (c) != 0x7F && (c) < 0xFD) +/* {{{ get_default_charset + */ +static char *get_default_charset(TSRMLS_D) { + if (PG(internal_encoding) && PG(internal_encoding)[0]) { + return PG(internal_encoding); + } else if (SG(default_charset) && SG(default_charset)[0] ) { + return SG(default_charset); + } + return NULL; +} +/* }}} */ + /* {{{ get_next_char */ static inline unsigned int get_next_char( @@ -380,9 +392,9 @@ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC) if (zenc != NULL) { charset_hint = (char *)zend_multibyte_get_encoding_name(zenc); if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) { - if ((len == 4) /* sizeof (none|auto|pass) */ && + if ((len == 4) /* sizeof (auto|pass) */ && + /* XXX should the "wchar" be ignored as well?? */ (!memcmp("pass", charset_hint, 4) || - !memcmp("auto", charset_hint, 4) || !memcmp("auto", charset_hint, 4))) { charset_hint = NULL; len = 0; @@ -778,7 +790,7 @@ static inline int numeric_entity_is_allowed(unsigned uni_cp, int document_type) */ static inline int process_numeric_entity(const char **buf, unsigned *code_point) { - long code_l; + zend_long code_l; int hexadecimal = (**buf == 'x' || **buf == 'X'); /* TODO: XML apparently disallows "X" */ char *endptr; @@ -792,7 +804,7 @@ static inline int process_numeric_entity(const char **buf, unsigned *code_point) return FAILURE; } - code_l = strtol(*buf, &endptr, hexadecimal ? 16 : 10); + code_l = ZEND_STRTOL(*buf, &endptr, hexadecimal ? 16 : 10); /* we're guaranteed there were valid digits, so *endptr > buf */ *buf = endptr; @@ -801,7 +813,7 @@ static inline int process_numeric_entity(const char **buf, unsigned *code_point) /* many more are invalid, but that depends on whether it's HTML * (and which version) or XML. */ - if (code_l > 0x10FFFFL) + if (code_l > Z_L(0x10FFFF)) return FAILURE; if (code_point != NULL) @@ -844,7 +856,7 @@ static inline int process_named_entity_html(const char **buf, const char **start static inline int resolve_named_entity_html(const char *start, size_t length, const entity_ht *ht, unsigned *uni_cp1, unsigned *uni_cp2) { const entity_cp_map *s; - ulong hash = zend_inline_hash_func(start, length); + zend_ulong hash = zend_inline_hash_func(start, length); s = ht->buckets[hash % ht->num_elems]; while (s->entity) { @@ -889,7 +901,7 @@ static inline size_t write_octet_sequence(unsigned char *buf, enum entity_charse #if 0 return php_mb2_int_to_char(buf, code); #else -#ifdef ZEND_DEBUG +#if ZEND_DEBUG assert(code <= 0xFFU); #endif *buf = code; @@ -900,7 +912,7 @@ static inline size_t write_octet_sequence(unsigned char *buf, enum entity_charse #if 0 /* idem */ return php_mb2_int_to_char(buf, code); #else -#ifdef ZEND_DEBUG +#if ZEND_DEBUG assert(code <= 0xFFU); #endif *buf = code; @@ -924,8 +936,7 @@ static inline size_t write_octet_sequence(unsigned char *buf, enum entity_charse static void traverse_for_entities( const char *old, size_t oldlen, - char *ret, /* should have allocated TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(olden) */ - size_t *retlen, + zend_string *ret, /* should have allocated TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(olden) */ int all, int flags, const entity_ht *inv_map, @@ -939,7 +950,7 @@ static void traverse_for_entities( lim = old + oldlen; /* terminator address */ assert(*lim == '\0'); - for (p = old, q = ret; p < lim;) { + for (p = old, q = ret->val; p < lim;) { unsigned code, code2 = 0; const char *next = NULL; /* when set, next > p, otherwise possible inf loop */ @@ -1012,9 +1023,9 @@ static void traverse_for_entities( goto invalid_code; /* not representable in target charset */ } - q += write_octet_sequence(q, charset, code); + q += write_octet_sequence((unsigned char*)q, charset, code); if (code2) { - q += write_octet_sequence(q, charset, code2); + q += write_octet_sequence((unsigned char*)q, charset, code2); } /* jump over the valid entity; may go beyond size of buffer; np */ @@ -1028,7 +1039,7 @@ invalid_code: } *q = '\0'; - *retlen = (size_t)(q - ret); + ret->len = (size_t)(q - ret->val); } /* }}} */ @@ -1083,10 +1094,10 @@ static entity_table_opt determine_entity_table(int all, int doctype) * only the basic ones, i.e., those in basic_entities_ex + the numeric entities * that correspond to quotes. */ -PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC) +PHPAPI zend_string *php_unescape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset TSRMLS_DC) { size_t retlen; - char *ret; + zend_string *ret; enum entity_charset charset; const entity_ht *inverse_map = NULL; size_t new_size = TRAVERSE_FOR_ENTITIES_EXPAND_SIZE(oldlen); @@ -1101,12 +1112,13 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_ if (oldlen > new_size) { /* overflow, refuse to do anything */ - ret = estrndup((char*)old, oldlen); + ret = zend_string_init((char*)old, oldlen, 0); retlen = oldlen; goto empty_source; } - ret = emalloc(new_size); - *ret = '\0'; + ret = zend_string_alloc(new_size, 0); + ret->val[0] = '\0'; + ret->len = oldlen; retlen = oldlen; if (retlen == 0) { goto empty_source; @@ -1115,17 +1127,16 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, size_t oldlen, size_ inverse_map = unescape_inverse_map(all, flags); /* replace numeric entities */ - traverse_for_entities(old, oldlen, ret, &retlen, all, flags, inverse_map, charset); + traverse_for_entities((char*)old, oldlen, ret, all, flags, inverse_map, charset); empty_source: - *newlen = retlen; return ret; } /* }}} */ -PHPAPI char *php_escape_html_entities(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset TSRMLS_DC) +PHPAPI zend_string *php_escape_html_entities(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset TSRMLS_DC) { - return php_escape_html_entities_ex(old, oldlen, newlen, all, flags, hint_charset, 1 TSRMLS_CC); + return php_escape_html_entities_ex(old, oldlen, all, flags, hint_charset, 1 TSRMLS_CC); } /* {{{ find_entity_for_char */ @@ -1211,10 +1222,10 @@ static inline void find_entity_for_char_basic( /* {{{ php_escape_html_entities */ -PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size_t *newlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC) +PHPAPI zend_string *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, int all, int flags, char *hint_charset, zend_bool double_encode TSRMLS_DC) { size_t cursor, maxlen, len; - char *replaced; + zend_string *replaced; enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC); int doctype = flags & ENT_HTML_DOC_TYPE_MASK; entity_table_opt entity_table; @@ -1264,7 +1275,7 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size } } - replaced = emalloc(maxlen + 1); /* adding 1 is safe: maxlen is even */ + replaced = zend_string_alloc(maxlen, 0); len = 0; cursor = 0; while (cursor < oldlen) { @@ -1277,7 +1288,7 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size /* guarantee we have at least 40 bytes to write. * In HTML5, entities may take up to 33 bytes */ if (len > maxlen - 40) { /* maxlen can never be smaller than 128 */ - replaced = safe_erealloc(replaced, maxlen , 1, 128 + 1); + replaced = zend_string_safe_realloc(replaced, maxlen, 1, 128, 0); maxlen += 128; } @@ -1286,12 +1297,11 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size if (flags & ENT_HTML_IGNORE_ERRORS) { continue; } else if (flags & ENT_HTML_SUBSTITUTE_ERRORS) { - memcpy(&replaced[len], replacement, replacement_len); + memcpy(&replaced->val[len], replacement, replacement_len); len += replacement_len; continue; } else { - efree(replaced); - *newlen = 0; + zend_string_free(replaced); return STR_EMPTY_ALLOC(); } } else { /* SUCCESS */ @@ -1324,10 +1334,10 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size } if (rep != NULL) { - replaced[len++] = '&'; - memcpy(&replaced[len], rep, rep_len); + replaced->val[len++] = '&'; + memcpy(&replaced->val[len], rep, rep_len); len += rep_len; - replaced[len++] = ';'; + replaced->val[len++] = ';'; } else { /* we did not find an entity for this char. * check for its validity, if its valid pass it unchanged */ @@ -1361,16 +1371,16 @@ PHPAPI char *php_escape_html_entities_ex(unsigned char *old, size_t oldlen, size } pass_char_through: if (mbseqlen > 1) { - memcpy(replaced + len, mbsequence, mbseqlen); + memcpy(replaced->val + len, mbsequence, mbseqlen); len += mbseqlen; } else { - replaced[len++] = mbsequence[0]; + replaced->val[len++] = mbsequence[0]; } } } else { /* this_char == '&' */ if (double_encode) { encode_amp: - memcpy(&replaced[len], "&", sizeof("&") - 1); + memcpy(&replaced->val[len], "&", sizeof("&") - 1); len += sizeof("&") - 1; } else { /* no double encode */ /* check if entity is valid */ @@ -1410,19 +1420,19 @@ encode_amp: /* at this point maxlen - len >= 40 */ if (maxlen - len < ent_len + 2 /* & and ; */) { /* ent_len < oldlen, which is certainly <= SIZE_MAX/2 */ - replaced = safe_erealloc(replaced, maxlen, 1, ent_len + 128 + 1); + replaced = zend_string_safe_realloc(replaced, maxlen, 1, ent_len + 128, 0); maxlen += ent_len + 128; } - replaced[len++] = '&'; - memcpy(&replaced[len], &old[cursor], ent_len); + replaced->val[len++] = '&'; + memcpy(&replaced->val[len], &old[cursor], ent_len); len += ent_len; - replaced[len++] = ';'; + replaced->val[len++] = ';'; cursor += ent_len + 1; } } } - replaced[len] = '\0'; - *newlen = len; + replaced->val[len] = '\0'; + replaced->len = len; return replaced; } @@ -1432,19 +1442,31 @@ encode_amp: */ static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all) { - char *str, *hint_charset = NULL; - int str_len, hint_charset_len = 0; - size_t new_len; - long flags = ENT_COMPAT; - char *replaced; + zend_string *str, *hint_charset = NULL; + char *default_charset; + zend_long flags = ENT_COMPAT; + zend_string *replaced; zend_bool double_encode = 1; - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls!b", &str, &str_len, &flags, &hint_charset, &hint_charset_len, &double_encode) == FAILURE) { +#ifndef FAST_ZPP + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "S|lS!b", &str, &flags, &hint_charset, &double_encode) == FAILURE) { return; } +#else + ZEND_PARSE_PARAMETERS_START(1, 4) + Z_PARAM_STR(str) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(flags) + Z_PARAM_STR_EX(hint_charset, 1, 0) + Z_PARAM_BOOL(double_encode); + ZEND_PARSE_PARAMETERS_END(); +#endif - replaced = php_escape_html_entities_ex(str, str_len, &new_len, all, (int) flags, hint_charset, double_encode TSRMLS_CC); - RETVAL_STRINGL(replaced, (int)new_len, 0); + if (!hint_charset) { + default_charset = get_default_charset(TSRMLS_C); + } + replaced = php_escape_html_entities_ex((unsigned char*)str->val, str->len, all, (int) flags, (hint_charset ? hint_charset->val : default_charset), double_encode TSRMLS_CC); + RETVAL_STR(replaced); } /* }}} */ @@ -1483,18 +1505,17 @@ PHP_FUNCTION(htmlspecialchars) PHP_FUNCTION(htmlspecialchars_decode) { char *str; - int str_len; - size_t new_len = 0; - long quote_style = ENT_COMPAT; - char *replaced; + size_t str_len; + zend_long quote_style = ENT_COMPAT; + zend_string *replaced; if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &str_len, "e_style) == FAILURE) { return; } - replaced = php_unescape_html_entities(str, str_len, &new_len, 0 /*!all*/, quote_style, NULL TSRMLS_CC); + replaced = php_unescape_html_entities((unsigned char*)str, str_len, 0 /*!all*/, quote_style, NULL TSRMLS_CC); if (replaced) { - RETURN_STRINGL(replaced, (int)new_len, 0); + RETURN_STR(replaced); } RETURN_FALSE; } @@ -1504,20 +1525,32 @@ PHP_FUNCTION(htmlspecialchars_decode) Convert all HTML entities to their applicable characters */ PHP_FUNCTION(html_entity_decode) { - char *str, *hint_charset = NULL; - int str_len, hint_charset_len = 0; - size_t new_len = 0; - long quote_style = ENT_COMPAT; - char *replaced; - - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len, - "e_style, &hint_charset, &hint_charset_len) == FAILURE) { + zend_string *str, *hint_charset = NULL; + char *default_charset; + zend_long quote_style = ENT_COMPAT; + zend_string *replaced; + +#ifndef FAST_ZPP + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "S|lS", &str, + "e_style, &hint_charset) == FAILURE) { return; } +#else + ZEND_PARSE_PARAMETERS_START(1, 3) + Z_PARAM_STR(str) + Z_PARAM_OPTIONAL + Z_PARAM_LONG(quote_style) + Z_PARAM_STR(hint_charset) + ZEND_PARSE_PARAMETERS_END(); +#endif + + if (!hint_charset) { + default_charset = get_default_charset(TSRMLS_C); + } + replaced = php_unescape_html_entities((unsigned char*)str->val, str->len, 1 /*all*/, quote_style, (hint_charset ? hint_charset->val : default_charset) TSRMLS_CC); - replaced = php_unescape_html_entities(str, str_len, &new_len, 1 /*all*/, quote_style, hint_charset TSRMLS_CC); if (replaced) { - RETURN_STRINGL(replaced, (int)new_len, 0); + RETURN_STR(replaced); } RETURN_FALSE; } @@ -1543,13 +1576,13 @@ static inline void write_s3row_data( char entity[LONGEST_ENTITY_LENGTH + 2] = {'&'}; size_t written_k1; - written_k1 = write_octet_sequence(key, charset, orig_cp); + written_k1 = write_octet_sequence((unsigned char*)key, charset, orig_cp); if (!r->ambiguous) { size_t l = r->data.ent.entity_len; memcpy(&entity[1], r->data.ent.entity, l); entity[l + 1] = ';'; - add_assoc_stringl_ex(arr, key, written_k1 + 1, entity, l + 2, 1); + add_assoc_stringl_ex(arr, key, written_k1, entity, l + 2); } else { unsigned i, num_entries; @@ -1559,7 +1592,7 @@ static inline void write_s3row_data( size_t l = mcpr[0].leading_entry.default_entity_len; memcpy(&entity[1], mcpr[0].leading_entry.default_entity, l); entity[l + 1] = ';'; - add_assoc_stringl_ex(arr, key, written_k1 + 1, entity, l + 2, 1); + add_assoc_stringl_ex(arr, key, written_k1, entity, l + 2); } num_entries = mcpr[0].leading_entry.size; for (i = 1; i <= num_entries; i++) { @@ -1578,11 +1611,11 @@ static inline void write_s3row_data( spe_cp = uni_cp; } - written_k2 = write_octet_sequence(&key[written_k1], charset, spe_cp); + written_k2 = write_octet_sequence((unsigned char*)&key[written_k1], charset, spe_cp); memcpy(&entity[1], mcpr[i].normal_entry.entity, l); entity[l + 1] = ';'; entity[l + 1] = '\0'; - add_assoc_stringl_ex(arr, key, written_k1 + written_k2 + 1, entity, l + 1, 1); + add_assoc_stringl_ex(arr, key, written_k1 + written_k2, entity, l + 1); } } } @@ -1592,13 +1625,13 @@ static inline void write_s3row_data( Returns the internal translation table used by htmlspecialchars and htmlentities */ PHP_FUNCTION(get_html_translation_table) { - long all = HTML_SPECIALCHARS, + zend_long all = HTML_SPECIALCHARS, flags = ENT_COMPAT; int doctype; entity_table_opt entity_table; const enc_to_uni *to_uni_table = NULL; char *charset_hint = NULL; - int charset_hint_len; + size_t charset_hint_len; enum entity_charset charset; /* in this function we have to jump through some loops because we're |