diff options
author | Nuno Lopes <nlopess@php.net> | 2008-03-08 11:58:12 +0000 |
---|---|---|
committer | Nuno Lopes <nlopess@php.net> | 2008-03-08 11:58:12 +0000 |
commit | d204214d7f88037911f5efdb8f12d8c83200b3df (patch) | |
tree | f9bb14fe1534ce0efba3ff7b136081f682a03b27 /ext/pcre/php_pcre.c | |
parent | 0d0a7a432aff15332c1e003e2db8685dcf00ccee (diff) | |
download | php-git-d204214d7f88037911f5efdb8f12d8c83200b3df.tar.gz |
Implement #44336: optimize UTF-8 string matching.
Add the PREG_BAD_UTF8_OFFSET_ERROR constant.
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 26 |
1 files changed, 20 insertions, 6 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 5566c8e69d..512c88da71 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -48,7 +48,8 @@ enum { PHP_PCRE_INTERNAL_ERROR, PHP_PCRE_BACKTRACK_LIMIT_ERROR, PHP_PCRE_RECURSION_LIMIT_ERROR, - PHP_PCRE_BAD_UTF8_ERROR + PHP_PCRE_BAD_UTF8_ERROR, + PHP_PCRE_BAD_UTF8_OFFSET_ERROR }; @@ -72,6 +73,10 @@ static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ preg_code = PHP_PCRE_BAD_UTF8_ERROR; break; + case PCRE_ERROR_BADUTF8_OFFSET: + preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR; + break; + default: preg_code = PHP_PCRE_INTERNAL_ERROR; break; @@ -145,6 +150,7 @@ static PHP_MINIT_FUNCTION(pcre) REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT); REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT); return SUCCESS; @@ -614,7 +620,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); - /* Check for too many substrings condition. */ + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings"); count = size_offsets/3; @@ -1034,7 +1043,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub /* Execute the regular expression. 
*/ count = pcre_exec(pce->re, extra, subject, subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); - + + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); @@ -1472,6 +1484,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec subject_len, start_offset, exoptions|g_notempty, offsets, size_offsets); + /* the string was already proved to be valid UTF-8 */ + exoptions |= PCRE_NO_UTF8_CHECK; + /* Check for too many substrings condition. */ if (count == 0) { php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings"); @@ -1535,9 +1550,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec subject_len, start_offset, exoptions, offsets, size_offsets); if (count < 1) { - php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error"); - offsets[0] = start_offset; - offsets[1] = start_offset + 1; + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error"); + RETURN_FALSE; } } else { offsets[0] = start_offset; |