summary | refs | log | tree | commit | diff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
author Nuno Lopes <nlopess@php.net> 2008-03-08 11:58:12 +0000
committer Nuno Lopes <nlopess@php.net> 2008-03-08 11:58:12 +0000
commit d204214d7f88037911f5efdb8f12d8c83200b3df (patch)
tree f9bb14fe1534ce0efba3ff7b136081f682a03b27 /ext/pcre/php_pcre.c
parent 0d0a7a432aff15332c1e003e2db8685dcf00ccee (diff)
download php-git-d204214d7f88037911f5efdb8f12d8c83200b3df.tar.gz
Implement #44336: optimize UTF-8 string matching
Add PREG_BAD_UTF8_OFFSET_ERROR constant
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- ext/pcre/php_pcre.c | 26
1 file changed, 20 insertions, 6 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 5566c8e69d..512c88da71 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -48,7 +48,8 @@ enum {
PHP_PCRE_INTERNAL_ERROR,
PHP_PCRE_BACKTRACK_LIMIT_ERROR,
PHP_PCRE_RECURSION_LIMIT_ERROR,
- PHP_PCRE_BAD_UTF8_ERROR
+ PHP_PCRE_BAD_UTF8_ERROR,
+ PHP_PCRE_BAD_UTF8_OFFSET_ERROR
};
@@ -72,6 +73,10 @@ static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
preg_code = PHP_PCRE_BAD_UTF8_ERROR;
break;
+ case PCRE_ERROR_BADUTF8_OFFSET:
+ preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
+ break;
+
default:
preg_code = PHP_PCRE_INTERNAL_ERROR;
break;
@@ -145,6 +150,7 @@ static PHP_MINIT_FUNCTION(pcre)
REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
return SUCCESS;
@@ -614,7 +620,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
- /* Check for too many substrings condition. */
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
+ /* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
count = size_offsets/3;
@@ -1034,7 +1043,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
/* Execute the regular expression. */
count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
-
+
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
@@ -1472,6 +1484,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
subject_len, start_offset,
exoptions|g_notempty, offsets, size_offsets);
+ /* the string was already proved to be valid UTF-8 */
+ exoptions |= PCRE_NO_UTF8_CHECK;
+
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
@@ -1535,9 +1550,8 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
subject_len, start_offset,
exoptions, offsets, size_offsets);
if (count < 1) {
- php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error");
- offsets[0] = start_offset;
- offsets[1] = start_offset + 1;
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
+ RETURN_FALSE;
}
} else {
offsets[0] = start_offset;