diff options
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 629 |
1 files changed, 308 insertions, 321 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index e82dc252b2..d19c7960a6 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -2,7 +2,7 @@ +----------------------------------------------------------------------+ | PHP Version 7 | +----------------------------------------------------------------------+ - | Copyright (c) 1997-2018 The PHP Group | + | Copyright (c) The PHP Group | +----------------------------------------------------------------------+ | This source file is subject to version 3.01 of the PHP license, | | that is bundled with this package in the file LICENSE, and is | @@ -23,8 +23,7 @@ #include "ext/standard/info.h" #include "ext/standard/basic_functions.h" #include "zend_smart_str.h" - -#if HAVE_PCRE || HAVE_BUNDLED_PCRE +#include "SAPI.h" #include "ext/standard/php_string.h" @@ -95,7 +94,6 @@ static MUTEX_T pcre_mt = NULL; #define php_pcre_mutex_unlock() #endif -#if HAVE_SETLOCALE ZEND_TLS HashTable char_tables; static void php_pcre_free_char_table(zval *data) @@ -103,7 +101,6 @@ static void php_pcre_free_char_table(zval *data) void *ptr = Z_PTR_P(data); pefree(ptr, 1); }/*}}}*/ -#endif static void pcre_handle_exec_error(int pcre_code) /* {{{ */ { @@ -146,7 +143,16 @@ static void php_free_pcre_cache(zval *data) /* {{{ */ pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data); if (!pce) return; pcre2_code_free(pce->re); - pefree(pce, 1); + free(pce); +} +/* }}} */ + +static void php_efree_pcre_cache(zval *data) /* {{{ */ +{ + pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data); + if (!pce) return; + pcre2_code_free(pce->re); + efree(pce); } /* }}} */ @@ -255,30 +261,35 @@ static PHP_GINIT_FUNCTION(pcre) /* {{{ */ { php_pcre_mutex_alloc(); - zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1); + /* If we're on the CLI SAPI, there will only be one request, so we don't need the + * cache to survive after RSHUTDOWN. 
*/ + pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0; + if (!pcre_globals->per_request_cache) { + zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1); + } + pcre_globals->backtrack_limit = 0; pcre_globals->recursion_limit = 0; pcre_globals->error_code = PHP_PCRE_NO_ERROR; + ZVAL_UNDEF(&pcre_globals->unmatched_null_pair); + ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair); #ifdef HAVE_PCRE_JIT_SUPPORT pcre_globals->jit = 1; #endif php_pcre_init_pcre2(1); -#if HAVE_SETLOCALE zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1); -#endif } /* }}} */ static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */ { - zend_hash_destroy(&pcre_globals->pcre_cache); + if (!pcre_globals->per_request_cache) { + zend_hash_destroy(&pcre_globals->pcre_cache); + } php_pcre_shutdown_pcre2(); -#if HAVE_SETLOCALE zend_hash_destroy(&char_tables); -#endif - php_pcre_mutex_free(); } /* }}} */ @@ -438,10 +449,10 @@ static PHP_MSHUTDOWN_FUNCTION(pcre) } /* }}} */ -#ifdef HAVE_PCRE_JIT_SUPPORT /* {{{ PHP_RINIT_FUNCTION(pcre) */ static PHP_RINIT_FUNCTION(pcre) { +#ifdef HAVE_PCRE_JIT_SUPPORT if (UNEXPECTED(!pcre2_init_ok)) { /* Retry. 
*/ php_pcre_mutex_lock(); @@ -454,11 +465,28 @@ static PHP_RINIT_FUNCTION(pcre) } mdata_used = 0; +#endif + + if (PCRE_G(per_request_cache)) { + zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0); + } return SUCCESS; } /* }}} */ -#endif + +static PHP_RSHUTDOWN_FUNCTION(pcre) +{ + if (PCRE_G(per_request_cache)) { + zend_hash_destroy(&PCRE_G(pcre_cache)); + } + + zval_ptr_dtor(&PCRE_G(unmatched_null_pair)); + zval_ptr_dtor(&PCRE_G(unmatched_empty_pair)); + ZVAL_UNDEF(&PCRE_G(unmatched_null_pair)); + ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair)); + return SUCCESS; +} /* {{{ static pcre_clean_cache */ static int pcre_clean_cache(zval *data, void *arg) @@ -475,29 +503,39 @@ static int pcre_clean_cache(zval *data, void *arg) } /* }}} */ +static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) { + uint32_t i; + for (i = 0; i < num_subpats; i++) { + if (subpat_names[i]) { + zend_string_release(subpat_names[i]); + } + } + efree(subpat_names); +} + /* {{{ static make_subpats_table */ -static char **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce) +static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce) { uint32_t name_cnt = pce->name_count, name_size, ni = 0; char *name_table; - unsigned short name_idx; - char **subpat_names; + zend_string **subpat_names; int rc1, rc2; rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table); rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size); if (rc1 < 0 || rc2 < 0) { - php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc1 < 0 ? rc1 : rc2); + php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? 
rc1 : rc2); return NULL; } - subpat_names = (char **)ecalloc(num_subpats, sizeof(char *)); + subpat_names = ecalloc(num_subpats, sizeof(zend_string *)); while (ni++ < name_cnt) { - name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; - subpat_names[name_idx] = name_table + 2; - if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) { + unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1]; + const char *name = name_table + 2; + subpat_names[name_idx] = zend_string_init(name, strlen(name), 0); + if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) { php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed"); - efree(subpat_names); + free_subpats_table(subpat_names, num_subpats); return NULL; } name_table += name_size; @@ -542,24 +580,19 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in char *pattern; size_t pattern_len; uint32_t poptions = 0; -#if HAVE_SETLOCALE const uint8_t *tables = NULL; -#endif zval *zv; pcre_cache_entry new_entry; int rc; zend_string *key; pcre_cache_entry *ret; -#if HAVE_SETLOCALE if (locale_aware && BG(locale_string) && (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) { key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0); memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1); memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1); - } else -#endif - { + } else { key = regex; } @@ -567,11 +600,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in back the compiled pattern, otherwise go on and compile it. 
*/ zv = zend_hash_find(&PCRE_G(pcre_cache), key); if (zv) { -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif return (pcre_cache_entry*)Z_PTR_P(zv); } @@ -581,11 +612,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in get to the end without encountering a delimiter. */ while (isspace((int)*(unsigned char *)p)) p++; if (*p == 0) { -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif php_error_docref(NULL, E_WARNING, p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression"); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); @@ -596,11 +625,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in or a backslash. */ delimiter = *p++; if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') { -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash"); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); return NULL; @@ -641,11 +668,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in } if (*pp == 0) { -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) { php_error_docref(NULL,E_WARNING, "Null byte in regex"); } else if (start_delimiter == end_delimiter) { @@ -706,11 +731,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in } pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); efree(pattern); -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif return NULL; } } @@ -719,15 +742,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead"); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); efree(pattern); -#if 
HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif return NULL; } -#if HAVE_SETLOCALE if (key != regex) { tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string)); if (!tables) { @@ -741,12 +761,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in return NULL; } _k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1); + GC_MAKE_PERSISTENT_LOCAL(_k); zend_hash_add_ptr(&char_tables, _k, (void *)tables); zend_string_release(_k); } pcre2_set_character_tables(cctx, tables); } -#endif /* Set extra options for the compile context. */ if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) { @@ -762,11 +782,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in } if (re == NULL) { -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif pcre2_get_error_message(errnumber, error, sizeof(error)); php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); @@ -817,11 +835,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count); if (rc < 0) { -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); return NULL; @@ -829,11 +845,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count); if (rc < 0) { -#if HAVE_SETLOCALE if (key != regex) { zend_string_release_ex(key, 0); } -#endif php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc); pcre_handle_exec_error(PCRE2_ERROR_INTERNAL); return NULL; @@ -847,22 +861,20 @@ PHPAPI pcre_cache_entry* 
pcre_get_compiled_regex_cache_ex(zend_string *regex, in * as hash keys especually for this table. * See bug #63180 */ - if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) { + if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) { zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1); - GC_MAKE_PERSISTENT_LOCAL(str); -#if HAVE_SETLOCALE - if (key != regex) { - zend_string_release_ex(key, 0); - } -#endif ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry)); zend_string_release(str); } else { ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry)); } + if (key != regex) { + zend_string_release_ex(key, 0); + } + return ret; } /* }}} */ @@ -877,13 +889,10 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex) /* {{{ pcre_get_compiled_regex */ -PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options) +PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count) { pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex); - if (preg_options) { - *preg_options = 0; - } if (capture_count) { *capture_count = pce ? pce->capture_count : 0; } @@ -899,7 +908,7 @@ PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capt pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex); if (preg_options) { - *preg_options = 0; + *preg_options = pce ? pce->preg_options : 0; } if (compile_options) { *compile_options = pce ? 
pce->compile_options : 0; @@ -945,35 +954,149 @@ PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data) } }/*}}}*/ +static void init_unmatched_null_pair() { + zval val1, val2; + ZVAL_NULL(&val1); + ZVAL_LONG(&val2, -1); + ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2)); +} + +static void init_unmatched_empty_pair() { + zval val1, val2; + ZVAL_EMPTY_STRING(&val1); + ZVAL_LONG(&val2, -1); + ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2)); +} + +static zend_always_inline void populate_match_value_str( + zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) { + if (start_offset == end_offset) { + ZVAL_EMPTY_STRING(val); + } else if (start_offset + 1 == end_offset) { + ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset])); + } else { + ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset); + } +} + +static inline void populate_match_value( + zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, + uint32_t unmatched_as_null) { + if (PCRE2_UNSET == start_offset) { + if (unmatched_as_null) { + ZVAL_NULL(val); + } else { + ZVAL_EMPTY_STRING(val); + } + } else { + populate_match_value_str(val, subject, start_offset, end_offset); + } +} + /* {{{ add_offset_pair */ -static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, char *name, uint32_t unmatched_as_null) +static inline void add_offset_pair( + zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, + zend_string *name, uint32_t unmatched_as_null) { - zval match_pair, tmp; - - array_init_size(&match_pair, 2); + zval match_pair; /* Add (match, offset) to the return value */ - if (PCRE2_UNSET == offset) { + if (PCRE2_UNSET == start_offset) { if (unmatched_as_null) { - ZVAL_NULL(&tmp); + if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) { + init_unmatched_null_pair(); + } + ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair)); } else 
{ - ZVAL_EMPTY_STRING(&tmp); + if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) { + init_unmatched_empty_pair(); + } + ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair)); } } else { - ZVAL_STRINGL(&tmp, str, len); + zval val1, val2; + populate_match_value_str(&val1, subject, start_offset, end_offset); + ZVAL_LONG(&val2, start_offset); + ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2)); } - zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); - ZVAL_LONG(&tmp, offset); - zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); if (name) { Z_ADDREF(match_pair); - zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair); + zend_hash_update(Z_ARRVAL_P(result), name, &match_pair); } zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair); } /* }}} */ +static void populate_subpat_array( + zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, + uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { + zend_bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0; + zend_bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0; + zval val; + int i; + if (subpat_names) { + if (offset_capture) { + for (i = 0; i < count; i++) { + add_offset_pair( + subpats, subject, offsets[2*i], offsets[2*i+1], + subpat_names[i], unmatched_as_null); + } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1); + } + } + } else { + for (i = 0; i < count; i++) { + populate_match_value( + &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); + if (subpat_names[i]) { + Z_TRY_ADDREF(val); + zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &val); + } + zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + ZVAL_NULL(&val); + if (subpat_names[i]) { + zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &val); + } + 
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + } + } + } + } else { + if (offset_capture) { + for (i = 0; i < count; i++) { + add_offset_pair( + subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); + } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1); + } + } + } else { + for (i = 0; i < count; i++) { + populate_match_value( + &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); + zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val); + } + if (unmatched_as_null) { + for (i = count; i < num_subpats; i++) { + add_next_index_null(subpats); + } + } + } + } + /* Add MARK, if available */ + if (mark) { + add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark); + } +} + static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */ { /* parameters */ @@ -988,7 +1111,7 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ * Z_PARAM_STR(regex) Z_PARAM_STR(subject) Z_PARAM_OPTIONAL - Z_PARAM_ZVAL_DEREF(subpats) + Z_PARAM_ZVAL(subpats) Z_PARAM_LONG(flags) Z_PARAM_LONG(start_offset) ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE); @@ -999,14 +1122,14 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ * } pce->refcount++; - php_pcre_match_impl(pce, ZSTR_VAL(subject), ZSTR_LEN(subject), return_value, subpats, + php_pcre_match_impl(pce, subject, return_value, subpats, global, ZEND_NUM_ARGS() >= 4, flags, start_offset); pce->refcount--; } /* }}} */ /* {{{ php_pcre_match_impl() */ -PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value, +PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset) { zval result_set, /* Holds a set of subpatterns after @@ -1018,7 +1141,7 @@ PHPAPI void 
php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub PCRE2_SIZE *offsets; /* Array of subpattern offsets */ uint32_t num_subpats; /* Number of captured subpatterns */ int matched; /* Has anything matched */ - char **subpat_names; /* Array for named subpatterns */ + zend_string **subpat_names; /* Array for named subpatterns */ size_t i; uint32_t subpats_order; /* Order of subpattern matches */ uint32_t offset_capture; /* Capture match offsets: yes/no */ @@ -1028,12 +1151,17 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub pcre2_match_data *match_data; PCRE2_SIZE start_offset2; + char *subject = ZSTR_VAL(subject_str); + size_t subject_len = ZSTR_LEN(subject_str); + ZVAL_UNDEF(&marks); /* Overwrite the passed-in value for subpatterns with an empty array. */ if (subpats != NULL) { - zval_ptr_dtor(subpats); - array_init(subpats); + subpats = zend_try_array_init(subpats); + if (!subpats) { + return; + } } subpats_order = global ? PREG_PATTERN_ORDER : 0; @@ -1083,7 +1211,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub * allocate the table only if there are any named subpatterns. */ subpat_names = NULL; - if (pce->name_count > 0) { + if (subpats && pce->name_count > 0) { subpat_names = make_subpats_table(num_subpats, pce); if (!subpat_names) { RETURN_FALSE; @@ -1108,7 +1236,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub if (!match_data) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; if (subpat_names) { - efree(subpat_names); + free_subpats_table(subpat_names, num_subpats); } if (match_sets) { efree(match_sets); @@ -1117,7 +1245,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub } } - options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK; + options = (pce->compile_options & PCRE2_UTF) && !(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8) + ? 0 : PCRE2_NO_UTF_CHECK; /* Execute the regular expression. 
*/ #ifdef HAVE_PCRE_JIT_SUPPORT @@ -1148,7 +1277,7 @@ matched: /* Try to get the list of substrings and display a warning if failed. */ if (offsets[1] < offsets[0]) { if (subpat_names) { - efree(subpat_names); + free_subpats_table(subpat_names, num_subpats); } if (match_sets) efree(match_sets); php_error_docref(NULL, E_WARNING, "Get subpatterns list failed"); @@ -1160,21 +1289,16 @@ matched: /* For each subpattern, insert it into the appropriate array. */ if (offset_capture) { for (i = 0; i < count; i++) { - add_offset_pair(&match_sets[i], subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); + add_offset_pair( + &match_sets[i], subject, offsets[2*i], offsets[2*i+1], + NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_next_index_null(&match_sets[i]); - } else { - add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC()); - } - } else { - add_next_index_stringl(&match_sets[i], subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } + zval val; + populate_match_value( + &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); + zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val); } } mark = pcre2_get_mark(match_data); @@ -1192,7 +1316,11 @@ matched: */ if (count < num_subpats) { for (; i < num_subpats; i++) { - if (unmatched_as_null) { + if (offset_capture) { + add_offset_pair( + &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET, + NULL, unmatched_as_null); + } else if (unmatched_as_null) { add_next_index_null(&match_sets[i]); } else { add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC()); @@ -1200,133 +1328,20 @@ matched: } } } else { - /* Allocate the result set array */ + /* Allocate and populate the result set array */ array_init_size(&result_set, count + (mark ? 
1 : 0)); - - /* Add all the subpatterns to it */ - if (subpat_names) { - if (offset_capture) { - for (i = 0; i < count; i++) { - add_offset_pair(&result_set, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null); - } - } else { - for (i = 0; i < count; i++) { - if (subpat_names[i]) { - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_assoc_null(&result_set, subpat_names[i]); - } else { - add_assoc_str(&result_set, subpat_names[i], ZSTR_EMPTY_ALLOC()); - } - } else { - add_assoc_stringl(&result_set, subpat_names[i], subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } - } - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_next_index_null(&result_set); - } else { - add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC()); - } - } else { - add_next_index_stringl(&result_set, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } - } - } - } else { - if (offset_capture) { - for (i = 0; i < count; i++) { - add_offset_pair(&result_set, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); - } - } else { - for (i = 0; i < count; i++) { - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_next_index_null(&result_set); - } else { - add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC()); - } - } else { - add_next_index_stringl(&result_set, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } - } - } - } - /* Add MARK, if available */ mark = pcre2_get_mark(match_data); - if (mark) { - add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark); - } + populate_subpat_array( + &result_set, subject, offsets, subpat_names, + num_subpats, count, mark, flags); /* And add it to the output array */ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set); } } else { /* single pattern matching */ /* For each subpattern, insert it into the subpatterns array. 
*/ - if (subpat_names) { - if (offset_capture) { - for (i = 0; i < count; i++) { - add_offset_pair(subpats, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], subpat_names[i], unmatched_as_null); - } - } else { - for (i = 0; i < count; i++) { - if (subpat_names[i]) { - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_assoc_null(subpats, subpat_names[i]); - } else { - add_assoc_str(subpats, subpat_names[i], ZSTR_EMPTY_ALLOC()); - } - } else { - add_assoc_stringl(subpats, subpat_names[i], subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } - } - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_next_index_null(subpats); - } else { - add_next_index_str(subpats, ZSTR_EMPTY_ALLOC()); - } - } else { - add_next_index_stringl(subpats, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } - } - } - } else { - if (offset_capture) { - for (i = 0; i < count; i++) { - add_offset_pair(subpats, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], NULL, unmatched_as_null); - } - } else { - for (i = 0; i < count; i++) { - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_next_index_null(subpats); - } else { - add_next_index_str(subpats, ZSTR_EMPTY_ALLOC()); - } - } else { - add_next_index_stringl(subpats, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } - } - } - } - /* Add MARK, if available */ mark = pcre2_get_mark(match_data); - if (mark) { - add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark); - } + populate_subpat_array( + subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags); break; } } @@ -1394,8 +1409,7 @@ error: if (subpat_names) { for (i = 0; i < num_subpats; i++) { if (subpat_names[i]) { - zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i], - strlen(subpat_names[i]), &match_sets[i]); + zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]); 
Z_ADDREF(match_sets[i]); } zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]); @@ -1413,11 +1427,15 @@ error: } if (subpat_names) { - efree(subpat_names); + free_subpats_table(subpat_names, num_subpats); } - /* Did we encounter an error? */ if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) { + /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */ + if ((pce->compile_options & PCRE2_UTF) && !ZSTR_IS_INTERNED(subject_str)) { + GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8); + } + RETVAL_LONG(matched); } else { RETVAL_FALSE; @@ -1482,29 +1500,14 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, char **subpat_names, int count, const PCRE2_SPTR mark) +static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { zend_string *result_str; zval retval; /* Function return value */ zval arg; /* Argument to pass to function */ - int i; array_init_size(&arg, count + (mark ? 
1 : 0)); - if (subpat_names) { - for (i = 0; i < count; i++) { - if (subpat_names[i]) { - add_assoc_stringl(&arg, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]); - } - add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); - } - } else { - for (i = 0; i < count; i++) { - add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]); - } - } - if (mark) { - add_assoc_string(&arg, "MARK", (char *) mark); - } + populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags); fci->retval = &retval; fci->param_count = 1; @@ -1543,6 +1546,11 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex, pcre_cache_entry *pce; /* Compiled regular expression */ zend_string *result; /* Function result */ + /* Abort on pending exception, e.g. thrown from __toString(). */ + if (UNEXPECTED(EG(exception))) { + return NULL; + } + /* Compile regex or get it from cache. */ if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) { return NULL; @@ -1800,12 +1808,12 @@ error: /* }}} */ /* {{{ php_pcre_replace_func_impl() */ -static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count) +static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags) { uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ PCRE2_SIZE *offsets; /* Array of subpattern offsets */ - char **subpat_names; /* Array for named subpatterns */ + zend_string **subpat_names; /* Array for named subpatterns */ uint32_t num_subpats; /* Number of captured subpatterns */ size_t new_len; /* Length of needed storage */ size_t alloc_len; /* Actual 
allocated length */ @@ -1851,7 +1859,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin if (!match_data) { PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR; if (subpat_names) { - efree(subpat_names); + free_subpats_table(subpat_names, num_subpats); } mdata_used = old_mdata_used; return NULL; @@ -1902,8 +1910,9 @@ matched: new_len = result_len + offsets[0] - start_offset; /* part before the match */ /* Use custom function to get replacement string and its length. */ - eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count, - pcre2_get_mark(match_data)); + eval_result = preg_do_repl_func( + fci, fcc, subject, offsets, subpat_names, num_subpats, count, + pcre2_get_mark(match_data), flags); ZEND_ASSERT(eval_result); new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len); @@ -2007,7 +2016,7 @@ error: mdata_used = old_mdata_used; if (UNEXPECTED(subpat_names)) { - efree(subpat_names); + free_subpats_table(subpat_names, num_subpats); } return result; @@ -2019,7 +2028,7 @@ error: static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, zend_string *subject_str, zend_fcall_info *fci, zend_fcall_info_cache *fcc, - size_t limit, size_t *replace_count) + size_t limit, size_t *replace_count, zend_long flags) { pcre_cache_entry *pce; /* Compiled regular expression */ zend_string *result; /* Function result */ @@ -2029,8 +2038,9 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex, return NULL; } pce->refcount++; - result = php_pcre_replace_func_impl(pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc, - limit, replace_count); + result = php_pcre_replace_func_impl( + pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc, + limit, replace_count, flags); pce->refcount--; return result; @@ -2150,17 +2160,14 @@ static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval /* {{{ 
php_replace_in_subject_func */ -static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count) +static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags) { zend_string *result; zend_string *subject_str = zval_get_string(subject); if (Z_TYPE_P(regex) != IS_ARRAY) { - result = php_pcre_replace_func(Z_STR_P(regex), - subject_str, - fci, fcc, - limit, - replace_count); + result = php_pcre_replace_func( + Z_STR_P(regex), subject_str, fci, fcc, limit, replace_count, flags); zend_string_release_ex(subject_str, 0); return result; } else { @@ -2176,11 +2183,8 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc /* Do the actual replacement and put the result back into subject_str for further replacements. */ - result = php_pcre_replace_func(regex_str, - subject_str, - fci, fcc, - limit, - replace_count); + result = php_pcre_replace_func( + regex_str, subject_str, fci, fcc, limit, replace_count, flags); zend_tmp_string_release(tmp_regex_str); zend_string_release_ex(subject_str, 0); subject_str = result; @@ -2196,7 +2200,7 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc /* {{{ preg_replace_func_impl */ -static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val) +static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags) { zend_string *result; size_t replace_count = 0; @@ -2206,7 +2210,8 @@ static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall } if (Z_TYPE_P(subject) != IS_ARRAY) { - result = php_replace_in_subject_func(regex, fci, fcc, subject, limit_val, 
&replace_count); + result = php_replace_in_subject_func( + regex, fci, fcc, subject, limit_val, &replace_count, flags); if (result != NULL) { RETVAL_STR(result); } else { @@ -2223,7 +2228,8 @@ static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall /* For each subject entry, convert it to string, then perform replacement and add the result to the return_value array. */ ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) { - result = php_replace_in_subject_func(regex, fci, fcc, subject_entry, limit_val, &replace_count); + result = php_replace_in_subject_func( + regex, fci, fcc, subject_entry, limit_val, &replace_count, flags); if (result != NULL) { /* Add to return array */ ZVAL_STR(&zv, result); @@ -2257,7 +2263,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter) Z_PARAM_ZVAL(subject) Z_PARAM_OPTIONAL Z_PARAM_LONG(limit) - Z_PARAM_ZVAL_DEREF(zcount) + Z_PARAM_ZVAL(zcount) ZEND_PARSE_PARAMETERS_END(); if (Z_TYPE_P(replace) != IS_ARRAY) { @@ -2323,8 +2329,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter) } if (zcount) { - zval_ptr_dtor(zcount); - ZVAL_LONG(zcount, replace_count); + ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); } } /* }}} */ @@ -2342,19 +2347,20 @@ static PHP_FUNCTION(preg_replace) static PHP_FUNCTION(preg_replace_callback) { zval *regex, *replace, *subject, *zcount = NULL; - zend_long limit = -1; + zend_long limit = -1, flags = 0; size_t replace_count; zend_fcall_info fci; zend_fcall_info_cache fcc; /* Get function parameters and do error-checking. 
*/ - ZEND_PARSE_PARAMETERS_START(3, 5) + ZEND_PARSE_PARAMETERS_START(3, 6) Z_PARAM_ZVAL(regex) Z_PARAM_ZVAL(replace) Z_PARAM_ZVAL(subject) Z_PARAM_OPTIONAL Z_PARAM_LONG(limit) - Z_PARAM_ZVAL_DEREF(zcount) + Z_PARAM_ZVAL(zcount) + Z_PARAM_LONG(flags) ZEND_PARSE_PARAMETERS_END(); if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) { @@ -2369,10 +2375,9 @@ static PHP_FUNCTION(preg_replace_callback) fci.object = NULL; ZVAL_COPY_VALUE(&fci.function_name, replace); - replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit); + replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit, flags); if (zcount) { - zval_ptr_dtor(zcount); - ZVAL_LONG(zcount, replace_count); + ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); } } /* }}} */ @@ -2382,19 +2387,20 @@ static PHP_FUNCTION(preg_replace_callback) static PHP_FUNCTION(preg_replace_callback_array) { zval regex, zv, *replace, *subject, *pattern, *zcount = NULL; - zend_long limit = -1; + zend_long limit = -1, flags = 0; zend_string *str_idx; size_t replace_count = 0; zend_fcall_info fci; zend_fcall_info_cache fcc; /* Get function parameters and do error-checking. 
*/ - ZEND_PARSE_PARAMETERS_START(2, 4) + ZEND_PARSE_PARAMETERS_START(2, 5) Z_PARAM_ARRAY(pattern) Z_PARAM_ZVAL(subject) Z_PARAM_OPTIONAL Z_PARAM_LONG(limit) - Z_PARAM_ZVAL_DEREF(zcount) + Z_PARAM_ZVAL(zcount) + Z_PARAM_LONG(flags) ZEND_PARSE_PARAMETERS_END(); fci.size = sizeof(fci); @@ -2421,7 +2427,7 @@ static PHP_FUNCTION(preg_replace_callback_array) ZVAL_COPY_VALUE(&fci.function_name, replace); - replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit); + replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit, flags); if (subject != return_value) { subject = return_value; } else { @@ -2439,8 +2445,7 @@ static PHP_FUNCTION(preg_replace_callback_array) } ZEND_HASH_FOREACH_END(); if (zcount) { - zval_ptr_dtor(zcount); - ZVAL_LONG(zcount, replace_count); + ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count); } } /* }}} */ @@ -2492,14 +2497,14 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ PCRE2_SIZE start_offset; /* Where the new search starts */ - PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */ - char *last_match; /* Location of last match */ + PCRE2_SIZE last_match_offset; /* Location of last match */ uint32_t no_empty; /* If NO_EMPTY flag is set */ uint32_t delim_capture; /* If delimiters should be captured */ uint32_t offset_capture; /* If offsets should be captured */ uint32_t num_subpats; /* Number of captured subpatterns */ zval tmp; pcre2_match_data *match_data; + char *subject = ZSTR_VAL(subject_str); no_empty = flags & PREG_SPLIT_NO_EMPTY; delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; @@ -2513,11 +2518,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, /* Start at the beginning of the string */ start_offset = 0; - next_offset = 0; - last_match = ZSTR_VAL(subject_str); + last_match_offset = 0; PCRE_G(error_code) = PHP_PCRE_NO_ERROR; - if 
(limit_val == -1) { /* pass */ } else if (limit_val == 0) { @@ -2541,11 +2544,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { - count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } else #endif - count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, options, match_data, mctx); while (1) { @@ -2565,14 +2568,15 @@ matched: break; } - if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) { - + if (!no_empty || offsets[0] != last_match_offset) { if (offset_capture) { /* Add (match, offset) pair to the return value */ - add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0); + add_offset_pair( + return_value, subject, last_match_offset, offsets[0], + NULL, 0); } else { /* Add the piece to the return value */ - ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match); + populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]); zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } @@ -2581,19 +2585,16 @@ matched: limit_val--; } - last_match = &ZSTR_VAL(subject_str)[offsets[1]]; - next_offset = offsets[1]; - if (delim_capture) { - size_t i, match_len; + size_t i; for (i = 1; i < count; i++) { - match_len = offsets[(i<<1)+1] - offsets[i<<1]; /* If we have matched a delimiter */ - if (!no_empty || match_len > 0) { + if (!no_empty || offsets[2*i] != offsets[2*i+1]) { if (offset_capture) { - add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0); + add_offset_pair( + return_value, 
subject, offsets[2*i], offsets[2*i+1], NULL, 0); } else { - ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len); + populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]); zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } } @@ -2601,14 +2602,14 @@ matched: } /* Advance to the position right after the last full match */ - start_offset = offsets[1]; + start_offset = last_match_offset = offsets[1]; /* If we have matched an empty string, mimic what Perl's /g options does. This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try the match again at the same point. If this fails (picked up above) we advance to the next character. */ if (start_offset == offsets[0]) { - count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx); if (count >= 0) { goto matched; @@ -2618,7 +2619,7 @@ matched: the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. 
*/ if (start_offset < ZSTR_LEN(subject_str)) { - start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset); + start_offset += calculate_unit_length(pce, subject + start_offset); } else { break; } @@ -2642,11 +2643,11 @@ error: #ifdef HAVE_PCRE_JIT_SUPPORT if (pce->preg_options & PREG_JIT) { - count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } else #endif - count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } if (match_data != mdata) { @@ -2659,18 +2660,18 @@ error: } last: - start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */ + start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */ if (!no_empty || start_offset < ZSTR_LEN(subject_str)) { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ - add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0); + add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0); } else { /* Add the last piece to the return value */ - if (last_match == ZSTR_VAL(subject_str)) { + if (start_offset == 0) { ZVAL_STR_COPY(&tmp, subject_str); } else { - ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match); + populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str)); } zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } @@ -2936,8 +2937,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval 
*return Returns the error code of the last regexp execution. */ static PHP_FUNCTION(preg_last_error) { - ZEND_PARSE_PARAMETERS_START(0, 0) - ZEND_PARSE_PARAMETERS_END(); + ZEND_PARSE_PARAMETERS_NONE(); RETURN_LONG(PCRE_G(error_code)); } @@ -2976,6 +2976,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3) ZEND_ARG_INFO(0, subject) ZEND_ARG_INFO(0, limit) ZEND_ARG_INFO(1, count) + ZEND_ARG_INFO(0, flags) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2) @@ -2983,6 +2984,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2) ZEND_ARG_INFO(0, subject) ZEND_ARG_INFO(0, limit) ZEND_ARG_INFO(1, count) + ZEND_ARG_INFO(0, flags) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2) @@ -3027,12 +3029,8 @@ zend_module_entry pcre_module_entry = { pcre_functions, PHP_MINIT(pcre), PHP_MSHUTDOWN(pcre), -#ifdef HAVE_PCRE_JIT_SUPPORT PHP_RINIT(pcre), -#else - NULL, -#endif - NULL, + PHP_RSHUTDOWN(pcre), PHP_MINFO(pcre), PHP_PCRE_VERSION, PHP_MODULE_GLOBALS(pcre), @@ -3081,14 +3079,3 @@ PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce) assert(NULL != pce); return pce->re; }/*}}}*/ - -#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */ - -/* - * Local variables: - * tab-width: 4 - * c-basic-offset: 4 - * End: - * vim600: sw=4 ts=4 fdm=marker - * vim<600: sw=4 ts=4 - */ |