summary | refs | log | tree | commit | diff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- ext/pcre/php_pcre.c 629
1 files changed, 308 insertions, 321 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index e82dc252b2..d19c7960a6 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -2,7 +2,7 @@
+----------------------------------------------------------------------+
| PHP Version 7 |
+----------------------------------------------------------------------+
- | Copyright (c) 1997-2018 The PHP Group |
+ | Copyright (c) The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
@@ -23,8 +23,7 @@
#include "ext/standard/info.h"
#include "ext/standard/basic_functions.h"
#include "zend_smart_str.h"
-
-#if HAVE_PCRE || HAVE_BUNDLED_PCRE
+#include "SAPI.h"
#include "ext/standard/php_string.h"
@@ -95,7 +94,6 @@ static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_unlock()
#endif
-#if HAVE_SETLOCALE
ZEND_TLS HashTable char_tables;
static void php_pcre_free_char_table(zval *data)
@@ -103,7 +101,6 @@ static void php_pcre_free_char_table(zval *data)
void *ptr = Z_PTR_P(data);
pefree(ptr, 1);
}/*}}}*/
-#endif
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
{
@@ -146,7 +143,16 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
if (!pce) return;
pcre2_code_free(pce->re);
- pefree(pce, 1);
+ free(pce);
+}
+/* }}} */
+
+/* Hash destructor for pattern-cache entries allocated with the request
+ * allocator: frees the compiled pattern, then the entry itself with efree().
+ * A NULL entry pointer is ignored. */
+static void php_efree_pcre_cache(zval *data) /* {{{ */
+{
+	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);
+
+	if (entry != NULL) {
+		pcre2_code_free(entry->re);
+		efree(entry);
+	}
}
/* }}} */
@@ -255,30 +261,35 @@ static PHP_GINIT_FUNCTION(pcre) /* {{{ */
{
php_pcre_mutex_alloc();
- zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
+ /* If we're on the CLI SAPI, there will only be one request, so we don't need the
+ * cache to survive after RSHUTDOWN. */
+ pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
+ if (!pcre_globals->per_request_cache) {
+ zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
+ }
+
pcre_globals->backtrack_limit = 0;
pcre_globals->recursion_limit = 0;
pcre_globals->error_code = PHP_PCRE_NO_ERROR;
+ ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
+ ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
#ifdef HAVE_PCRE_JIT_SUPPORT
pcre_globals->jit = 1;
#endif
php_pcre_init_pcre2(1);
-#if HAVE_SETLOCALE
zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
-#endif
}
/* }}} */
static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
- zend_hash_destroy(&pcre_globals->pcre_cache);
+ if (!pcre_globals->per_request_cache) {
+ zend_hash_destroy(&pcre_globals->pcre_cache);
+ }
php_pcre_shutdown_pcre2();
-#if HAVE_SETLOCALE
zend_hash_destroy(&char_tables);
-#endif
-
php_pcre_mutex_free();
}
/* }}} */
@@ -438,10 +449,10 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
}
/* }}} */
-#ifdef HAVE_PCRE_JIT_SUPPORT
/* {{{ PHP_RINIT_FUNCTION(pcre) */
static PHP_RINIT_FUNCTION(pcre)
{
+#ifdef HAVE_PCRE_JIT_SUPPORT
if (UNEXPECTED(!pcre2_init_ok)) {
/* Retry. */
php_pcre_mutex_lock();
@@ -454,11 +465,28 @@ static PHP_RINIT_FUNCTION(pcre)
}
mdata_used = 0;
+#endif
+
+ if (PCRE_G(per_request_cache)) {
+ zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
+ }
return SUCCESS;
}
/* }}} */
-#endif
+
+/* Request shutdown: destroys the pattern cache when it is per-request, then
+ * releases the two cached "unmatched" pairs and resets both slots to UNDEF
+ * so add_offset_pair() rebuilds them on demand. */
+static PHP_RSHUTDOWN_FUNCTION(pcre)
+{
+	zval *null_pair = &PCRE_G(unmatched_null_pair);
+	zval *empty_pair = &PCRE_G(unmatched_empty_pair);
+
+	if (PCRE_G(per_request_cache)) {
+		zend_hash_destroy(&PCRE_G(pcre_cache));
+	}
+
+	/* The two slots are independent; handle each one as a unit. */
+	zval_ptr_dtor(null_pair);
+	ZVAL_UNDEF(null_pair);
+
+	zval_ptr_dtor(empty_pair);
+	ZVAL_UNDEF(empty_pair);
+
+	return SUCCESS;
+}
/* {{{ static pcre_clean_cache */
static int pcre_clean_cache(zval *data, void *arg)
@@ -475,29 +503,39 @@ static int pcre_clean_cache(zval *data, void *arg)
}
/* }}} */
+/* Releases every non-NULL name in a subpattern-name table of num_subpats
+ * slots (such as the one make_subpats_table() allocates), then frees the
+ * table array itself. */
+static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
+	zend_string **slot = subpat_names;
+	uint32_t remaining = num_subpats;
+
+	while (remaining-- > 0) {
+		if (*slot != NULL) {
+			zend_string_release(*slot);
+		}
+		slot++;
+	}
+	efree(subpat_names);
+}
+
/* {{{ static make_subpats_table */
-static char **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
+static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
{
uint32_t name_cnt = pce->name_count, name_size, ni = 0;
char *name_table;
- unsigned short name_idx;
- char **subpat_names;
+ zend_string **subpat_names;
int rc1, rc2;
rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
if (rc1 < 0 || rc2 < 0) {
- php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc1 < 0 ? rc1 : rc2);
+ php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
return NULL;
}
- subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
+ subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
while (ni++ < name_cnt) {
- name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
- subpat_names[name_idx] = name_table + 2;
- if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
+ unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
+ const char *name = name_table + 2;
+ subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
+ if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
- efree(subpat_names);
+ free_subpats_table(subpat_names, num_subpats);
return NULL;
}
name_table += name_size;
@@ -542,24 +580,19 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
char *pattern;
size_t pattern_len;
uint32_t poptions = 0;
-#if HAVE_SETLOCALE
const uint8_t *tables = NULL;
-#endif
zval *zv;
pcre_cache_entry new_entry;
int rc;
zend_string *key;
pcre_cache_entry *ret;
-#if HAVE_SETLOCALE
if (locale_aware && BG(locale_string) &&
(ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
- } else
-#endif
- {
+ } else {
key = regex;
}
@@ -567,11 +600,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
back the compiled pattern, otherwise go on and compile it. */
zv = zend_hash_find(&PCRE_G(pcre_cache), key);
if (zv) {
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
return (pcre_cache_entry*)Z_PTR_P(zv);
}
@@ -581,11 +612,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
get to the end without encountering a delimiter. */
while (isspace((int)*(unsigned char *)p)) p++;
if (*p == 0) {
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
php_error_docref(NULL, E_WARNING,
p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
@@ -596,11 +625,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
or a backslash. */
delimiter = *p++;
if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
@@ -641,11 +668,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
}
if (*pp == 0) {
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
php_error_docref(NULL,E_WARNING, "Null byte in regex");
} else if (start_delimiter == end_delimiter) {
@@ -706,11 +731,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
}
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
return NULL;
}
}
@@ -719,15 +742,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
return NULL;
}
-#if HAVE_SETLOCALE
if (key != regex) {
tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
if (!tables) {
@@ -741,12 +761,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
return NULL;
}
_k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
+ GC_MAKE_PERSISTENT_LOCAL(_k);
zend_hash_add_ptr(&char_tables, _k, (void *)tables);
zend_string_release(_k);
}
pcre2_set_character_tables(cctx, tables);
}
-#endif
/* Set extra options for the compile context. */
if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
@@ -762,11 +782,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
}
if (re == NULL) {
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
pcre2_get_error_message(errnumber, error, sizeof(error));
php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
@@ -817,11 +835,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
if (rc < 0) {
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
@@ -829,11 +845,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
if (rc < 0) {
-#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release_ex(key, 0);
}
-#endif
php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
@@ -847,22 +861,20 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
* as hash keys especially for this table.
* See bug #63180
*/
- if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
+ if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
-
GC_MAKE_PERSISTENT_LOCAL(str);
-#if HAVE_SETLOCALE
- if (key != regex) {
- zend_string_release_ex(key, 0);
- }
-#endif
ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
zend_string_release(str);
} else {
ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
}
+ if (key != regex) {
+ zend_string_release_ex(key, 0);
+ }
+
return ret;
}
/* }}} */
@@ -877,13 +889,10 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
/* {{{ pcre_get_compiled_regex
*/
-PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options)
+PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
{
pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
- if (preg_options) {
- *preg_options = 0;
- }
if (capture_count) {
*capture_count = pce ? pce->capture_count : 0;
}
@@ -899,7 +908,7 @@ PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capt
pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
if (preg_options) {
- *preg_options = 0;
+ *preg_options = pce ? pce->preg_options : 0;
}
if (compile_options) {
*compile_options = pce ? pce->compile_options : 0;
@@ -945,35 +954,149 @@ PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
}
}/*}}}*/
+/* Builds the shared [NULL, -1] pair and stores it in
+ * PCRE_G(unmatched_null_pair). add_offset_pair() calls this the first time
+ * it needs the pair while the slot is still UNDEF.
+ *
+ * Declared with (void): an empty parameter list in C (before C23) leaves the
+ * arguments unchecked instead of declaring "no parameters". */
+static void init_unmatched_null_pair(void) {
+	zval val1, val2;
+	ZVAL_NULL(&val1);
+	ZVAL_LONG(&val2, -1);
+	ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
+}
+
+/* Builds the shared ["", -1] pair and stores it in
+ * PCRE_G(unmatched_empty_pair). add_offset_pair() calls this the first time
+ * it needs the pair while the slot is still UNDEF.
+ *
+ * Declared with (void): an empty parameter list in C (before C23) leaves the
+ * arguments unchecked instead of declaring "no parameters". */
+static void init_unmatched_empty_pair(void) {
+	zval val1, val2;
+	ZVAL_EMPTY_STRING(&val1);
+	ZVAL_LONG(&val2, -1);
+	ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
+}
+
+/* Writes the substring subject[start_offset, end_offset) into val.
+ * A zero-length range yields the empty string, a one-byte range yields the
+ * interned single-character string, anything longer is copied. */
+static zend_always_inline void populate_match_value_str(
+		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
+	if (start_offset == end_offset) {
+		ZVAL_EMPTY_STRING(val);
+		return;
+	}
+	if (start_offset + 1 == end_offset) {
+		ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
+		return;
+	}
+	ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
+}
+
+/* Writes the value of one subpattern into val. A matched group (start_offset
+ * is not PCRE2_UNSET) becomes its substring; an unmatched group becomes NULL
+ * when unmatched_as_null is non-zero and the empty string otherwise. */
+static inline void populate_match_value(
+		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
+		uint32_t unmatched_as_null) {
+	if (PCRE2_UNSET != start_offset) {
+		populate_match_value_str(val, subject, start_offset, end_offset);
+		return;
+	}
+	if (unmatched_as_null) {
+		ZVAL_NULL(val);
+		return;
+	}
+	ZVAL_EMPTY_STRING(val);
+}
+
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, char *name, uint32_t unmatched_as_null)
+static inline void add_offset_pair(
+ zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
+ zend_string *name, uint32_t unmatched_as_null)
{
- zval match_pair, tmp;
-
- array_init_size(&match_pair, 2);
+ zval match_pair;
/* Add (match, offset) to the return value */
- if (PCRE2_UNSET == offset) {
+ if (PCRE2_UNSET == start_offset) {
if (unmatched_as_null) {
- ZVAL_NULL(&tmp);
+ if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
+ init_unmatched_null_pair();
+ }
+ ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
} else {
- ZVAL_EMPTY_STRING(&tmp);
+ if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
+ init_unmatched_empty_pair();
+ }
+ ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
}
} else {
- ZVAL_STRINGL(&tmp, str, len);
+ zval val1, val2;
+ populate_match_value_str(&val1, subject, start_offset, end_offset);
+ ZVAL_LONG(&val2, start_offset);
+ ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
}
- zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
- ZVAL_LONG(&tmp, offset);
- zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
if (name) {
Z_ADDREF(match_pair);
- zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair);
+ zend_hash_update(Z_ARRVAL_P(result), name, &match_pair);
}
zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
}
/* }}} */
+/* Fills the array `subpats` with one entry per subpattern of a single match.
+ *
+ * subpats      - destination array zval (entries are appended / updated)
+ * subject      - subject buffer the offsets refer to
+ * offsets      - offset vector; group i is [offsets[2*i], offsets[2*i+1])
+ * subpat_names - optional table of group names indexed by group number
+ *                (NULL when names are not used; individual slots may be NULL)
+ * num_subpats  - total number of groups; upper bound for the padding loops
+ * count        - number of groups present in `offsets`
+ * mark         - MARK name to store under the "MARK" key, or NULL
+ * flags        - PREG_OFFSET_CAPTURE and PREG_UNMATCHED_AS_NULL are honoured
+ *
+ * With PREG_OFFSET_CAPTURE each entry is built by add_offset_pair(); otherwise
+ * it is the plain value from populate_match_value(). A named group is stored
+ * under its name first and then under the next numeric index. Groups from
+ * `count` up to `num_subpats` are padded only when PREG_UNMATCHED_AS_NULL is
+ * set.
+ *
+ * NOTE(review): `i` is a signed int but is compared against the uint32_t
+ * `num_subpats` in the padding loops - confirm `count` can never be negative
+ * here. */
+static void populate_subpat_array(
+ zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
+ uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
+ zend_bool offset_capture = (flags & PREG_OFFSET_CAPTURE) != 0;
+ zend_bool unmatched_as_null = (flags & PREG_UNMATCHED_AS_NULL) != 0;
+ zval val;
+ int i;
+ if (subpat_names) {
+ /* Named groups: each value goes in under its name (if any) and its index. */
+ if (offset_capture) {
+ for (i = 0; i < count; i++) {
+ add_offset_pair(
+ subpats, subject, offsets[2*i], offsets[2*i+1],
+ subpat_names[i], unmatched_as_null);
+ }
+ if (unmatched_as_null) {
+ /* Pad the groups beyond `count` with the unmatched pair. */
+ for (i = count; i < num_subpats; i++) {
+ add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
+ }
+ }
+ } else {
+ for (i = 0; i < count; i++) {
+ populate_match_value(
+ &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
+ if (subpat_names[i]) {
+ /* val is stored twice (name + index), so take an extra reference. */
+ Z_TRY_ADDREF(val);
+ zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &val);
+ }
+ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
+ }
+ if (unmatched_as_null) {
+ /* Pad the groups beyond `count` with NULL, under name and index. */
+ for (i = count; i < num_subpats; i++) {
+ ZVAL_NULL(&val);
+ if (subpat_names[i]) {
+ zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &val);
+ }
+ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
+ }
+ }
+ }
+ } else {
+ /* No name table: entries are only appended under numeric indexes. */
+ if (offset_capture) {
+ for (i = 0; i < count; i++) {
+ add_offset_pair(
+ subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
+ }
+ if (unmatched_as_null) {
+ for (i = count; i < num_subpats; i++) {
+ add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
+ }
+ }
+ } else {
+ for (i = 0; i < count; i++) {
+ populate_match_value(
+ &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
+ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &val);
+ }
+ if (unmatched_as_null) {
+ for (i = count; i < num_subpats; i++) {
+ add_next_index_null(subpats);
+ }
+ }
+ }
+ }
+ /* Add MARK, if available */
+ if (mark) {
+ add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
+ }
+}
+
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
/* parameters */
@@ -988,7 +1111,7 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ *
Z_PARAM_STR(regex)
Z_PARAM_STR(subject)
Z_PARAM_OPTIONAL
- Z_PARAM_ZVAL_DEREF(subpats)
+ Z_PARAM_ZVAL(subpats)
Z_PARAM_LONG(flags)
Z_PARAM_LONG(start_offset)
ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
@@ -999,14 +1122,14 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ *
}
pce->refcount++;
- php_pcre_match_impl(pce, ZSTR_VAL(subject), ZSTR_LEN(subject), return_value, subpats,
+ php_pcre_match_impl(pce, subject, return_value, subpats,
global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
pce->refcount--;
}
/* }}} */
/* {{{ php_pcre_match_impl() */
-PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value,
+PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
{
zval result_set, /* Holds a set of subpatterns after
@@ -1018,7 +1141,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
uint32_t num_subpats; /* Number of captured subpatterns */
int matched; /* Has anything matched */
- char **subpat_names; /* Array for named subpatterns */
+ zend_string **subpat_names; /* Array for named subpatterns */
size_t i;
uint32_t subpats_order; /* Order of subpattern matches */
uint32_t offset_capture; /* Capture match offsets: yes/no */
@@ -1028,12 +1151,17 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
pcre2_match_data *match_data;
PCRE2_SIZE start_offset2;
+ char *subject = ZSTR_VAL(subject_str);
+ size_t subject_len = ZSTR_LEN(subject_str);
+
ZVAL_UNDEF(&marks);
/* Overwrite the passed-in value for subpatterns with an empty array. */
if (subpats != NULL) {
- zval_ptr_dtor(subpats);
- array_init(subpats);
+ subpats = zend_try_array_init(subpats);
+ if (!subpats) {
+ return;
+ }
}
subpats_order = global ? PREG_PATTERN_ORDER : 0;
@@ -1083,7 +1211,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
* allocate the table only if there are any named subpatterns.
*/
subpat_names = NULL;
- if (pce->name_count > 0) {
+ if (subpats && pce->name_count > 0) {
subpat_names = make_subpats_table(num_subpats, pce);
if (!subpat_names) {
RETURN_FALSE;
@@ -1108,7 +1236,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
if (!match_data) {
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
if (subpat_names) {
- efree(subpat_names);
+ free_subpats_table(subpat_names, num_subpats);
}
if (match_sets) {
efree(match_sets);
@@ -1117,7 +1245,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t sub
}
}
- options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
+ options = (pce->compile_options & PCRE2_UTF) && !(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)
+ ? 0 : PCRE2_NO_UTF_CHECK;
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
@@ -1148,7 +1277,7 @@ matched:
/* Try to get the list of substrings and display a warning if failed. */
if (offsets[1] < offsets[0]) {
if (subpat_names) {
- efree(subpat_names);
+ free_subpats_table(subpat_names, num_subpats);
}
if (match_sets) efree(match_sets);
php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
@@ -1160,21 +1289,16 @@ matched:
/* For each subpattern, insert it into the appropriate array. */
if (offset_capture) {
for (i = 0; i < count; i++) {
- add_offset_pair(&match_sets[i], subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
+ add_offset_pair(
+ &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
+ NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_next_index_null(&match_sets[i]);
- } else {
- add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_next_index_stringl(&match_sets[i], subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
+ zval val;
+ populate_match_value(
+ &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
+ zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
}
}
mark = pcre2_get_mark(match_data);
@@ -1192,7 +1316,11 @@ matched:
*/
if (count < num_subpats) {
for (; i < num_subpats; i++) {
- if (unmatched_as_null) {
+ if (offset_capture) {
+ add_offset_pair(
+ &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
+ NULL, unmatched_as_null);
+ } else if (unmatched_as_null) {
add_next_index_null(&match_sets[i]);
} else {
add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
@@ -1200,133 +1328,20 @@ matched:
}
}
} else {
- /* Allocate the result set array */
+ /* Allocate and populate the result set array */
array_init_size(&result_set, count + (mark ? 1 : 0));
-
- /* Add all the subpatterns to it */
- if (subpat_names) {
- if (offset_capture) {
- for (i = 0; i < count; i++) {
- add_offset_pair(&result_set, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
- }
- } else {
- for (i = 0; i < count; i++) {
- if (subpat_names[i]) {
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_assoc_null(&result_set, subpat_names[i]);
- } else {
- add_assoc_str(&result_set, subpat_names[i], ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_assoc_stringl(&result_set, subpat_names[i], subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
- }
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_next_index_null(&result_set);
- } else {
- add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_next_index_stringl(&result_set, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
- }
- }
- } else {
- if (offset_capture) {
- for (i = 0; i < count; i++) {
- add_offset_pair(&result_set, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
- }
- } else {
- for (i = 0; i < count; i++) {
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_next_index_null(&result_set);
- } else {
- add_next_index_str(&result_set, ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_next_index_stringl(&result_set, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
- }
- }
- }
- /* Add MARK, if available */
mark = pcre2_get_mark(match_data);
- if (mark) {
- add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
- }
+ populate_subpat_array(
+ &result_set, subject, offsets, subpat_names,
+ num_subpats, count, mark, flags);
/* And add it to the output array */
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
}
} else { /* single pattern matching */
/* For each subpattern, insert it into the subpatterns array. */
- if (subpat_names) {
- if (offset_capture) {
- for (i = 0; i < count; i++) {
- add_offset_pair(subpats, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], subpat_names[i], unmatched_as_null);
- }
- } else {
- for (i = 0; i < count; i++) {
- if (subpat_names[i]) {
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_assoc_null(subpats, subpat_names[i]);
- } else {
- add_assoc_str(subpats, subpat_names[i], ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_assoc_stringl(subpats, subpat_names[i], subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
- }
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_next_index_null(subpats);
- } else {
- add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_next_index_stringl(subpats, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
- }
- }
- } else {
- if (offset_capture) {
- for (i = 0; i < count; i++) {
- add_offset_pair(subpats, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], NULL, unmatched_as_null);
- }
- } else {
- for (i = 0; i < count; i++) {
- if (PCRE2_UNSET == offsets[i<<1]) {
- if (unmatched_as_null) {
- add_next_index_null(subpats);
- } else {
- add_next_index_str(subpats, ZSTR_EMPTY_ALLOC());
- }
- } else {
- add_next_index_stringl(subpats, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
- }
- }
- }
- /* Add MARK, if available */
mark = pcre2_get_mark(match_data);
- if (mark) {
- add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
- }
+ populate_subpat_array(
+ subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
break;
}
}
@@ -1394,8 +1409,7 @@ error:
if (subpat_names) {
for (i = 0; i < num_subpats; i++) {
if (subpat_names[i]) {
- zend_hash_str_update(Z_ARRVAL_P(subpats), subpat_names[i],
- strlen(subpat_names[i]), &match_sets[i]);
+ zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
Z_ADDREF(match_sets[i]);
}
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
@@ -1413,11 +1427,15 @@ error:
}
if (subpat_names) {
- efree(subpat_names);
+ free_subpats_table(subpat_names, num_subpats);
}
- /* Did we encounter an error? */
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
+ /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
+ if ((pce->compile_options & PCRE2_UTF) && !ZSTR_IS_INTERNED(subject_str)) {
+ GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
+ }
+
RETVAL_LONG(matched);
} else {
RETVAL_FALSE;
@@ -1482,29 +1500,14 @@ static int preg_get_backref(char **str, int *backref)
/* {{{ preg_do_repl_func
*/
-static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, char **subpat_names, int count, const PCRE2_SPTR mark)
+static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
{
zend_string *result_str;
zval retval; /* Function return value */
zval arg; /* Argument to pass to function */
- int i;
array_init_size(&arg, count + (mark ? 1 : 0));
- if (subpat_names) {
- for (i = 0; i < count; i++) {
- if (subpat_names[i]) {
- add_assoc_stringl(&arg, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1]);
- }
- add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
- }
- } else {
- for (i = 0; i < count; i++) {
- add_next_index_stringl(&arg, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1]);
- }
- }
- if (mark) {
- add_assoc_string(&arg, "MARK", (char *) mark);
- }
+ populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
fci->retval = &retval;
fci->param_count = 1;
@@ -1543,6 +1546,11 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
pcre_cache_entry *pce; /* Compiled regular expression */
zend_string *result; /* Function result */
+ /* Abort on pending exception, e.g. thrown from __toString(). */
+ if (UNEXPECTED(EG(exception))) {
+ return NULL;
+ }
+
/* Compile regex or get it from cache. */
if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
return NULL;
@@ -1800,12 +1808,12 @@ error:
/* }}} */
/* {{{ php_pcre_replace_func_impl() */
-static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count)
+static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
{
uint32_t options; /* Execution options */
int count; /* Count of matched subpatterns */
PCRE2_SIZE *offsets; /* Array of subpattern offsets */
- char **subpat_names; /* Array for named subpatterns */
+ zend_string **subpat_names; /* Array for named subpatterns */
uint32_t num_subpats; /* Number of captured subpatterns */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
@@ -1851,7 +1859,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
if (!match_data) {
PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
if (subpat_names) {
- efree(subpat_names);
+ free_subpats_table(subpat_names, num_subpats);
}
mdata_used = old_mdata_used;
return NULL;
@@ -1902,8 +1910,9 @@ matched:
new_len = result_len + offsets[0] - start_offset; /* part before the match */
/* Use custom function to get replacement string and its length. */
- eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count,
- pcre2_get_mark(match_data));
+ eval_result = preg_do_repl_func(
+ fci, fcc, subject, offsets, subpat_names, num_subpats, count,
+ pcre2_get_mark(match_data), flags);
ZEND_ASSERT(eval_result);
new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
@@ -2007,7 +2016,7 @@ error:
mdata_used = old_mdata_used;
if (UNEXPECTED(subpat_names)) {
- efree(subpat_names);
+ free_subpats_table(subpat_names, num_subpats);
}
return result;
@@ -2019,7 +2028,7 @@ error:
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
zend_string *subject_str,
zend_fcall_info *fci, zend_fcall_info_cache *fcc,
- size_t limit, size_t *replace_count)
+ size_t limit, size_t *replace_count, zend_long flags)
{
pcre_cache_entry *pce; /* Compiled regular expression */
zend_string *result; /* Function result */
@@ -2029,8 +2038,9 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
return NULL;
}
pce->refcount++;
- result = php_pcre_replace_func_impl(pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
- limit, replace_count);
+ result = php_pcre_replace_func_impl(
+ pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
+ limit, replace_count, flags);
pce->refcount--;
return result;
@@ -2150,17 +2160,14 @@ static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval
/* {{{ php_replace_in_subject_func
*/
-static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count)
+static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)
{
zend_string *result;
zend_string *subject_str = zval_get_string(subject);
if (Z_TYPE_P(regex) != IS_ARRAY) {
- result = php_pcre_replace_func(Z_STR_P(regex),
- subject_str,
- fci, fcc,
- limit,
- replace_count);
+ result = php_pcre_replace_func(
+ Z_STR_P(regex), subject_str, fci, fcc, limit, replace_count, flags);
zend_string_release_ex(subject_str, 0);
return result;
} else {
@@ -2176,11 +2183,8 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc
/* Do the actual replacement and put the result back into subject_str
for further replacements. */
- result = php_pcre_replace_func(regex_str,
- subject_str,
- fci, fcc,
- limit,
- replace_count);
+ result = php_pcre_replace_func(
+ regex_str, subject_str, fci, fcc, limit, replace_count, flags);
zend_tmp_string_release(tmp_regex_str);
zend_string_release_ex(subject_str, 0);
subject_str = result;
@@ -2196,7 +2200,7 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc
/* {{{ preg_replace_func_impl
*/
-static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
+static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)
{
zend_string *result;
size_t replace_count = 0;
@@ -2206,7 +2210,8 @@ static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall
}
if (Z_TYPE_P(subject) != IS_ARRAY) {
- result = php_replace_in_subject_func(regex, fci, fcc, subject, limit_val, &replace_count);
+ result = php_replace_in_subject_func(
+ regex, fci, fcc, subject, limit_val, &replace_count, flags);
if (result != NULL) {
RETVAL_STR(result);
} else {
@@ -2223,7 +2228,8 @@ static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall
/* For each subject entry, convert it to string, then perform replacement
and add the result to the return_value array. */
ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
- result = php_replace_in_subject_func(regex, fci, fcc, subject_entry, limit_val, &replace_count);
+ result = php_replace_in_subject_func(
+ regex, fci, fcc, subject_entry, limit_val, &replace_count, flags);
if (result != NULL) {
/* Add to return array */
ZVAL_STR(&zv, result);
@@ -2257,7 +2263,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
Z_PARAM_ZVAL(subject)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(limit)
- Z_PARAM_ZVAL_DEREF(zcount)
+ Z_PARAM_ZVAL(zcount)
ZEND_PARSE_PARAMETERS_END();
if (Z_TYPE_P(replace) != IS_ARRAY) {
@@ -2323,8 +2329,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
}
if (zcount) {
- zval_ptr_dtor(zcount);
- ZVAL_LONG(zcount, replace_count);
+ ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
}
}
/* }}} */
@@ -2342,19 +2347,20 @@ static PHP_FUNCTION(preg_replace)
static PHP_FUNCTION(preg_replace_callback)
{
zval *regex, *replace, *subject, *zcount = NULL;
- zend_long limit = -1;
+ zend_long limit = -1, flags = 0;
size_t replace_count;
zend_fcall_info fci;
zend_fcall_info_cache fcc;
/* Get function parameters and do error-checking. */
- ZEND_PARSE_PARAMETERS_START(3, 5)
+ ZEND_PARSE_PARAMETERS_START(3, 6)
Z_PARAM_ZVAL(regex)
Z_PARAM_ZVAL(replace)
Z_PARAM_ZVAL(subject)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(limit)
- Z_PARAM_ZVAL_DEREF(zcount)
+ Z_PARAM_ZVAL(zcount)
+ Z_PARAM_LONG(flags)
ZEND_PARSE_PARAMETERS_END();
if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
@@ -2369,10 +2375,9 @@ static PHP_FUNCTION(preg_replace_callback)
fci.object = NULL;
ZVAL_COPY_VALUE(&fci.function_name, replace);
- replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit);
+ replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit, flags);
if (zcount) {
- zval_ptr_dtor(zcount);
- ZVAL_LONG(zcount, replace_count);
+ ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
}
}
/* }}} */
@@ -2382,19 +2387,20 @@ static PHP_FUNCTION(preg_replace_callback)
static PHP_FUNCTION(preg_replace_callback_array)
{
zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
- zend_long limit = -1;
+ zend_long limit = -1, flags = 0;
zend_string *str_idx;
size_t replace_count = 0;
zend_fcall_info fci;
zend_fcall_info_cache fcc;
/* Get function parameters and do error-checking. */
- ZEND_PARSE_PARAMETERS_START(2, 4)
+ ZEND_PARSE_PARAMETERS_START(2, 5)
Z_PARAM_ARRAY(pattern)
Z_PARAM_ZVAL(subject)
Z_PARAM_OPTIONAL
Z_PARAM_LONG(limit)
- Z_PARAM_ZVAL_DEREF(zcount)
+ Z_PARAM_ZVAL(zcount)
+ Z_PARAM_LONG(flags)
ZEND_PARSE_PARAMETERS_END();
fci.size = sizeof(fci);
@@ -2421,7 +2427,7 @@ static PHP_FUNCTION(preg_replace_callback_array)
ZVAL_COPY_VALUE(&fci.function_name, replace);
- replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit);
+ replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit, flags);
if (subject != return_value) {
subject = return_value;
} else {
@@ -2439,8 +2445,7 @@ static PHP_FUNCTION(preg_replace_callback_array)
} ZEND_HASH_FOREACH_END();
if (zcount) {
- zval_ptr_dtor(zcount);
- ZVAL_LONG(zcount, replace_count);
+ ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
}
}
/* }}} */
@@ -2492,14 +2497,14 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
uint32_t options; /* Execution options */
int count; /* Count of matched subpatterns */
PCRE2_SIZE start_offset; /* Where the new search starts */
- PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
- char *last_match; /* Location of last match */
+ PCRE2_SIZE last_match_offset; /* Location of last match */
uint32_t no_empty; /* If NO_EMPTY flag is set */
uint32_t delim_capture; /* If delimiters should be captured */
uint32_t offset_capture; /* If offsets should be captured */
uint32_t num_subpats; /* Number of captured subpatterns */
zval tmp;
pcre2_match_data *match_data;
+ char *subject = ZSTR_VAL(subject_str);
no_empty = flags & PREG_SPLIT_NO_EMPTY;
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
@@ -2513,11 +2518,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
/* Start at the beginning of the string */
start_offset = 0;
- next_offset = 0;
- last_match = ZSTR_VAL(subject_str);
+ last_match_offset = 0;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-
if (limit_val == -1) {
/* pass */
} else if (limit_val == 0) {
@@ -2541,11 +2544,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
#ifdef HAVE_PCRE_JIT_SUPPORT
if ((pce->preg_options & PREG_JIT) && options) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
options, match_data, mctx);
while (1) {
@@ -2565,14 +2568,15 @@ matched:
break;
}
- if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
-
+ if (!no_empty || offsets[0] != last_match_offset) {
if (offset_capture) {
/* Add (match, offset) pair to the return value */
- add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
+ add_offset_pair(
+ return_value, subject, last_match_offset, offsets[0],
+ NULL, 0);
} else {
/* Add the piece to the return value */
- ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
+ populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
@@ -2581,19 +2585,16 @@ matched:
limit_val--;
}
- last_match = &ZSTR_VAL(subject_str)[offsets[1]];
- next_offset = offsets[1];
-
if (delim_capture) {
- size_t i, match_len;
+ size_t i;
for (i = 1; i < count; i++) {
- match_len = offsets[(i<<1)+1] - offsets[i<<1];
/* If we have matched a delimiter */
- if (!no_empty || match_len > 0) {
+ if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
if (offset_capture) {
- add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
+ add_offset_pair(
+ return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
} else {
- ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
+ populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
}
@@ -2601,14 +2602,14 @@ matched:
}
/* Advance to the position right after the last full match */
- start_offset = offsets[1];
+ start_offset = last_match_offset = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
if (start_offset == offsets[0]) {
- count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
if (count >= 0) {
goto matched;
@@ -2618,7 +2619,7 @@ matched:
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (start_offset < ZSTR_LEN(subject_str)) {
- start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
+ start_offset += calculate_unit_length(pce, subject + start_offset);
} else {
break;
}
@@ -2642,11 +2643,11 @@ error:
#ifdef HAVE_PCRE_JIT_SUPPORT
if (pce->preg_options & PREG_JIT) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
}
if (match_data != mdata) {
@@ -2659,18 +2660,18 @@ error:
}
last:
- start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
+ start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
if (offset_capture) {
/* Add the last (match, offset) pair to the return value */
- add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
+ add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
} else {
/* Add the last piece to the return value */
- if (last_match == ZSTR_VAL(subject_str)) {
+ if (start_offset == 0) {
ZVAL_STR_COPY(&tmp, subject_str);
} else {
- ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
+ populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
}
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
@@ -2936,8 +2937,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
Returns the error code of the last regexp execution. */
static PHP_FUNCTION(preg_last_error)
{
- ZEND_PARSE_PARAMETERS_START(0, 0)
- ZEND_PARSE_PARAMETERS_END();
+ ZEND_PARSE_PARAMETERS_NONE();
RETURN_LONG(PCRE_G(error_code));
}
@@ -2976,6 +2976,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
ZEND_ARG_INFO(0, subject)
ZEND_ARG_INFO(0, limit)
ZEND_ARG_INFO(1, count)
+ ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
@@ -2983,6 +2984,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
ZEND_ARG_INFO(0, subject)
ZEND_ARG_INFO(0, limit)
ZEND_ARG_INFO(1, count)
+ ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
@@ -3027,12 +3029,8 @@ zend_module_entry pcre_module_entry = {
pcre_functions,
PHP_MINIT(pcre),
PHP_MSHUTDOWN(pcre),
-#ifdef HAVE_PCRE_JIT_SUPPORT
PHP_RINIT(pcre),
-#else
- NULL,
-#endif
- NULL,
+ PHP_RSHUTDOWN(pcre),
PHP_MINFO(pcre),
PHP_PCRE_VERSION,
PHP_MODULE_GLOBALS(pcre),
@@ -3081,14 +3079,3 @@ PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
assert(NULL != pce);
return pce->re;
}/*}}}*/
-
-#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
-
-/*
- * Local variables:
- * tab-width: 4
- * c-basic-offset: 4
- * End:
- * vim600: sw=4 ts=4 fdm=marker
- * vim<600: sw=4 ts=4
- */