summaryrefslogtreecommitdiff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r--ext/pcre/php_pcre.c1698
1 files changed, 998 insertions, 700 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index f919faa298..e82dc252b2 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -16,8 +16,6 @@
+----------------------------------------------------------------------+
*/
-/* $Id$ */
-
#include "php.h"
#include "php_ini.h"
#include "php_globals.h"
@@ -43,12 +41,19 @@
#define PREG_GREP_INVERT (1<<0)
+#define PREG_JIT (1<<3)
+
#define PCRE_CACHE_SIZE 4096
-/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
-#ifndef PCRE_NOTEMPTY_ATSTART
-# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
-#endif
+struct _pcre_cache_entry {
+ pcre2_code *re;
+ uint32_t preg_options;
+ uint32_t capture_count;
+ uint32_t name_count;
+ uint32_t compile_options;
+ uint32_t extra_compile_options;
+ uint32_t refcount;
+};
enum {
PHP_PCRE_NO_ERROR = 0,
@@ -65,10 +70,19 @@ PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
-#define PCRE_JIT_STACK_MAX_SIZE (64 * 1024)
-ZEND_TLS pcre_jit_stack *jit_stack = NULL;
+#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
+ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
-#if defined(ZTS)
+ZEND_TLS pcre2_general_context *gctx = NULL;
+/* These two are global per thread for now. Though it is possible to use these
+ per pattern. Either one can copy it and use in pce, or one does no global
+ contexts at all, but creates for every pce. */
+ZEND_TLS pcre2_compile_context *cctx = NULL;
+ZEND_TLS pcre2_match_context *mctx = NULL;
+ZEND_TLS pcre2_match_data *mdata = NULL;
+ZEND_TLS zend_bool mdata_used = 0;
+ZEND_TLS uint8_t pcre2_init_ok = 0;
+#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() if (tsrm_is_main_thread() && pcre_mt) tsrm_mutex_free(pcre_mt); pcre_mt = NULL;
@@ -81,35 +95,45 @@ static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_unlock()
#endif
+#if HAVE_SETLOCALE
+ZEND_TLS HashTable char_tables;
+
+static void php_pcre_free_char_table(zval *data)
+{/*{{{*/
+ void *ptr = Z_PTR_P(data);
+ pefree(ptr, 1);
+}/*}}}*/
+#endif
+
static void pcre_handle_exec_error(int pcre_code) /* {{{ */
{
int preg_code = 0;
switch (pcre_code) {
- case PCRE_ERROR_MATCHLIMIT:
+ case PCRE2_ERROR_MATCHLIMIT:
preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
break;
- case PCRE_ERROR_RECURSIONLIMIT:
+ case PCRE2_ERROR_RECURSIONLIMIT:
preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
break;
- case PCRE_ERROR_BADUTF8:
- preg_code = PHP_PCRE_BAD_UTF8_ERROR;
- break;
-
- case PCRE_ERROR_BADUTF8_OFFSET:
+ case PCRE2_ERROR_BADUTFOFFSET:
preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
break;
#ifdef HAVE_PCRE_JIT_SUPPORT
- case PCRE_ERROR_JIT_STACKLIMIT:
+ case PCRE2_ERROR_JIT_STACKLIMIT:
preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
break;
#endif
default:
- preg_code = PHP_PCRE_INTERNAL_ERROR;
+ if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
+ preg_code = PHP_PCRE_BAD_UTF8_ERROR;
+ } else {
+ preg_code = PHP_PCRE_INTERNAL_ERROR;
+ }
break;
}
@@ -121,23 +145,128 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
{
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
if (!pce) return;
- pcre_free(pce->re);
- if (pce->extra) {
- pcre_free_study(pce->extra);
- }
-#if HAVE_SETLOCALE
- if ((void*)pce->tables) pefree((void*)pce->tables, 1);
-#endif
+ pcre2_code_free(pce->re);
pefree(pce, 1);
}
/* }}} */
+static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
+{/*{{{*/
+ void *p = pemalloc(size, 1);
+ return p;
+}/*}}}*/
+
+static void php_pcre_free(void *block, void *data)
+{/*{{{*/
+ pefree(block, 1);
+}/*}}}*/
+
+#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
+#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
+
+static void php_pcre_init_pcre2(uint8_t jit)
+{/*{{{*/
+ if (!gctx) {
+ gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
+ if (!gctx) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+ if (!cctx) {
+ cctx = pcre2_compile_context_create(gctx);
+ if (!cctx) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+ /* XXX The 'X' modifier is the default behavior in PCRE2. This option is
+ called dangerous in the manual, as typos in patterns can cause
+ unexpected results. We might want to to switch to the default PCRE2
+ behavior, too, thus causing a certain BC break. */
+ pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
+
+ if (!mctx) {
+ mctx = pcre2_match_context_create(gctx);
+ if (!mctx) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if (jit && !jit_stack) {
+ jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
+ if (!jit_stack) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+#endif
+
+ if (!mdata) {
+ mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
+ if (!mdata) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+ pcre2_init_ok = 1;
+}/*}}}*/
+
+static void php_pcre_shutdown_pcre2(void)
+{/*{{{*/
+ if (gctx) {
+ pcre2_general_context_free(gctx);
+ gctx = NULL;
+ }
+
+ if (cctx) {
+ pcre2_compile_context_free(cctx);
+ cctx = NULL;
+ }
+
+ if (mctx) {
+ pcre2_match_context_free(mctx);
+ mctx = NULL;
+ }
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ /* Stack may only be destroyed when no cached patterns
+ possibly associated with it do exist. */
+ if (jit_stack) {
+ pcre2_jit_stack_free(jit_stack);
+ jit_stack = NULL;
+ }
+#endif
+
+ if (mdata) {
+ pcre2_match_data_free(mdata);
+ mdata = NULL;
+ }
+
+ pcre2_init_ok = 0;
+}/*}}}*/
+
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
{
+ php_pcre_mutex_alloc();
+
zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
pcre_globals->backtrack_limit = 0;
pcre_globals->recursion_limit = 0;
pcre_globals->error_code = PHP_PCRE_NO_ERROR;
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ pcre_globals->jit = 1;
+#endif
+
+ php_pcre_init_pcre2(1);
+#if HAVE_SETLOCALE
+ zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
+#endif
}
/* }}} */
@@ -145,44 +274,98 @@ static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
zend_hash_destroy(&pcre_globals->pcre_cache);
-#ifdef HAVE_PCRE_JIT_SUPPORT
- /* Stack may only be destroyed when no cached patterns
- possibly associated with it do exist. */
- if (jit_stack) {
- pcre_jit_stack_free(jit_stack);
- jit_stack = NULL;
- }
+ php_pcre_shutdown_pcre2();
+#if HAVE_SETLOCALE
+ zend_hash_destroy(&char_tables);
#endif
+ php_pcre_mutex_free();
}
/* }}} */
+static PHP_INI_MH(OnUpdateBacktrackLimit)
+{/*{{{*/
+ OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
+ if (mctx) {
+ pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
+ }
+
+ return SUCCESS;
+}/*}}}*/
+
+static PHP_INI_MH(OnUpdateRecursionLimit)
+{/*{{{*/
+ OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
+ if (mctx) {
+ pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
+ }
+
+ return SUCCESS;
+}/*}}}*/
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+static PHP_INI_MH(OnUpdateJit)
+{/*{{{*/
+ OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
+ if (PCRE_G(jit) && jit_stack) {
+ pcre2_jit_stack_assign(mctx, NULL, jit_stack);
+ } else {
+ pcre2_jit_stack_assign(mctx, NULL, NULL);
+ }
+
+ return SUCCESS;
+}/*}}}*/
+#endif
+
PHP_INI_BEGIN()
- STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
- STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
+ STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
+ STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
- STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateBool, jit, zend_pcre_globals, pcre_globals)
+ STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
+static char *_pcre2_config_str(uint32_t what)
+{/*{{{*/
+ int len = pcre2_config(what, NULL);
+ char *ret = (char *) malloc(len + 1);
+
+ len = pcre2_config(what, ret);
+ if (!len) {
+ free(ret);
+ return NULL;
+ }
+
+ return ret;
+}/*}}}*/
/* {{{ PHP_MINFO_FUNCTION(pcre) */
static PHP_MINFO_FUNCTION(pcre)
{
#ifdef HAVE_PCRE_JIT_SUPPORT
- int jit_yes = 0;
+ uint32_t flag = 0;
+ char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
#endif
+ char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
+ char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
php_info_print_table_start();
php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
- php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
+ php_info_print_table_row(2, "PCRE Library Version", version);
+ free(version);
+ php_info_print_table_row(2, "PCRE Unicode Version", unicode);
+ free(unicode);
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
- php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
+ if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
+ php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
} else {
php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
}
+ if (jit_target) {
+ php_info_print_table_row(2, "PCRE JIT Target", jit_target);
+ }
+ free(jit_target);
#else
php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
#endif
@@ -200,9 +383,19 @@ static PHP_MINFO_FUNCTION(pcre)
/* {{{ PHP_MINIT_FUNCTION(pcre) */
static PHP_MINIT_FUNCTION(pcre)
{
- REGISTER_INI_ENTRIES();
+ char *version;
- php_pcre_mutex_alloc();
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if (UNEXPECTED(!pcre2_init_ok)) {
+ /* Retry. */
+ php_pcre_init_pcre2(PCRE_G(jit));
+ if (!pcre2_init_ok) {
+ return FAILURE;
+ }
+ }
+#endif
+
+ REGISTER_INI_ENTRIES();
REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
@@ -220,7 +413,17 @@ static PHP_MINIT_FUNCTION(pcre)
REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
+ version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
+ REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
+ free(version);
+ REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
+#else
+ REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
+#endif
return SUCCESS;
}
@@ -231,8 +434,6 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
{
UNREGISTER_INI_ENTRIES();
- php_pcre_mutex_free();
-
return SUCCESS;
}
/* }}} */
@@ -241,12 +442,19 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
/* {{{ PHP_RINIT_FUNCTION(pcre) */
static PHP_RINIT_FUNCTION(pcre)
{
- if (PCRE_G(jit) && jit_stack == NULL) {
+ if (UNEXPECTED(!pcre2_init_ok)) {
+ /* Retry. */
php_pcre_mutex_lock();
- jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE);
+ php_pcre_init_pcre2(PCRE_G(jit));
+ if (!pcre2_init_ok) {
+ php_pcre_mutex_unlock();
+ return FAILURE;
+ }
php_pcre_mutex_unlock();
}
+ mdata_used = 0;
+
return SUCCESS;
}
/* }}} */
@@ -268,21 +476,18 @@ static int pcre_clean_cache(zval *data, void *arg)
/* }}} */
/* {{{ static make_subpats_table */
-static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
+static char **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
{
- pcre_extra *extra = pce->extra;
- int name_cnt = pce->name_count, name_size, ni = 0;
- int rc;
+ uint32_t name_cnt = pce->name_count, name_size, ni = 0;
char *name_table;
unsigned short name_idx;
char **subpat_names;
int rc1, rc2;
- rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
- rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
- rc = rc2 ? rc2 : rc1;
- if (rc < 0) {
- php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+ rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
+ rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
+ if (rc1 < 0 || rc2 < 0) {
+ php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc1 < 0 ? rc1 : rc2);
return NULL;
}
@@ -302,12 +507,12 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
/* }}} */
/* {{{ static calculate_unit_length */
-/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
-static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
+/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
+static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
{
- int unit_len;
+ size_t unit_len;
- if (pce->compile_options & PCRE_UTF8) {
+ if (pce->compile_options & PCRE2_UTF) {
char *end = start;
/* skip continuation bytes */
@@ -324,24 +529,27 @@ static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char
*/
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
{
- pcre *re = NULL;
- pcre_extra *extra;
- int coptions = 0;
- int soptions = 0;
- const char *error;
- int erroffset;
+ pcre2_code *re = NULL;
+ uint32_t coptions = 0;
+ uint32_t extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
+ PCRE2_UCHAR error[128];
+ PCRE2_SIZE erroffset;
+ int errnumber;
char delimiter;
char start_delimiter;
char end_delimiter;
char *p, *pp;
char *pattern;
- int do_study = 0;
- int poptions = 0;
- unsigned const char *tables = NULL;
- pcre_cache_entry *pce;
+ size_t pattern_len;
+ uint32_t poptions = 0;
+#if HAVE_SETLOCALE
+ const uint8_t *tables = NULL;
+#endif
+ zval *zv;
pcre_cache_entry new_entry;
int rc;
zend_string *key;
+ pcre_cache_entry *ret;
#if HAVE_SETLOCALE
if (locale_aware && BG(locale_string) &&
@@ -357,14 +565,14 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
/* Try to lookup the cached regex entry, and if successful, just pass
back the compiled pattern, otherwise go on and compile it. */
- pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), key);
- if (pce) {
+ zv = zend_hash_find(&PCRE_G(pcre_cache), key);
+ if (zv) {
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
- return pce;
+ return (pcre_cache_entry*)Z_PTR_P(zv);
}
p = ZSTR_VAL(regex);
@@ -375,12 +583,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
if (*p == 0) {
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
php_error_docref(NULL, E_WARNING,
p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@@ -390,11 +598,11 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@@ -435,7 +643,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
if (*pp == 0) {
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
@@ -445,12 +653,13 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
} else {
php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
}
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
/* Make a copy of the actual pattern. */
- pattern = estrndup(p, pp-p);
+ pattern_len = pp - p;
+ pattern = estrndup(p, pattern_len);
/* Move on to the options */
pp++;
@@ -460,26 +669,26 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
switch (*pp++) {
/* Perl compatible options */
- case 'i': coptions |= PCRE_CASELESS; break;
- case 'm': coptions |= PCRE_MULTILINE; break;
- case 's': coptions |= PCRE_DOTALL; break;
- case 'x': coptions |= PCRE_EXTENDED; break;
+ case 'i': coptions |= PCRE2_CASELESS; break;
+ case 'm': coptions |= PCRE2_MULTILINE; break;
+ case 's': coptions |= PCRE2_DOTALL; break;
+ case 'x': coptions |= PCRE2_EXTENDED; break;
/* PCRE specific options */
- case 'A': coptions |= PCRE_ANCHORED; break;
- case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
- case 'S': do_study = 1; break;
- case 'U': coptions |= PCRE_UNGREEDY; break;
- case 'X': coptions |= PCRE_EXTRA; break;
- case 'u': coptions |= PCRE_UTF8;
+ case 'A': coptions |= PCRE2_ANCHORED; break;
+ case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
+ case 'S': /* Pass. */ break;
+ case 'U': coptions |= PCRE2_UNGREEDY; break;
+ case 'X': extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL; break;
+ case 'u': coptions |= PCRE2_UTF;
/* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
characters, even in UTF-8 mode. However, this can be changed by setting
- the PCRE_UCP option. */
-#ifdef PCRE_UCP
- coptions |= PCRE_UCP;
+ the PCRE2_UCP option. */
+#ifdef PCRE2_UCP
+ coptions |= PCRE2_UCP;
#endif
break;
- case 'J': coptions |= PCRE_DUPNAMES; break;
+ case 'J': coptions |= PCRE2_DUPNAMES; break;
/* Custom preg options */
case 'e': poptions |= PREG_REPLACE_EVAL; break;
@@ -495,77 +704,98 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
} else {
php_error_docref(NULL,E_WARNING, "Null byte in regex");
}
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
return NULL;
}
}
+ if (poptions & PREG_REPLACE_EVAL) {
+ php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
+ efree(pattern);
+#if HAVE_SETLOCALE
+ if (key != regex) {
+ zend_string_release_ex(key, 0);
+ }
+#endif
+ return NULL;
+ }
+
#if HAVE_SETLOCALE
if (key != regex) {
- tables = pcre_maketables();
+ tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
+ if (!tables) {
+ zend_string *_k;
+ tables = pcre2_maketables(gctx);
+ if (UNEXPECTED(!tables)) {
+ php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
+ pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
+ zend_string_release_ex(key, 0);
+ efree(pattern);
+ return NULL;
+ }
+ _k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
+ zend_hash_add_ptr(&char_tables, _k, (void *)tables);
+ zend_string_release(_k);
+ }
+ pcre2_set_character_tables(cctx, tables);
}
#endif
+ /* Set extra options for the compile context. */
+ if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
+ pcre2_set_compile_extra_options(cctx, extra_coptions);
+ }
+
/* Compile pattern and display a warning if compilation failed. */
- re = pcre_compile(pattern,
- coptions,
- &error,
- &erroffset,
- tables);
+ re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
+
+ /* Reset the compile context extra options to default. */
+ if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
+ pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
+ }
if (re == NULL) {
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
- php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre2_get_error_message(errnumber, error, sizeof(error));
+ php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
- if (tables) {
- pefree((void*)tables, 1);
- }
return NULL;
}
#ifdef HAVE_PCRE_JIT_SUPPORT
if (PCRE_G(jit)) {
/* Enable PCRE JIT compiler */
- do_study = 1;
- soptions |= PCRE_STUDY_JIT_COMPILE;
- }
-#endif
-
- /* If study option was specified, study the pattern and
- store the result in extra for passing to pcre_exec. */
- if (do_study) {
- php_pcre_mutex_lock();
- extra = pcre_study(re, soptions, &error);
- php_pcre_mutex_unlock();
- if (extra) {
- extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef HAVE_PCRE_JIT_SUPPORT
- if (PCRE_G(jit) && jit_stack) {
- pcre_assign_jit_stack(extra, NULL, jit_stack);
+ rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
+ if (EXPECTED(rc >= 0)) {
+ size_t jit_size = 0;
+ if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
+ poptions |= PREG_JIT;
}
-#endif
- }
- if (error != NULL) {
- php_error_docref(NULL, E_WARNING, "Error while studying pattern");
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ } else if (rc == PCRE2_ERROR_NOMEMORY) {
+ php_error_docref(NULL, E_WARNING,
+ "Allocation of JIT memory failed, PCRE JIT will be disabled. "
+ "This is likely caused by security restrictions. "
+ "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
+ PCRE_G(jit) = 0;
+ } else {
+ pcre2_get_error_message(rc, error, sizeof(error));
+ php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
}
- } else {
- extra = NULL;
}
-
+#endif
efree(pattern);
/*
@@ -573,42 +803,39 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
* these are supposedly the oldest ones (but not necessarily the least used
* ones).
*/
- if (!pce && zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
+ if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
int num_clean = PCRE_CACHE_SIZE / 8;
zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
}
/* Store the compiled pattern and extra info in the cache. */
new_entry.re = re;
- new_entry.extra = extra;
new_entry.preg_options = poptions;
new_entry.compile_options = coptions;
-#if HAVE_SETLOCALE
- new_entry.tables = tables;
-#endif
+ new_entry.extra_compile_options = extra_coptions;
new_entry.refcount = 0;
- rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
+ rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
if (rc < 0) {
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
- php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
- rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
+ rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
if (rc < 0) {
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
- php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@@ -620,19 +847,23 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, in
* as hash keys especually for this table.
* See bug #63180
*/
- if (!ZSTR_IS_INTERNED(key) || !(GC_FLAGS(key) & IS_STR_PERMANENT)) {
- pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache),
- ZSTR_VAL(key), ZSTR_LEN(key), &new_entry, sizeof(pcre_cache_entry));
+ if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
+ zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
+
+ GC_MAKE_PERSISTENT_LOCAL(str);
+
#if HAVE_SETLOCALE
if (key != regex) {
- zend_string_release(key);
+ zend_string_release_ex(key, 0);
}
#endif
+ ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
+ zend_string_release(str);
} else {
- pce = zend_hash_update_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
+ ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
}
- return pce;
+ return ret;
}
/* }}} */
@@ -646,15 +877,15 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
/* {{{ pcre_get_compiled_regex
*/
-PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
+PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options)
{
pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
- if (extra) {
- *extra = pce ? pce->extra : NULL;
- }
if (preg_options) {
- *preg_options = pce ? pce->preg_options : 0;
+ *preg_options = 0;
+ }
+ if (capture_count) {
+ *capture_count = pce ? pce->capture_count : 0;
}
return pce ? pce->re : NULL;
@@ -663,33 +894,66 @@ PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int
/* {{{ pcre_get_compiled_regex_ex
*/
-PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
+PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
{
pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
- if (extra) {
- *extra = pce ? pce->extra : NULL;
- }
if (preg_options) {
- *preg_options = pce ? pce->preg_options : 0;
+ *preg_options = 0;
}
if (compile_options) {
*compile_options = pce ? pce->compile_options : 0;
}
+ if (capture_count) {
+ *capture_count = pce ? pce->capture_count : 0;
+ }
return pce ? pce->re : NULL;
}
/* }}} */
+/* XXX For the cases where it's only about match yes/no and no capture
+ required, perhaps just a minimum sized data would suffice. */
+PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
+{/*{{{*/
+
+ assert(NULL != re);
+
+ if (EXPECTED(!mdata_used)) {
+ int rc = 0;
+
+ if (!capture_count) {
+ /* As we deal with a non cached pattern, no other way to gather this info. */
+ rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
+ }
+
+ if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ mdata_used = 1;
+ return mdata;
+ }
+ }
+
+ return pcre2_match_data_create_from_pattern(re, gctx);
+}/*}}}*/
+
+PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
+{/*{{{*/
+ if (UNEXPECTED(match_data != mdata)) {
+ pcre2_match_data_free(match_data);
+ } else {
+ mdata_used = 0;
+ }
+}/*}}}*/
+
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
+static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, char *name, uint32_t unmatched_as_null)
{
zval match_pair, tmp;
array_init_size(&match_pair, 2);
/* Add (match, offset) to the return value */
- if (offset < 0) {
+ if (PCRE2_UNSET == offset) {
if (unmatched_as_null) {
ZVAL_NULL(&tmp);
} else {
@@ -729,49 +993,40 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ *
Z_PARAM_LONG(start_offset)
ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
- if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- RETURN_FALSE;
- }
-
/* Compile regex or get it from cache. */
if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
RETURN_FALSE;
}
pce->refcount++;
- php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
+ php_pcre_match_impl(pce, ZSTR_VAL(subject), ZSTR_LEN(subject), return_value, subpats,
global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
pce->refcount--;
}
/* }}} */
/* {{{ php_pcre_match_impl() */
-PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
- zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
+PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value,
+ zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
{
zval result_set, /* Holds a set of subpatterns after
a global match */
*match_sets = NULL; /* An array of sets of matches for each
subpattern after a global match */
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int no_utf_check = 0; /* Execution options */
- int count = 0; /* Count of matched subpatterns */
- int *offsets; /* Array of subpattern offsets */
- int num_subpats; /* Number of captured subpatterns */
- int size_offsets; /* Size of the offsets array */
+ uint32_t options; /* Execution options */
+ int count; /* Count of matched subpatterns */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
+ uint32_t num_subpats; /* Number of captured subpatterns */
int matched; /* Has anything matched */
- int g_notempty = 0; /* If the match should not be empty */
char **subpat_names; /* Array for named subpatterns */
- int i;
- int subpats_order; /* Order of subpattern matches */
- int offset_capture; /* Capture match offsets: yes/no */
- int unmatched_as_null; /* Null non-matches: yes/no */
- unsigned char *mark = NULL; /* Target for MARK name */
+ size_t i;
+ uint32_t subpats_order; /* Order of subpattern matches */
+ uint32_t offset_capture; /* Capture match offsets: yes/no */
+ uint32_t unmatched_as_null; /* Null non-matches: yes/no */
+ PCRE2_SPTR mark = NULL; /* Target for MARK name */
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
-
- ALLOCA_FLAG(use_heap);
+ pcre2_match_data *match_data;
+ PCRE2_SIZE start_offset2;
ZVAL_UNDEF(&marks);
@@ -806,26 +1061,22 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
/* Negative offset counts from the end of the string. */
if (start_offset < 0) {
- start_offset = subject_len + start_offset;
- if (start_offset < 0) {
- start_offset = 0;
+ if ((PCRE2_SIZE)-start_offset <= subject_len) {
+ start_offset2 = subject_len + start_offset;
+ } else {
+ start_offset2 = 0;
}
+ } else {
+ start_offset2 = (PCRE2_SIZE)start_offset;
}
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
+ if (start_offset2 > subject_len) {
+ pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
+ RETURN_FALSE;
}
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef PCRE_EXTRA_MARK
- extra->mark = &mark;
- extra->flags |= PCRE_EXTRA_MARK;
-#endif
/* Calculate the size of the offsets array, and allocate memory for it. */
num_subpats = pce->capture_count + 1;
- size_offsets = num_subpats * 3;
/*
* Build a mapping from subpattern numbers to their names. We will
@@ -839,12 +1090,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
- memset(offsets, 0, size_offsets*sizeof(int));
/* Allocate match sets array and initialize the values. */
if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
@@ -856,53 +1101,55 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
matched = 0;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ if (subpat_names) {
+ efree(subpat_names);
+ }
+ if (match_sets) {
+ efree(match_sets);
+ }
+ RETURN_FALSE;
+ }
}
-#endif
- do {
- /* Execute the regular expression. */
+ options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
+
+ /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
- && no_utf_check && !g_notempty) {
- if (start_offset < 0 || start_offset > subject_len) {
- pcre_handle_exec_error(PCRE_ERROR_BADOFFSET);
- break;
- }
- count = pcre_jit_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
- } else
+ if ((pce->preg_options & PREG_JIT) && options) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
#endif
- count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
-
- /* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
-
- /* Check for too many substrings condition. */
- if (count == 0) {
- php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- }
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
+ options, match_data, mctx);
+ while (1) {
/* If something has matched */
- if (count > 0) {
+ if (count >= 0) {
+ /* Check for too many substrings condition. */
+ if (UNEXPECTED(count == 0)) {
+ php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
+ count = num_subpats;
+ }
+
+matched:
matched++;
+ offsets = pcre2_get_ovector_pointer(match_data);
+
/* If subpatterns array has been passed, fill it in with values. */
if (subpats != NULL) {
/* Try to get the list of substrings and display a warning if failed. */
- if (offsets[1] - offsets[0] < 0) {
+ if (offsets[1] < offsets[0]) {
if (subpat_names) {
efree(subpat_names);
}
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
if (match_sets) efree(match_sets);
php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
RETURN_FALSE;
@@ -918,7 +1165,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(&match_sets[i]);
} else {
@@ -930,6 +1177,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
}
+ mark = pcre2_get_mark(match_data);
/* Add MARK, if available */
if (mark) {
if (Z_TYPE(marks) == IS_UNDEF) {
@@ -965,7 +1213,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_assoc_null(&result_set, subpat_names[i]);
} else {
@@ -976,7 +1224,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(&result_set);
} else {
@@ -996,7 +1244,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(&result_set);
} else {
@@ -1010,6 +1258,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
/* Add MARK, if available */
+ mark = pcre2_get_mark(match_data);
if (mark) {
add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
}
@@ -1028,7 +1277,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_assoc_null(subpats, subpat_names[i]);
} else {
@@ -1039,7 +1288,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(subpats);
} else {
@@ -1060,7 +1309,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(subpats);
} else {
@@ -1074,6 +1323,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
/* Add MARK, if available */
+ mark = pcre2_get_mark(match_data);
if (mark) {
add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
}
@@ -1082,31 +1332,62 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
/* Advance to the next piece. */
- start_offset = offsets[1];
+ start_offset2 = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
-
- } else if (count == PCRE_ERROR_NOMATCH) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, subject + start_offset);
-
- start_offset += unit_len;
- g_notempty = 0;
- } else
- break;
+ if (start_offset2 == offsets[0]) {
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
+ PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
+ if (count >= 0) {
+ goto matched;
+ } else if (count == PCRE2_ERROR_NOMATCH) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
+ this is not necessarily the end. We need to advance
+ the start offset, and continue. Fudge the offset values
+ to achieve this, unless we're already at the end of the string. */
+ if (start_offset2 < subject_len) {
+ size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
+
+ start_offset2 += unit_len;
+ } else {
+ break;
+ }
+ } else {
+ goto error;
+ }
+ }
+ } else if (count == PCRE2_ERROR_NOMATCH) {
+ break;
} else {
+error:
pcre_handle_exec_error(count);
break;
}
- } while (global);
+
+ if (!global) {
+ break;
+ }
+
+ /* Execute the regular expression. */
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if ((pce->preg_options & PREG_JIT)) {
+ if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
+ pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
+ break;
+ }
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
+#endif
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ }
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
+ }
/* Add the match sets to the output array and clean up */
if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
@@ -1131,11 +1412,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
if (subpat_names) {
efree(subpat_names);
}
@@ -1206,7 +1482,7 @@ static int preg_get_backref(char **str, int *backref)
/* {{{ preg_do_repl_func
*/
-static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
+static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, char **subpat_names, int count, const PCRE2_SPTR mark)
{
zend_string *result_str;
zval retval; /* Function return value */
@@ -1236,8 +1512,12 @@ static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cach
fci->no_separation = 0;
if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
- result_str = zval_get_string(&retval);
- zval_ptr_dtor(&retval);
+ if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
+ result_str = Z_STR(retval);
+ } else {
+ result_str = zval_get_string_func(&retval);
+ zval_ptr_dtor(&retval);
+ }
} else {
if (!EG(exception)) {
php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
@@ -1256,9 +1536,9 @@ static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cach
*/
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
zend_string *subject_str,
- char *subject, int subject_len,
+ char *subject, size_t subject_len,
zend_string *replace_str,
- int limit, int *replace_count)
+ size_t limit, size_t *replace_count)
{
pcre_cache_entry *pce; /* Compiled regular expression */
zend_string *result; /* Function result */
@@ -1277,22 +1557,17 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
/* }}} */
/* {{{ php_pcre_replace_impl() */
-PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_string *replace_str, int limit, int *replace_count)
+PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int no_utf_check = 0; /* Execution options */
- int count = 0; /* Count of matched subpatterns */
- int *offsets; /* Array of subpattern offsets */
- char **subpat_names; /* Array for named subpatterns */
- int num_subpats; /* Number of captured subpatterns */
- int size_offsets; /* Size of the offsets array */
+ uint32_t options; /* Execution options */
+ int count; /* Count of matched subpatterns */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
+ uint32_t num_subpats; /* Number of captured subpatterns */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
- int match_len; /* Length of the current match */
+ size_t match_len; /* Length of the current match */
int backref; /* Backreference number */
- int start_offset; /* Where the new search starts */
- int g_notempty=0; /* If the match should not be empty */
+ PCRE2_SIZE start_offset; /* Where the new search starts */
char *walkbuf, /* Location of current replacement in the result */
*walk, /* Used to walk the replacement string */
*match, /* The current match */
@@ -1301,48 +1576,10 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
walk_last; /* Last walked character */
size_t result_len; /* Length of result */
zend_string *result; /* Result of replacement */
-
- ALLOCA_FLAG(use_heap);
-
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
-
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-
- if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
- php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
- return NULL;
- }
+ pcre2_match_data *match_data;
/* Calculate the size of the offsets array, and allocate memory for it. */
num_subpats = pce->capture_count + 1;
- size_offsets = num_subpats * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
-
- /*
- * Build a mapping from subpattern numbers to their names. We will
- * allocate the table only if there are any named subpatterns.
- */
- subpat_names = NULL;
- if (UNEXPECTED(pce->name_count > 0)) {
- subpat_names = make_subpats_table(num_subpats, pce);
- if (!subpat_names) {
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
- return NULL;
- }
- }
-
alloc_len = 0;
result = NULL;
@@ -1352,42 +1589,51 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
result_len = 0;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ return NULL;
+ }
}
-#endif
-#ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
-#endif
+ options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
- while (1) {
- /* Execute the regular expression. */
+ /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
- && no_utf_check && !g_notempty) {
- count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
- } else
+ if ((pce->preg_options & PREG_JIT) && options) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
#endif
- count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ options, match_data, mctx);
- /* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ while (1) {
+ piece = subject + start_offset;
- /* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
- php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets / 3;
- }
+ if (count >= 0 && limit > 0) {
+ zend_bool simple_string;
- piece = subject + start_offset;
+ /* Check for too many substrings condition. */
+ if (UNEXPECTED(count == 0)) {
+ php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
+ count = num_subpats;
+ }
- /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
- if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
- zend_bool simple_string = 1;
+matched:
+ offsets = pcre2_get_ovector_pointer(match_data);
+
+ if (UNEXPECTED(offsets[1] < offsets[0])) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ if (result) {
+ zend_string_release_ex(result, 0);
+ result = NULL;
+ }
+ break;
+ }
if (replace_count) {
++*replace_count;
@@ -1401,7 +1647,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
walk = ZSTR_VAL(replace_str);
replace_end = walk + ZSTR_LEN(replace_str);
walk_last = 0;
-
+ simple_string = 1;
while (walk < replace_end) {
if ('\\' == *walk || '$' == *walk) {
simple_string = 0;
@@ -1470,69 +1716,83 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
}
- if (limit) {
- limit--;
- }
+ limit--;
/* Advance to the next piece. */
start_offset = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
-
- } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, piece);
-
- start_offset += unit_len;
- memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
- result_len += unit_len;
- g_notempty = 0;
- } else {
- if (!result && subject_str) {
- result = zend_string_copy(subject_str);
- break;
- }
- new_len = result_len + subject_len - start_offset;
- if (new_len >= alloc_len) {
- alloc_len = new_len; /* now we know exactly how long it is */
- if (NULL != result) {
- result = zend_string_realloc(result, alloc_len, 0);
+ if (start_offset == offsets[0]) {
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
+
+ piece = subject + start_offset;
+ if (count >= 0 && limit > 0) {
+ goto matched;
+ } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
+ this is not necessarily the end. We need to advance
+ the start offset, and continue. Fudge the offset values
+ to achieve this, unless we're already at the end of the string. */
+ if (start_offset < subject_len) {
+ size_t unit_len = calculate_unit_length(pce, piece);
+
+ start_offset += unit_len;
+ memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
+ result_len += unit_len;
} else {
- result = zend_string_alloc(alloc_len, 0);
+ goto not_matched;
}
+ } else {
+ goto error;
}
- /* stick that last bit of string on our output */
- memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
- result_len += subject_len - start_offset;
- ZSTR_VAL(result)[result_len] = '\0';
- ZSTR_LEN(result) = result_len;
+ }
+
+ } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
+not_matched:
+ if (!result && subject_str) {
+ result = zend_string_copy(subject_str);
break;
}
+ new_len = result_len + subject_len - start_offset;
+ if (new_len >= alloc_len) {
+ alloc_len = new_len; /* now we know exactly how long it is */
+ if (NULL != result) {
+ result = zend_string_realloc(result, alloc_len, 0);
+ } else {
+ result = zend_string_alloc(alloc_len, 0);
+ }
+ }
+ /* stick that last bit of string on our output */
+ memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
+ result_len += subject_len - start_offset;
+ ZSTR_VAL(result)[result_len] = '\0';
+ ZSTR_LEN(result) = result_len;
+ break;
} else {
+error:
pcre_handle_exec_error(count);
if (result) {
- zend_string_release(result);
+ zend_string_release_ex(result, 0);
result = NULL;
}
break;
}
- }
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if (pce->preg_options & PREG_JIT) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
+#endif
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
}
- if (UNEXPECTED(subpat_names)) {
- efree(subpat_names);
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
}
return result;
@@ -1540,50 +1800,26 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
/* }}} */
/* {{{ php_pcre_replace_func_impl() */
-static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, int limit, int *replace_count)
+static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count)
{
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int no_utf_check = 0; /* Execution options */
- int count = 0; /* Count of matched subpatterns */
- int *offsets; /* Array of subpattern offsets */
+ uint32_t options; /* Execution options */
+ int count; /* Count of matched subpatterns */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
char **subpat_names; /* Array for named subpatterns */
- int num_subpats; /* Number of captured subpatterns */
- int size_offsets; /* Size of the offsets array */
+ uint32_t num_subpats; /* Number of captured subpatterns */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
- int start_offset; /* Where the new search starts */
- int g_notempty=0; /* If the match should not be empty */
+ PCRE2_SIZE start_offset; /* Where the new search starts */
char *match, /* The current match */
*piece; /* The current piece of subject */
size_t result_len; /* Length of result */
- unsigned char *mark = NULL; /* Target for MARK name */
zend_string *result; /* Result of replacement */
- zend_string *eval_result=NULL; /* Result of custom function */
-
- ALLOCA_FLAG(use_heap);
-
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
-
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-
- if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
- php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
- return NULL;
- }
+ zend_string *eval_result; /* Result of custom function */
+ pcre2_match_data *match_data;
+ zend_bool old_mdata_used;
/* Calculate the size of the offsets array, and allocate memory for it. */
num_subpats = pce->capture_count + 1;
- size_offsets = num_subpats * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
/*
* Build a mapping from subpattern numbers to their names. We will
@@ -1593,11 +1829,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
if (UNEXPECTED(pce->name_count > 0)) {
subpat_names = make_subpats_table(num_subpats, pce);
if (!subpat_names) {
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
return NULL;
}
}
@@ -1611,42 +1842,56 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
result_len = 0;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ old_mdata_used = mdata_used;
+ if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ mdata_used = 1;
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ if (subpat_names) {
+ efree(subpat_names);
+ }
+ mdata_used = old_mdata_used;
+ return NULL;
+ }
}
-#endif
-#ifdef PCRE_EXTRA_MARK
- extra->mark = &mark;
- extra->flags |= PCRE_EXTRA_MARK;
-#endif
+ options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
- while (1) {
- /* Execute the regular expression. */
+ /* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
- && no_utf_check && !g_notempty) {
- count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
- } else
+ if ((pce->preg_options & PREG_JIT) && options) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
#endif
- count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ options, match_data, mctx);
- /* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ while (1) {
+ piece = subject + start_offset;
- /* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
- php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets / 3;
- }
+ if (count >= 0 && limit) {
+ /* Check for too many substrings condition. */
+ if (UNEXPECTED(count == 0)) {
+ php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
+ count = num_subpats;
+ }
- piece = subject + start_offset;
+matched:
+ offsets = pcre2_get_ovector_pointer(match_data);
+
+ if (UNEXPECTED(offsets[1] < offsets[0])) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ if (result) {
+ zend_string_release_ex(result, 0);
+ result = NULL;
+ }
+ break;
+ }
- /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
- if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
if (replace_count) {
++*replace_count;
}
@@ -1657,7 +1902,9 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
new_len = result_len + offsets[0] - start_offset; /* part before the match */
/* Use custom function to get replacement string and its length. */
- eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count, mark);
+ eval_result = preg_do_repl_func(fci, fcc, subject, offsets, subpat_names, count,
+ pcre2_get_mark(match_data));
+
ZEND_ASSERT(eval_result);
new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
if (new_len >= alloc_len) {
@@ -1672,80 +1919,93 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
if (match-piece > 0) {
/* copy the part of the string before the match */
memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
- result_len += (int)(match-piece);
+ result_len += (match-piece);
}
/* If using custom function, copy result to the buffer and clean up. */
memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
- result_len += (int)ZSTR_LEN(eval_result);
- zend_string_release(eval_result);
+ result_len += ZSTR_LEN(eval_result);
+ zend_string_release_ex(eval_result, 0);
- if (limit) {
- limit--;
- }
+ limit--;
/* Advance to the next piece. */
start_offset = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
-
-#ifdef PCRE_EXTRA_MARK
- /* replace function may use the same regex recursively */
- extra->mark = &mark;
- extra->flags |= PCRE_EXTRA_MARK;
-#endif
- } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, piece);
-
- start_offset += unit_len;
- memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
- result_len += unit_len;
- g_notempty = 0;
- } else {
- if (!result && subject_str) {
- result = zend_string_copy(subject_str);
- break;
- }
- new_len = result_len + subject_len - start_offset;
- if (new_len >= alloc_len) {
- alloc_len = new_len; /* now we know exactly how long it is */
- if (NULL != result) {
- result = zend_string_realloc(result, alloc_len, 0);
+ if (start_offset == offsets[0]) {
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
+
+ piece = subject + start_offset;
+ if (count >= 0 && limit) {
+ goto matched;
+ } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
+ this is not necessarily the end. We need to advance
+ the start offset, and continue. Fudge the offset values
+ to achieve this, unless we're already at the end of the string. */
+ if (start_offset < subject_len) {
+ size_t unit_len = calculate_unit_length(pce, piece);
+
+ start_offset += unit_len;
+ memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
+ result_len += unit_len;
} else {
- result = zend_string_alloc(alloc_len, 0);
+ goto not_matched;
}
+ } else {
+ goto error;
}
- /* stick that last bit of string on our output */
- memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
- result_len += subject_len - start_offset;
- ZSTR_VAL(result)[result_len] = '\0';
- ZSTR_LEN(result) = result_len;
+ }
+
+ } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
+not_matched:
+ if (!result && subject_str) {
+ result = zend_string_copy(subject_str);
break;
}
+ new_len = result_len + subject_len - start_offset;
+ if (new_len >= alloc_len) {
+ alloc_len = new_len; /* now we know exactly how long it is */
+ if (NULL != result) {
+ result = zend_string_realloc(result, alloc_len, 0);
+ } else {
+ result = zend_string_alloc(alloc_len, 0);
+ }
+ }
+ /* stick that last bit of string on our output */
+ memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - start_offset);
+ result_len += subject_len - start_offset;
+ ZSTR_VAL(result)[result_len] = '\0';
+ ZSTR_LEN(result) = result_len;
+ break;
} else {
+error:
pcre_handle_exec_error(count);
if (result) {
- zend_string_release(result);
+ zend_string_release_ex(result, 0);
result = NULL;
}
break;
}
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if ((pce->preg_options & PREG_JIT)) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
+#endif
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
}
-
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
}
+ mdata_used = old_mdata_used;
+
if (UNEXPECTED(subpat_names)) {
efree(subpat_names);
}
@@ -1759,7 +2019,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
zend_string *subject_str,
zend_fcall_info *fci, zend_fcall_info_cache *fcc,
- int limit, int *replace_count)
+ size_t limit, size_t *replace_count)
{
pcre_cache_entry *pce; /* Compiled regular expression */
zend_string *result; /* Function result */
@@ -1779,11 +2039,11 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
/* {{{ php_pcre_replace_array
*/
-static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, int limit, int *replace_count)
+static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
{
zval *regex_entry;
zend_string *result;
- zend_string *replace_str;
+ zend_string *replace_str, *tmp_replace_str;
if (Z_TYPE_P(replace) == IS_ARRAY) {
uint32_t replace_idx = 0;
@@ -1792,19 +2052,21 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
/* For each entry in the regex array, get the entry */
ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
/* Make sure we're dealing with strings. */
- zend_string *regex_str = zval_get_string(regex_entry);
+ zend_string *tmp_regex_str;
+ zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
zval *zv;
/* Get current entry */
while (1) {
if (replace_idx == replace_ht->nNumUsed) {
replace_str = ZSTR_EMPTY_ALLOC();
+ tmp_replace_str = NULL;
break;
}
zv = &replace_ht->arData[replace_idx].val;
replace_idx++;
if (Z_TYPE_P(zv) != IS_UNDEF) {
- replace_str = zval_get_string(zv);
+ replace_str = zval_get_tmp_string(zv, &tmp_replace_str);
break;
}
}
@@ -1814,13 +2076,13 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
result = php_pcre_replace(regex_str,
subject_str,
ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str),
+ ZSTR_LEN(subject_str),
replace_str,
limit,
replace_count);
- zend_string_release(replace_str);
- zend_string_release(regex_str);
- zend_string_release(subject_str);
+ zend_tmp_string_release(tmp_replace_str);
+ zend_tmp_string_release(tmp_regex_str);
+ zend_string_release_ex(subject_str, 0);
subject_str = result;
if (UNEXPECTED(result == NULL)) {
break;
@@ -1833,19 +2095,20 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
/* For each entry in the regex array, get the entry */
ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
/* Make sure we're dealing with strings. */
- zend_string *regex_str = zval_get_string(regex_entry);
+ zend_string *tmp_regex_str;
+ zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
/* Do the actual replacement and put the result back into subject_str
for further replacements. */
result = php_pcre_replace(regex_str,
subject_str,
ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str),
+ ZSTR_LEN(subject_str),
replace_str,
limit,
replace_count);
- zend_string_release(regex_str);
- zend_string_release(subject_str);
+ zend_tmp_string_release(tmp_regex_str);
+ zend_string_release_ex(subject_str, 0);
subject_str = result;
if (UNEXPECTED(result == NULL)) {
@@ -1860,24 +2123,20 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
/* {{{ php_replace_in_subject
*/
-static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int *replace_count)
+static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
{
zend_string *result;
zend_string *subject_str = zval_get_string(subject);
- if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
- zend_string_release(subject_str);
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- result = NULL;
- } else if (Z_TYPE_P(regex) != IS_ARRAY) {
+ if (Z_TYPE_P(regex) != IS_ARRAY) {
result = php_pcre_replace(Z_STR_P(regex),
subject_str,
ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str),
+ ZSTR_LEN(subject_str),
Z_STR_P(replace),
limit,
replace_count);
- zend_string_release(subject_str);
+ zend_string_release_ex(subject_str, 0);
} else {
result = php_pcre_replace_array(Z_ARRVAL_P(regex),
replace,
@@ -1891,32 +2150,29 @@ static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval
/* {{{ php_replace_in_subject_func
*/
-static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, int limit, int *replace_count)
+static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count)
{
- zval *regex_entry;
zend_string *result;
zend_string *subject_str = zval_get_string(subject);
- if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- return NULL;
- }
-
if (Z_TYPE_P(regex) != IS_ARRAY) {
result = php_pcre_replace_func(Z_STR_P(regex),
subject_str,
fci, fcc,
limit,
replace_count);
- zend_string_release(subject_str);
+ zend_string_release_ex(subject_str, 0);
return result;
} else {
+ zval *regex_entry;
+
/* If regex is an array */
/* For each entry in the regex array, get the entry */
ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
/* Make sure we're dealing with strings. */
- zend_string *regex_str = zval_get_string(regex_entry);
+ zend_string *tmp_regex_str;
+ zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
/* Do the actual replacement and put the result back into subject_str
for further replacements. */
@@ -1925,8 +2181,8 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc
fci, fcc,
limit,
replace_count);
- zend_string_release(regex_str);
- zend_string_release(subject_str);
+ zend_tmp_string_release(tmp_regex_str);
+ zend_string_release_ex(subject_str, 0);
subject_str = result;
if (UNEXPECTED(result == NULL)) {
break;
@@ -1940,10 +2196,10 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc
/* {{{ preg_replace_func_impl
*/
-static int preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
+static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
{
zend_string *result;
- int replace_count = 0;
+ size_t replace_count = 0;
if (Z_TYPE_P(regex) != IS_ARRAY) {
convert_to_string_ex(regex);
@@ -1990,9 +2246,9 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
{
zval *regex, *replace, *subject, *zcount = NULL;
zend_long limit = -1;
- int replace_count = 0;
+ size_t replace_count = 0;
zend_string *result;
- int old_replace_count;
+ size_t old_replace_count;
/* Get function parameters and do error-checking. */
ZEND_PARSE_PARAMETERS_START(3, 5)
@@ -2027,7 +2283,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
if (!is_filter || replace_count > old_replace_count) {
RETVAL_STR(result);
} else {
- zend_string_release(result);
+ zend_string_release_ex(result, 0);
RETVAL_NULL();
}
} else {
@@ -2060,7 +2316,7 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
}
} else {
- zend_string_release(result);
+ zend_string_release_ex(result, 0);
}
}
} ZEND_HASH_FOREACH_END();
@@ -2087,7 +2343,7 @@ static PHP_FUNCTION(preg_replace_callback)
{
zval *regex, *replace, *subject, *zcount = NULL;
zend_long limit = -1;
- int replace_count;
+ size_t replace_count;
zend_fcall_info fci;
zend_fcall_info_cache fcc;
@@ -2104,7 +2360,7 @@ static PHP_FUNCTION(preg_replace_callback)
if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
zend_string *callback_name = zend_get_callable_name(replace);
php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
- zend_string_release(callback_name);
+ zend_string_release_ex(callback_name, 0);
ZVAL_STR(return_value, zval_get_string(subject));
return;
}
@@ -2128,7 +2384,7 @@ static PHP_FUNCTION(preg_replace_callback_array)
zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
zend_long limit = -1;
zend_string *str_idx;
- int replace_count = 0;
+ size_t replace_count = 0;
zend_fcall_info fci;
zend_fcall_info_cache fcc;
@@ -2156,7 +2412,7 @@ static PHP_FUNCTION(preg_replace_callback_array)
if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
zend_string *callback_name = zend_get_callable_name(replace);
php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
- zend_string_release(callback_name);
+ zend_string_release_ex(callback_name, 0);
zval_ptr_dtor(&regex);
zval_ptr_dtor(return_value);
ZVAL_COPY(return_value, subject);
@@ -2216,18 +2472,13 @@ static PHP_FUNCTION(preg_split)
Z_PARAM_LONG(flags)
ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
- if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- RETURN_FALSE;
- }
-
/* Compile regex or get it from cache. */
if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
RETURN_FALSE;
}
pce->refcount++;
- php_pcre_split_impl(pce, subject, return_value, (int)limit_val, flags);
+ php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
pce->refcount--;
}
/* }}} */
@@ -2237,50 +2488,28 @@ static PHP_FUNCTION(preg_split)
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
zend_long limit_val, zend_long flags)
{
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int *offsets; /* Array of subpattern offsets */
- int size_offsets; /* Size of the offsets array */
- int no_utf_check = 0; /* Execution options */
- int count = 0; /* Count of matched subpatterns */
- int start_offset; /* Where the new search starts */
- int next_offset; /* End of the last delimiter match + 1 */
- int g_notempty = 0; /* If the match should not be empty */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
+ uint32_t options; /* Execution options */
+ int count; /* Count of matched subpatterns */
+ PCRE2_SIZE start_offset; /* Where the new search starts */
+ PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
char *last_match; /* Location of last match */
- int no_empty; /* If NO_EMPTY flag is set */
- int delim_capture; /* If delimiters should be captured */
- int offset_capture; /* If offsets should be captured */
+ uint32_t no_empty; /* If NO_EMPTY flag is set */
+ uint32_t delim_capture; /* If delimiters should be captured */
+ uint32_t offset_capture; /* If offsets should be captured */
+ uint32_t num_subpats; /* Number of captured subpatterns */
zval tmp;
- ALLOCA_FLAG(use_heap);
+ pcre2_match_data *match_data;
no_empty = flags & PREG_SPLIT_NO_EMPTY;
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
- if (limit_val == 0) {
- limit_val = -1;
- }
-
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
-#endif
-
/* Initialize return value */
array_init(return_value);
/* Calculate the size of the offsets array, and allocate memory for it. */
- size_offsets = (pce->capture_count + 1) * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
+ num_subpats = pce->capture_count + 1;
/* Start at the beginning of the string */
start_offset = 0;
@@ -2288,42 +2517,59 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
last_match = ZSTR_VAL(subject_str);
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+
+ if (limit_val == -1) {
+ /* pass */
+ } else if (limit_val == 0) {
+ limit_val = -1;
+ } else if (limit_val <= 1) {
+ goto last;
+ }
+
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ zval_ptr_dtor(return_value);
+ RETURN_FALSE;
+ }
}
-#endif
- /* Get next piece if no limit or limit not yet reached and something matched*/
- while ((limit_val == -1 || limit_val > 1)) {
+ options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
+
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
- && no_utf_check && !g_notempty) {
- count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
- ZSTR_LEN(subject_str), start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
- } else
+ if ((pce->preg_options & PREG_JIT) && options) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
#endif
- count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
- ZSTR_LEN(subject_str), start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ options, match_data, mctx);
- /* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ while (1) {
+ /* If something matched */
+ if (count >= 0) {
+ /* Check for too many substrings condition. */
+ if (UNEXPECTED(count == 0)) {
+ php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
+ count = num_subpats;
+ }
- /* Check for too many substrings condition. */
- if (count == 0) {
- php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- }
+matched:
+ offsets = pcre2_get_ovector_pointer(match_data);
+
+ if (UNEXPECTED(offsets[1] < offsets[0])) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ break;
+ }
- /* If something matched */
- if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
if (offset_capture) {
/* Add (match, offset) pair to the return value */
- add_offset_pair(return_value, last_match, (int)(&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
+ add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
} else {
/* Add the piece to the return value */
ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
@@ -2339,7 +2585,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
next_offset = offsets[1];
if (delim_capture) {
- int i, match_len;
+ size_t i, match_len;
for (i = 1; i < count; i++) {
match_len = offsets[(i<<1)+1] - offsets[i<<1];
/* If we have matched a delimiter */
@@ -2358,30 +2604,62 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
start_offset = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
-
- } else if (count == PCRE_ERROR_NOMATCH) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
- this is not necessarily the end. We need to advance
- the start offset, and continue. Fudge the offset values
- to achieve this, unless we're already at the end of the string. */
- if (g_notempty != 0 && start_offset < ZSTR_LEN(subject_str)) {
- start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
- g_notempty = 0;
- } else {
- break;
+ if (start_offset == offsets[0]) {
+ count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
+ if (count >= 0) {
+ goto matched;
+ } else if (count == PCRE2_ERROR_NOMATCH) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
+ this is not necessarily the end. We need to advance
+ the start offset, and continue. Fudge the offset values
+ to achieve this, unless we're already at the end of the string. */
+ if (start_offset < ZSTR_LEN(subject_str)) {
+ start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
+ } else {
+ break;
+ }
+ } else {
+ goto error;
+ }
}
+
+ } else if (count == PCRE2_ERROR_NOMATCH) {
+ break;
} else {
+error:
pcre_handle_exec_error(count);
break;
}
+
+ /* Get next piece if no limit or limit not yet reached and something matched*/
+ if (limit_val != -1 && limit_val <= 1) {
+ break;
+ }
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if (pce->preg_options & PREG_JIT) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ } else
+#endif
+ count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
+ }
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
}
+ if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
+ zval_ptr_dtor(return_value);
+ RETURN_FALSE;
+ }
- start_offset = (int)(last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
+last:
+ start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
if (offset_capture) {
@@ -2397,14 +2675,6 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
}
-
-
- /* Clean up */
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
}
/* }}} */
@@ -2468,6 +2738,7 @@ static PHP_FUNCTION(preg_quote)
case '|':
case ':':
case '-':
+ case '#':
extra_len++;
break;
@@ -2517,6 +2788,7 @@ static PHP_FUNCTION(preg_quote)
case '|':
case ':':
case '-':
+ case '#':
*q++ = '\\';
*q++ = c;
break;
@@ -2573,97 +2845,89 @@ static PHP_FUNCTION(preg_grep)
PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
{
- zval *entry; /* An entry in the input array */
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int *offsets; /* Array of subpattern offsets */
- int size_offsets; /* Size of the offsets array */
- int count = 0; /* Count of matched subpatterns */
- int no_utf_check = 0; /* Execution options */
+ zval *entry; /* An entry in the input array */
+ uint32_t num_subpats; /* Number of captured subpatterns */
+ int count; /* Count of matched subpatterns */
+ uint32_t options; /* Execution options */
zend_string *string_key;
zend_ulong num_key;
zend_bool invert; /* Whether to return non-matching
entries */
- ALLOCA_FLAG(use_heap);
-
+ pcre2_match_data *match_data;
invert = flags & PREG_GREP_INVERT ? 1 : 0;
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
-#endif
-
/* Calculate the size of the offsets array, and allocate memory for it. */
- size_offsets = (pce->capture_count + 1) * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
+ num_subpats = pce->capture_count + 1;
/* Initialize return array */
array_init(return_value);
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
-#ifdef HAVE_PCRE_JIT_SUPPORT
- no_utf_check = (pce->compile_options & PCRE_UTF8) ? 0 : PCRE_NO_UTF8_CHECK;
-#endif
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ return;
+ }
+ }
+
+ options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
/* Go through the input array */
ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
- zend_string *subject_str = zval_get_string(entry);
+ zend_string *tmp_subject_str;
+ zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
/* Perform the match */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
- && no_utf_check) {
- count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str), 0,
- no_utf_check, offsets, size_offsets, jit_stack);
+ if ((pce->preg_options & PREG_JIT) && options) {
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str), 0,
- no_utf_check, offsets, size_offsets);
-
- /* Check for too many substrings condition. */
- if (count == 0) {
- php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
- pcre_handle_exec_error(count);
- zend_string_release(subject_str);
- break;
- }
+ count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
+ options, match_data, mctx);
/* If the entry fits our requirements */
- if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
- if (Z_REFCOUNTED_P(entry)) {
- Z_ADDREF_P(entry);
+ if (count >= 0) {
+ /* Check for too many substrings condition. */
+ if (UNEXPECTED(count == 0)) {
+ php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
}
+ if (!invert) {
+ Z_TRY_ADDREF_P(entry);
- /* Add to return array */
- if (string_key) {
- zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
- } else {
- zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
+ /* Add to return array */
+ if (string_key) {
+ zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
+ } else {
+ zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
+ }
+ }
+ } else if (count == PCRE2_ERROR_NOMATCH) {
+ if (invert) {
+ Z_TRY_ADDREF_P(entry);
+
+ /* Add to return array */
+ if (string_key) {
+ zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
+ } else {
+ zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
+ }
}
+ } else {
+ pcre_handle_exec_error(count);
+ zend_tmp_string_release(tmp_subject_str);
+ break;
}
- zend_string_release(subject_str);
+ zend_tmp_string_release(tmp_subject_str);
} ZEND_HASH_FOREACH_END();
-
- /* Clean up */
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
}
}
/* }}} */
@@ -2784,6 +3048,40 @@ ZEND_GET_MODULE(pcre)
/* }}} */
+PHPAPI pcre2_match_context *php_pcre_mctx(void)
+{/*{{{*/
+ return mctx;
+}/*}}}*/
+
+PHPAPI pcre2_general_context *php_pcre_gctx(void)
+{/*{{{*/
+ return gctx;
+}/*}}}*/
+
+PHPAPI pcre2_compile_context *php_pcre_cctx(void)
+{/*{{{*/
+ return cctx;
+}/*}}}*/
+
+PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
+{/*{{{*/
+ assert(NULL != pce);
+ pce->refcount++;
+}/*}}}*/
+
+PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
+{/*{{{*/
+ assert(NULL != pce);
+ assert(0 != pce->refcount);
+ pce->refcount--;
+}/*}}}*/
+
+PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
+{/*{{{*/
+ assert(NULL != pce);
+ return pce->re;
+}/*}}}*/
+
#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
/*