summaryrefslogtreecommitdiff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r--ext/pcre/php_pcre.c1148
1 files changed, 670 insertions, 478 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 2fb1133799..83cbad4eea 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -43,12 +43,22 @@
#define PREG_GREP_INVERT (1<<0)
+#define PREG_JIT (1<<3)
+
#define PCRE_CACHE_SIZE 4096
-/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
-#ifndef PCRE_NOTEMPTY_ATSTART
-# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
+struct _pcre_cache_entry {
+ pcre2_code *re;
+ uint32_t preg_options;
+ uint32_t capture_count;
+ uint32_t name_count;
+ uint32_t compile_options;
+ uint32_t extra_compile_options;
+ uint32_t refcount;
+#if HAVE_SETLOCALE
+ const uint8_t *tables;
#endif
+};
enum {
PHP_PCRE_NO_ERROR = 0,
@@ -66,9 +76,18 @@ PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
#ifdef HAVE_PCRE_JIT_SUPPORT
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (64 * 1024)
-ZEND_TLS pcre_jit_stack *jit_stack = NULL;
+ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
-#if defined(ZTS)
+ZEND_TLS pcre2_general_context *gctx = NULL;
+/* These two are global per thread for now. Though it is possible to use these
+ per pattern. Either one can copy it and use in pce, or one does no global
+ contexts at all, but creates for every pce. */
+ZEND_TLS pcre2_compile_context *cctx = NULL;
+ZEND_TLS pcre2_match_context *mctx = NULL;
+ZEND_TLS pcre2_match_data *mdata = NULL;
+ZEND_TLS zend_bool mdata_used = 0;
+ZEND_TLS uint8_t pcre2_init_ok = 0;
+#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() if (tsrm_is_main_thread() && pcre_mt) tsrm_mutex_free(pcre_mt); pcre_mt = NULL;
@@ -86,30 +105,30 @@ static void pcre_handle_exec_error(int pcre_code) /* {{{ */
int preg_code = 0;
switch (pcre_code) {
- case PCRE_ERROR_MATCHLIMIT:
+ case PCRE2_ERROR_MATCHLIMIT:
preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
break;
- case PCRE_ERROR_RECURSIONLIMIT:
+ case PCRE2_ERROR_RECURSIONLIMIT:
preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
break;
- case PCRE_ERROR_BADUTF8:
- preg_code = PHP_PCRE_BAD_UTF8_ERROR;
- break;
-
- case PCRE_ERROR_BADUTF8_OFFSET:
+ case PCRE2_ERROR_BADUTFOFFSET:
preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
break;
#ifdef HAVE_PCRE_JIT_SUPPORT
- case PCRE_ERROR_JIT_STACKLIMIT:
+ case PCRE2_ERROR_JIT_STACKLIMIT:
preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
break;
#endif
default:
- preg_code = PHP_PCRE_INTERNAL_ERROR;
+ if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
+ preg_code = PHP_PCRE_BAD_UTF8_ERROR;
+ } else {
+ preg_code = PHP_PCRE_INTERNAL_ERROR;
+ }
break;
}
@@ -121,10 +140,7 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
{
pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data);
if (!pce) return;
- pcre_free(pce->re);
- if (pce->extra) {
- pcre_free_study(pce->extra);
- }
+ pcre2_code_free(pce->re);
#if HAVE_SETLOCALE
if ((void*)pce->tables) pefree((void*)pce->tables, 1);
#endif
@@ -132,12 +148,120 @@ static void php_free_pcre_cache(zval *data) /* {{{ */
}
/* }}} */
+static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
+{/*{{{*/
+ void *p = pemalloc(size, 1);
+ return p;
+}/*}}}*/
+
+static void php_pcre_free(void *block, void *data)
+{/*{{{*/
+ pefree(block, 1);
+}/*}}}*/
+
+#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
+#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
+
+static void php_pcre_init_pcre2(uint8_t jit)
+{/*{{{*/
+ if (!gctx) {
+ gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
+ if (!gctx) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+ if (!cctx) {
+ cctx = pcre2_compile_context_create(gctx);
+ if (!cctx) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+ /* XXX The 'X' modifier is the default behavior in PCRE2. This option is
+ called dangerous in the manual, as typos in patterns can cause
+ unexpected results. We might want to to switch to the default PCRE2
+ behavior, too, thus causing a certain BC break. */
+ pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
+
+ if (!mctx) {
+ mctx = pcre2_match_context_create(gctx);
+ if (!mctx) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if (jit && !jit_stack) {
+ jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
+ if (!jit_stack) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+#endif
+
+ if (!mdata) {
+ mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
+ if (!mdata) {
+ pcre2_init_ok = 0;
+ return;
+ }
+ }
+
+ pcre2_init_ok = 1;
+}/*}}}*/
+
+static void php_pcre_shutdown_pcre2(void)
+{/*{{{*/
+ if (gctx) {
+ pcre2_general_context_free(gctx);
+ gctx = NULL;
+ }
+
+ if (cctx) {
+ pcre2_compile_context_free(cctx);
+ cctx = NULL;
+ }
+
+ if (mctx) {
+ pcre2_match_context_free(mctx);
+ mctx = NULL;
+ }
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ /* Stack may only be destroyed when no cached patterns
+ possibly associated with it do exist. */
+ if (jit_stack) {
+ pcre2_jit_stack_free(jit_stack);
+ jit_stack = NULL;
+ }
+#endif
+
+ if (mdata) {
+ pcre2_match_data_free(mdata);
+ mdata = NULL;
+ }
+
+ pcre2_init_ok = 0;
+}/*}}}*/
+
static PHP_GINIT_FUNCTION(pcre) /* {{{ */
{
+ php_pcre_mutex_alloc();
+
zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
pcre_globals->backtrack_limit = 0;
pcre_globals->recursion_limit = 0;
pcre_globals->error_code = PHP_PCRE_NO_ERROR;
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ pcre_globals->jit = 1;
+#endif
+
+ php_pcre_init_pcre2(1);
}
/* }}} */
@@ -145,44 +269,95 @@ static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
{
zend_hash_destroy(&pcre_globals->pcre_cache);
-#ifdef HAVE_PCRE_JIT_SUPPORT
- /* Stack may only be destroyed when no cached patterns
- possibly associated with it do exist. */
- if (jit_stack) {
- pcre_jit_stack_free(jit_stack);
- jit_stack = NULL;
- }
-#endif
+ php_pcre_shutdown_pcre2();
+ php_pcre_mutex_free();
}
/* }}} */
+static PHP_INI_MH(OnUpdateBacktrackLimit)
+{/*{{{*/
+ OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
+ if (mctx) {
+ pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
+ }
+
+ return SUCCESS;
+}/*}}}*/
+
+static PHP_INI_MH(OnUpdateRecursionLimit)
+{/*{{{*/
+ OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
+ if (mctx) {
+ pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
+ }
+
+ return SUCCESS;
+}/*}}}*/
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+static PHP_INI_MH(OnUpdateJit)
+{/*{{{*/
+ OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
+ if (PCRE_G(jit) && jit_stack) {
+ pcre2_jit_stack_assign(mctx, NULL, jit_stack);
+ } else {
+ pcre2_jit_stack_assign(mctx, NULL, NULL);
+ }
+
+ return SUCCESS;
+}/*}}}*/
+#endif
+
PHP_INI_BEGIN()
- STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
- STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
+ STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
+ STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
- STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateBool, jit, zend_pcre_globals, pcre_globals)
+ STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
+static char *_pcre2_config_str(uint32_t what)
+{/*{{{*/
+ int len = pcre2_config(what, NULL);
+ char *ret = (char *) malloc(len + 1);
+
+ len = pcre2_config(what, ret);
+ if (!len) {
+ free(ret);
+ return NULL;
+ }
+
+ return ret;
+}/*}}}*/
/* {{{ PHP_MINFO_FUNCTION(pcre) */
static PHP_MINFO_FUNCTION(pcre)
{
#ifdef HAVE_PCRE_JIT_SUPPORT
- int jit_yes = 0;
+ uint32_t flag = 0;
+ char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
#endif
+ char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
+ char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
php_info_print_table_start();
php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
- php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
+ php_info_print_table_row(2, "PCRE Library Version", version);
+ free(version);
+ php_info_print_table_row(2, "PCRE Unicode Version", unicode);
+ free(unicode);
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!pcre_config(PCRE_CONFIG_JIT, &jit_yes)) {
- php_info_print_table_row(2, "PCRE JIT Support", jit_yes ? "enabled" : "disabled");
+ if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
+ php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
} else {
php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
}
+ if (jit_target) {
+ php_info_print_table_row(2, "PCRE JIT Target", jit_target);
+ }
+ free(jit_target);
#else
php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
#endif
@@ -200,9 +375,19 @@ static PHP_MINFO_FUNCTION(pcre)
/* {{{ PHP_MINIT_FUNCTION(pcre) */
static PHP_MINIT_FUNCTION(pcre)
{
- REGISTER_INI_ENTRIES();
+ char *version;
- php_pcre_mutex_alloc();
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ if (!pcre2_init_ok) {
+ /* Retry. */
+ php_pcre_init_pcre2(PCRE_G(jit));
+ if (!pcre2_init_ok) {
+ return FAILURE;
+ }
+ }
+#endif
+
+ REGISTER_INI_ENTRIES();
REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
@@ -220,7 +405,17 @@ static PHP_MINIT_FUNCTION(pcre)
REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
- REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
+ version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
+ REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
+ free(version);
+ REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
+
+#ifdef HAVE_PCRE_JIT_SUPPORT
+ REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
+#else
+ REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
+#endif
return SUCCESS;
}
@@ -231,8 +426,6 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
{
UNREGISTER_INI_ENTRIES();
- php_pcre_mutex_free();
-
return SUCCESS;
}
/* }}} */
@@ -241,12 +434,19 @@ static PHP_MSHUTDOWN_FUNCTION(pcre)
/* {{{ PHP_RINIT_FUNCTION(pcre) */
static PHP_RINIT_FUNCTION(pcre)
{
- if (PCRE_G(jit) && jit_stack == NULL) {
+ if (!pcre2_init_ok) {
+ /* Retry. */
php_pcre_mutex_lock();
- jit_stack = pcre_jit_stack_alloc(PCRE_JIT_STACK_MIN_SIZE,PCRE_JIT_STACK_MAX_SIZE);
+ php_pcre_init_pcre2(PCRE_G(jit));
+ if (!pcre2_init_ok) {
+ php_pcre_mutex_unlock();
+ return FAILURE;
+ }
php_pcre_mutex_unlock();
}
+ mdata_used = 0;
+
return SUCCESS;
}
/* }}} */
@@ -268,21 +468,18 @@ static int pcre_clean_cache(zval *data, void *arg)
/* }}} */
/* {{{ static make_subpats_table */
-static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
+static char **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
{
- pcre_extra *extra = pce->extra;
- int name_cnt = pce->name_count, name_size, ni = 0;
- int rc;
+ uint32_t name_cnt = pce->name_count, name_size, ni = 0;
char *name_table;
unsigned short name_idx;
char **subpat_names;
int rc1, rc2;
- rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
- rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
- rc = rc2 ? rc2 : rc1;
- if (rc < 0) {
- php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
+ rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
+ rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
+ if (rc1 < 0 || rc2 < 0) {
+ php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc1 < 0 ? rc1 : rc2);
return NULL;
}
@@ -302,12 +499,12 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce)
/* }}} */
/* {{{ static calculate_unit_length */
-/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
-static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
+/* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
+static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
{
- int unit_len;
+ size_t unit_len;
- if (pce->compile_options & PCRE_UTF8) {
+ if (pce->compile_options & PCRE2_UTF) {
char *end = start;
/* skip continuation bytes */
@@ -324,21 +521,21 @@ static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char
*/
PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
- pcre *re = NULL;
- pcre_extra *extra;
- int coptions = 0;
- int soptions = 0;
- const char *error;
- int erroffset;
+ pcre2_code *re = NULL;
+ uint32_t coptions = 0;
+ uint32_t extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
+ PCRE2_UCHAR error[256];
+ PCRE2_SIZE erroffset;
+ int errnumber;
char delimiter;
char start_delimiter;
char end_delimiter;
char *p, *pp;
char *pattern;
- int do_study = 0;
- int poptions = 0;
- unsigned const char *tables = NULL;
- pcre_cache_entry *pce;
+ size_t pattern_len;
+ uint32_t poptions = 0;
+ const uint8_t *tables = NULL;
+ zval *zv;
pcre_cache_entry new_entry;
int rc;
zend_string *key;
@@ -357,14 +554,14 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
/* Try to lookup the cached regex entry, and if successful, just pass
back the compiled pattern, otherwise go on and compile it. */
- pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), key);
- if (pce) {
+ zv = zend_hash_find(&PCRE_G(pcre_cache), key);
+ if (zv) {
#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release(key);
}
#endif
- return pce;
+ return (pcre_cache_entry*)Z_PTR_P(zv);
}
p = ZSTR_VAL(regex);
@@ -380,7 +577,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
#endif
php_error_docref(NULL, E_WARNING,
p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@@ -394,7 +591,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
}
#endif
php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@@ -445,12 +642,13 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
} else {
php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
}
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
/* Make a copy of the actual pattern. */
- pattern = estrndup(p, pp-p);
+ pattern_len = pp - p;
+ pattern = estrndup(p, pattern_len);
/* Move on to the options */
pp++;
@@ -460,26 +658,26 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
switch (*pp++) {
/* Perl compatible options */
- case 'i': coptions |= PCRE_CASELESS; break;
- case 'm': coptions |= PCRE_MULTILINE; break;
- case 's': coptions |= PCRE_DOTALL; break;
- case 'x': coptions |= PCRE_EXTENDED; break;
+ case 'i': coptions |= PCRE2_CASELESS; break;
+ case 'm': coptions |= PCRE2_MULTILINE; break;
+ case 's': coptions |= PCRE2_DOTALL; break;
+ case 'x': coptions |= PCRE2_EXTENDED; break;
/* PCRE specific options */
- case 'A': coptions |= PCRE_ANCHORED; break;
- case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
- case 'S': do_study = 1; break;
- case 'U': coptions |= PCRE_UNGREEDY; break;
- case 'X': coptions |= PCRE_EXTRA; break;
- case 'u': coptions |= PCRE_UTF8;
+ case 'A': coptions |= PCRE2_ANCHORED; break;
+ case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
+ case 'S': /* Pass. */ break;
+ case 'U': coptions |= PCRE2_UNGREEDY; break;
+ case 'X': extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL; break;
+ case 'u': coptions |= PCRE2_UTF;
/* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
characters, even in UTF-8 mode. However, this can be changed by setting
the PCRE_UCP option. */
#ifdef PCRE_UCP
- coptions |= PCRE_UCP;
+ coptions |= PCRE2_UCP;
#endif
break;
- case 'J': coptions |= PCRE_DUPNAMES; break;
+ case 'J': coptions |= PCRE2_DUPNAMES; break;
/* Custom preg options */
case 'e': poptions |= PREG_REPLACE_EVAL; break;
@@ -494,7 +692,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
} else {
php_error_docref(NULL,E_WARNING, "Null byte in regex");
}
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
#if HAVE_SETLOCALE
if (key != regex) {
@@ -507,16 +705,29 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
#if HAVE_SETLOCALE
if (key != regex) {
- tables = pcre_maketables();
+ /* XXX a better solution here were to create a copy of cctx
+ and to cache it along with the generated tables. Once same locale
+ is encountered, the cached cctx and tables would be fetched.
+ Currently the tables are generated every time a locale based
+ pattern is used, and the tables are overwritten in the global
+ cctx. */
+ tables = pcre2_maketables(gctx);
+ pcre2_set_character_tables(cctx, tables);
}
#endif
+ /* Set extra options for the compile context. */
+ if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
+ pcre2_set_compile_extra_options(cctx, extra_coptions);
+ }
+
/* Compile pattern and display a warning if compilation failed. */
- re = pcre_compile(pattern,
- coptions,
- &error,
- &erroffset,
- tables);
+ re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
+
+ /* Reset the compile context extra options to default. */
+ if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
+ pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
+ }
if (re == NULL) {
#if HAVE_SETLOCALE
@@ -524,8 +735,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
zend_string_release(key);
}
#endif
- php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ pcre2_get_error_message(errnumber, error, sizeof(error));
+ php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
efree(pattern);
if (tables) {
pefree((void*)tables, 1);
@@ -536,35 +748,19 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
#ifdef HAVE_PCRE_JIT_SUPPORT
if (PCRE_G(jit)) {
/* Enable PCRE JIT compiler */
- do_study = 1;
- soptions |= PCRE_STUDY_JIT_COMPILE;
- }
-#endif
-
- /* If study option was specified, study the pattern and
- store the result in extra for passing to pcre_exec. */
- if (do_study) {
- php_pcre_mutex_lock();
- extra = pcre_study(re, soptions, &error);
- php_pcre_mutex_unlock();
- if (extra) {
- extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef HAVE_PCRE_JIT_SUPPORT
- if (PCRE_G(jit) && jit_stack) {
- pcre_assign_jit_stack(extra, NULL, jit_stack);
+ rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
+ if (EXPECTED(rc >= 0)) {
+ size_t jit_size = 0;
+ if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
+ poptions |= PREG_JIT;
}
-#endif
- }
- if (error != NULL) {
- php_error_docref(NULL, E_WARNING, "Error while studying pattern");
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ } else {
+ pcre2_get_error_message(rc, error, sizeof(error));
+ php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
}
- } else {
- extra = NULL;
}
-
+#endif
efree(pattern);
/*
@@ -572,42 +768,42 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
* these are supposedly the oldest ones (but not necessarily the least used
* ones).
*/
- if (!pce && zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
+ if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
int num_clean = PCRE_CACHE_SIZE / 8;
zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
}
/* Store the compiled pattern and extra info in the cache. */
new_entry.re = re;
- new_entry.extra = extra;
new_entry.preg_options = poptions;
new_entry.compile_options = coptions;
+ new_entry.extra_compile_options = extra_coptions;
#if HAVE_SETLOCALE
new_entry.tables = tables;
#endif
new_entry.refcount = 0;
- rc = pcre_fullinfo(re, extra, PCRE_INFO_CAPTURECOUNT, &new_entry.capture_count);
+ rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
if (rc < 0) {
#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release(key);
}
#endif
- php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
- rc = pcre_fullinfo(re, extra, PCRE_INFO_NAMECOUNT, &new_entry.name_count);
+ rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
if (rc < 0) {
#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release(key);
}
#endif
- php_error_docref(NULL, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
- pcre_handle_exec_error(PCRE_ERROR_INTERNAL);
+ php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
+ pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
return NULL;
}
@@ -619,34 +815,34 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
* as hash keys especually for this table.
* See bug #63180
*/
- if (!ZSTR_IS_INTERNED(key) || !(GC_FLAGS(key) & IS_STR_PERMANENT)) {
- pce = zend_hash_str_update_mem(&PCRE_G(pcre_cache),
- ZSTR_VAL(key), ZSTR_LEN(key), &new_entry, sizeof(pcre_cache_entry));
+ if (!(GC_FLAGS(key) & IS_STR_PERMANENT)) {
+ zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
+
+ GC_MAKE_PERSISTENT_LOCAL(str);
#if HAVE_SETLOCALE
if (key != regex) {
zend_string_release(key);
}
#endif
- } else {
- pce = zend_hash_update_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
+ key = str;
}
- return pce;
+ return zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
}
/* }}} */
/* {{{ pcre_get_compiled_regex
*/
-PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options)
+PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options)
{
pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
- if (extra) {
- *extra = pce ? pce->extra : NULL;
- }
if (preg_options) {
*preg_options = pce ? pce->preg_options : 0;
}
+ if (capture_count) {
+ *capture_count = pce ? pce->capture_count : 0;
+ }
return pce ? pce->re : NULL;
}
@@ -654,33 +850,60 @@ PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int
/* {{{ pcre_get_compiled_regex_ex
*/
-PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options)
+PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
{
pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
- if (extra) {
- *extra = pce ? pce->extra : NULL;
- }
if (preg_options) {
*preg_options = pce ? pce->preg_options : 0;
}
if (compile_options) {
*compile_options = pce ? pce->compile_options : 0;
}
+ if (capture_count) {
+ *capture_count = pce ? pce->capture_count : 0;
+ }
return pce ? pce->re : NULL;
}
/* }}} */
+/* XXX For the cases where it's only about match yes/no and no capture
+ required, perhaps just a minimum sized data would suffice. */
+PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
+{/*{{{*/
+ int rc = 0;
+
+ assert(NULL != re);
+
+ if (!capture_count) {
+ /* As we deal with a non cached pattern, no other way to gather this info. */
+ rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
+ }
+
+ if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ return mdata;
+ }
+
+ return pcre2_match_data_create_from_pattern(re, gctx);
+}/*}}}*/
+
+PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
+{/*{{{*/
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
+ }
+}/*}}}*/
+
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
+static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, char *name, uint32_t unmatched_as_null)
{
zval match_pair, tmp;
array_init_size(&match_pair, 2);
/* Add (match, offset) to the return value */
- if (offset < 0) {
+ if (PCRE2_UNSET == offset) {
if (unmatched_as_null) {
ZVAL_NULL(&tmp);
} else {
@@ -720,49 +943,41 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ *
Z_PARAM_LONG(start_offset)
ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
- if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- RETURN_FALSE;
- }
-
/* Compile regex or get it from cache. */
if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
RETURN_FALSE;
}
pce->refcount++;
- php_pcre_match_impl(pce, ZSTR_VAL(subject), (int)ZSTR_LEN(subject), return_value, subpats,
+ php_pcre_match_impl(pce, ZSTR_VAL(subject), ZSTR_LEN(subject), return_value, subpats,
global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
pce->refcount--;
}
/* }}} */
/* {{{ php_pcre_match_impl() */
-PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
- zval *subpats, int global, int use_flags, zend_long flags, zend_long start_offset)
+PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, size_t subject_len, zval *return_value,
+ zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
{
zval result_set, /* Holds a set of subpatterns after
a global match */
*match_sets = NULL; /* An array of sets of matches for each
subpattern after a global match */
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int no_utf_check = 0; /* Execution options */
+ uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
- int *offsets; /* Array of subpattern offsets */
- int num_subpats; /* Number of captured subpatterns */
- int size_offsets; /* Size of the offsets array */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
+ uint32_t num_subpats; /* Number of captured subpatterns */
int matched; /* Has anything matched */
- int g_notempty = 0; /* If the match should not be empty */
+ uint32_t g_notempty = 0; /* If the match should not be empty */
char **subpat_names; /* Array for named subpatterns */
- int i;
- int subpats_order; /* Order of subpattern matches */
- int offset_capture; /* Capture match offsets: yes/no */
- int unmatched_as_null; /* Null non-matches: yes/no */
- unsigned char *mark = NULL; /* Target for MARK name */
+ size_t i;
+ uint32_t subpats_order; /* Order of subpattern matches */
+ uint32_t offset_capture; /* Capture match offsets: yes/no */
+ uint32_t unmatched_as_null; /* Null non-matches: yes/no */
+ PCRE2_SPTR mark = NULL; /* Target for MARK name */
zval marks; /* Array of marks for PREG_PATTERN_ORDER */
-
- ALLOCA_FLAG(use_heap);
+ pcre2_match_data *match_data;
+ PCRE2_SIZE start_offset2;
ZVAL_UNDEF(&marks);
@@ -797,26 +1012,17 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
/* Negative offset counts from the end of the string. */
if (start_offset < 0) {
- start_offset = subject_len + start_offset;
- if (start_offset < 0) {
- start_offset = 0;
+ if ((PCRE2_SIZE)-start_offset <= subject_len) {
+ start_offset2 = subject_len + start_offset;
+ } else {
+ start_offset2 = 0;
}
+ } else {
+ start_offset2 = (PCRE2_SIZE)start_offset;
}
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef PCRE_EXTRA_MARK
- extra->mark = &mark;
- extra->flags |= PCRE_EXTRA_MARK;
-#endif
-
/* Calculate the size of the offsets array, and allocate memory for it. */
num_subpats = pce->capture_count + 1;
- size_offsets = num_subpats * 3;
/*
* Build a mapping from subpattern numbers to their names. We will
@@ -830,12 +1036,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
- memset(offsets, 0, size_offsets*sizeof(int));
/* Allocate match sets array and initialize the values. */
if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
@@ -847,53 +1047,67 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
matched = 0;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
+
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ if (!(pce->compile_options & PCRE2_UTF)) {
+ no_utf_check = PCRE2_NO_UTF_CHECK;
}
+
#endif
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ if (subpat_names) {
+ efree(subpat_names);
+ }
+ if (match_sets) {
+ efree(match_sets);
+ }
+ RETURN_FALSE;
+ }
+ }
do {
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
+ if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
&& no_utf_check && !g_notempty) {
- if (start_offset < 0 || start_offset > subject_len) {
- pcre_handle_exec_error(PCRE_ERROR_BADOFFSET);
+ if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
+ pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
break;
}
- count = pcre_jit_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre_exec(pce->re, extra, subject, (int)subject_len, (int)start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
+ no_utf_check|g_notempty, match_data, mctx);
/* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ no_utf_check = PCRE2_NO_UTF_CHECK;
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
+ count = num_subpats;
}
/* If something has matched */
if (count > 0) {
matched++;
+ offsets = pcre2_get_ovector_pointer(match_data);
+
/* If subpatterns array has been passed, fill it in with values. */
if (subpats != NULL) {
/* Try to get the list of substrings and display a warning if failed. */
- if (offsets[1] - offsets[0] < 0) {
+ if (offsets[1] < offsets[0]) {
if (subpat_names) {
efree(subpat_names);
}
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
if (match_sets) efree(match_sets);
php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
RETURN_FALSE;
@@ -909,7 +1123,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(&match_sets[i]);
} else {
@@ -921,6 +1135,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
}
+ mark = pcre2_get_mark(match_data);
/* Add MARK, if available */
if (mark) {
if (Z_TYPE(marks) == IS_UNDEF) {
@@ -956,7 +1171,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_assoc_null(&result_set, subpat_names[i]);
} else {
@@ -967,7 +1182,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(&result_set);
} else {
@@ -987,7 +1202,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(&result_set);
} else {
@@ -1001,6 +1216,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
/* Add MARK, if available */
+ mark = pcre2_get_mark(match_data);
if (mark) {
add_assoc_string_ex(&result_set, "MARK", sizeof("MARK") - 1, (char *)mark);
}
@@ -1019,7 +1235,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_assoc_null(subpats, subpat_names[i]);
} else {
@@ -1030,7 +1246,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(subpats);
} else {
@@ -1051,7 +1267,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) {
+ if (PCRE2_UNSET == offsets[i<<1]) {
if (unmatched_as_null) {
add_next_index_null(subpats);
} else {
@@ -1065,6 +1281,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
/* Add MARK, if available */
+ mark = pcre2_get_mark(match_data);
if (mark) {
add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
}
@@ -1073,31 +1290,35 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
/* Advance to the next piece. */
- start_offset = offsets[1];
+ start_offset2 = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
+ g_notempty = (start_offset2 == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
- } else if (count == PCRE_ERROR_NOMATCH) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
+ } else if (count == PCRE2_ERROR_NOMATCH) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
- if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, subject + start_offset);
+ if (g_notempty != 0 && start_offset2 < subject_len) {
+ size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
- start_offset += unit_len;
+ start_offset2 += unit_len;
g_notempty = 0;
- } else
+ } else {
break;
+ }
} else {
pcre_handle_exec_error(count);
break;
}
} while (global);
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
+ }
/* Add the match sets to the output array and clean up */
if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
@@ -1122,11 +1343,6 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
}
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
if (subpat_names) {
efree(subpat_names);
}
@@ -1197,7 +1413,7 @@ static int preg_get_backref(char **str, int *backref)
/* {{{ preg_do_repl_func
*/
-static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark)
+static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, char **subpat_names, int count, const PCRE2_SPTR mark)
{
zend_string *result_str;
zval retval; /* Function return value */
@@ -1227,8 +1443,12 @@ static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cach
fci->no_separation = 0;
if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
- result_str = zval_get_string(&retval);
- zval_ptr_dtor(&retval);
+ if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
+ result_str = Z_STR(retval);
+ } else {
+ result_str = zval_get_string_func(&retval);
+ zval_ptr_dtor(&retval);
+ }
} else {
if (!EG(exception)) {
php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
@@ -1247,9 +1467,9 @@ static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cach
*/
PHPAPI zend_string *php_pcre_replace(zend_string *regex,
zend_string *subject_str,
- char *subject, int subject_len,
+ char *subject, size_t subject_len,
zend_string *replace_str,
- int limit, int *replace_count)
+ size_t limit, size_t *replace_count)
{
pcre_cache_entry *pce; /* Compiled regular expression */
zend_string *result; /* Function result */
@@ -1268,22 +1488,19 @@ PHPAPI zend_string *php_pcre_replace(zend_string *regex,
/* }}} */
/* {{{ php_pcre_replace_impl() */
-PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_string *replace_str, int limit, int *replace_count)
+PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int no_utf_check = 0; /* Execution options */
+ uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
- int *offsets; /* Array of subpattern offsets */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
char **subpat_names; /* Array for named subpatterns */
- int num_subpats; /* Number of captured subpatterns */
- int size_offsets; /* Size of the offsets array */
+ uint32_t num_subpats; /* Number of captured subpatterns */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
- int match_len; /* Length of the current match */
+ size_t match_len; /* Length of the current match */
int backref; /* Backreference number */
- int start_offset; /* Where the new search starts */
- int g_notempty=0; /* If the match should not be empty */
+ PCRE2_SIZE start_offset; /* Where the new search starts */
+ uint32_t g_notempty=0; /* If the match should not be empty */
char *walkbuf, /* Location of current replacement in the result */
*walk, /* Used to walk the replacement string */
*match, /* The current match */
@@ -1292,16 +1509,7 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
walk_last; /* Last walked character */
size_t result_len; /* Length of result */
zend_string *result; /* Result of replacement */
-
- ALLOCA_FLAG(use_heap);
-
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
-
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
+ pcre2_match_data *match_data;
if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
@@ -1310,12 +1518,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
/* Calculate the size of the offsets array, and allocate memory for it. */
num_subpats = pce->capture_count + 1;
- size_offsets = num_subpats * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
/*
* Build a mapping from subpattern numbers to their names. We will
@@ -1325,11 +1527,6 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
if (UNEXPECTED(pce->name_count > 0)) {
subpat_names = make_subpats_table(num_subpats, pce);
if (!subpat_names) {
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
return NULL;
}
}
@@ -1344,40 +1541,50 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ if (!(pce->compile_options & PCRE2_UTF)) {
+ no_utf_check = PCRE2_NO_UTF_CHECK;
}
-#endif
-#ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
#endif
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ if (subpat_names) {
+ efree(subpat_names);
+ }
+ return NULL;
+ }
+ }
while (1) {
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
+ if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
&& no_utf_check && !g_notempty) {
- count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ no_utf_check|g_notempty, match_data, mctx);
/* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ no_utf_check = PCRE2_NO_UTF_CHECK;
/* Check for too many substrings condition. */
if (UNEXPECTED(count == 0)) {
php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets / 3;
+ count = num_subpats;
}
piece = subject + start_offset;
- /* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
- if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
+ offsets = pcre2_get_ovector_pointer(match_data);
+
+ if (count > 0 && (offsets[1] >= offsets[0]) && limit > 0) {
zend_bool simple_string = 1;
if (replace_count) {
@@ -1469,18 +1676,18 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
start_offset = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
+ g_notempty = (start_offset == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
- } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
+ } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, piece);
+ size_t unit_len = calculate_unit_length(pce, piece);
start_offset += unit_len;
memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
@@ -1516,12 +1723,10 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
break;
}
}
-
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
}
+
if (UNEXPECTED(subpat_names)) {
efree(subpat_names);
}
@@ -1531,36 +1736,25 @@ PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *su
/* }}} */
/* {{{ php_pcre_replace_func_impl() */
-static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, int subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, int limit, int *replace_count)
+static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count)
{
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int no_utf_check = 0; /* Execution options */
+ uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
- int *offsets; /* Array of subpattern offsets */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
char **subpat_names; /* Array for named subpatterns */
- int num_subpats; /* Number of captured subpatterns */
- int size_offsets; /* Size of the offsets array */
+ uint32_t num_subpats; /* Number of captured subpatterns */
size_t new_len; /* Length of needed storage */
size_t alloc_len; /* Actual allocated length */
- int start_offset; /* Where the new search starts */
- int g_notempty=0; /* If the match should not be empty */
+ PCRE2_SIZE start_offset; /* Where the new search starts */
+ uint32_t g_notempty=0; /* If the match should not be empty */
char *match, /* The current match */
*piece; /* The current piece of subject */
size_t result_len; /* Length of result */
- unsigned char *mark = NULL; /* Target for MARK name */
+ PCRE2_SPTR mark = NULL; /* Target for MARK name */
zend_string *result; /* Result of replacement */
zend_string *eval_result=NULL; /* Result of custom function */
-
- ALLOCA_FLAG(use_heap);
-
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
-
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
+ pcre2_match_data *match_data;
+ zend_bool old_mdata_used;
if (UNEXPECTED(pce->preg_options & PREG_REPLACE_EVAL)) {
php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
@@ -1569,12 +1763,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
/* Calculate the size of the offsets array, and allocate memory for it. */
num_subpats = pce->capture_count + 1;
- size_offsets = num_subpats * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
/*
* Build a mapping from subpattern numbers to their names. We will
@@ -1584,11 +1772,6 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
if (UNEXPECTED(pce->name_count > 0)) {
subpat_names = make_subpats_table(num_subpats, pce);
if (!subpat_names) {
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
return NULL;
}
}
@@ -1603,41 +1786,55 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ if (!(pce->compile_options & PCRE2_UTF)) {
+ no_utf_check = PCRE2_NO_UTF_CHECK;
}
-#endif
-#ifdef PCRE_EXTRA_MARK
- extra->mark = &mark;
- extra->flags |= PCRE_EXTRA_MARK;
#endif
+ old_mdata_used = mdata_used;
+ if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ mdata_used = 1;
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ if (subpat_names) {
+ efree(subpat_names);
+ }
+ mdata_used = old_mdata_used;
+ return NULL;
+ }
+ }
while (1) {
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
+ if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
&& no_utf_check && !g_notempty) {
- count = pcre_jit_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
+ no_utf_check|g_notempty, match_data, mctx);
/* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ no_utf_check = PCRE2_NO_UTF_CHECK;
/* Check for too many substrings condition. */
- if (UNEXPECTED(count == 0)) {
+ if (count == 0) {
php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets / 3;
+ count = num_subpats;
}
piece = subject + start_offset;
+ offsets = pcre2_get_ovector_pointer(match_data);
+ mark = pcre2_get_mark(match_data);
+
/* if (EXPECTED(count > 0 && (limit == -1 || limit > 0))) */
- if (count > 0 && (offsets[1] - offsets[0] >= 0) && limit) {
+ if (count > 0 && (offsets[1] >= offsets[0]) && limit) {
if (replace_count) {
++*replace_count;
}
@@ -1663,12 +1860,12 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
if (match-piece > 0) {
/* copy the part of the string before the match */
memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
- result_len += (int)(match-piece);
+ result_len += (match-piece);
}
/* If using custom function, copy result to the buffer and clean up. */
memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
- result_len += (int)ZSTR_LEN(eval_result);
+ result_len += ZSTR_LEN(eval_result);
zend_string_release(eval_result);
if (limit) {
@@ -1679,18 +1876,18 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
start_offset = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0]) ? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
+ g_notempty = (start_offset == offsets[0]) ? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
- } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
+ } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) {
- int unit_len = calculate_unit_length(pce, piece);
+ size_t unit_len = calculate_unit_length(pce, piece);
start_offset += unit_len;
memcpy(ZSTR_VAL(result) + result_len, piece, unit_len);
@@ -1726,12 +1923,11 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
break;
}
}
-
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
}
+ mdata_used = old_mdata_used;
+
if (UNEXPECTED(subpat_names)) {
efree(subpat_names);
}
@@ -1745,7 +1941,7 @@ static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_strin
static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
zend_string *subject_str,
zend_fcall_info *fci, zend_fcall_info_cache *fcc,
- int limit, int *replace_count)
+ size_t limit, size_t *replace_count)
{
pcre_cache_entry *pce; /* Compiled regular expression */
zend_string *result; /* Function result */
@@ -1765,11 +1961,11 @@ static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
/* {{{ php_pcre_replace_array
*/
-static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, int limit, int *replace_count)
+static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
{
zval *regex_entry;
zend_string *result;
- zend_string *replace_str;
+ zend_string *replace_str, *tmp_replace_str;
if (Z_TYPE_P(replace) == IS_ARRAY) {
uint32_t replace_idx = 0;
@@ -1778,19 +1974,21 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
/* For each entry in the regex array, get the entry */
ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
/* Make sure we're dealing with strings. */
- zend_string *regex_str = zval_get_string(regex_entry);
+ zend_string *tmp_regex_str;
+ zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
zval *zv;
/* Get current entry */
while (1) {
if (replace_idx == replace_ht->nNumUsed) {
replace_str = ZSTR_EMPTY_ALLOC();
+ tmp_replace_str = NULL;
break;
}
zv = &replace_ht->arData[replace_idx].val;
replace_idx++;
if (Z_TYPE_P(zv) != IS_UNDEF) {
- replace_str = zval_get_string(zv);
+ replace_str = zval_get_tmp_string(zv, &tmp_replace_str);
break;
}
}
@@ -1800,12 +1998,12 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
result = php_pcre_replace(regex_str,
subject_str,
ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str),
+ ZSTR_LEN(subject_str),
replace_str,
limit,
replace_count);
- zend_string_release(replace_str);
- zend_string_release(regex_str);
+ zend_tmp_string_release(tmp_replace_str);
+ zend_tmp_string_release(tmp_regex_str);
zend_string_release(subject_str);
subject_str = result;
if (UNEXPECTED(result == NULL)) {
@@ -1819,18 +2017,19 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
/* For each entry in the regex array, get the entry */
ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
/* Make sure we're dealing with strings. */
- zend_string *regex_str = zval_get_string(regex_entry);
+ zend_string *tmp_regex_str;
+ zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
/* Do the actual replacement and put the result back into subject_str
for further replacements. */
result = php_pcre_replace(regex_str,
subject_str,
ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str),
+ ZSTR_LEN(subject_str),
replace_str,
limit,
replace_count);
- zend_string_release(regex_str);
+ zend_tmp_string_release(tmp_regex_str);
zend_string_release(subject_str);
subject_str = result;
@@ -1846,20 +2045,16 @@ static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend
/* {{{ php_replace_in_subject
*/
-static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, int limit, int *replace_count)
+static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
{
zend_string *result;
zend_string *subject_str = zval_get_string(subject);
- if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
- zend_string_release(subject_str);
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- result = NULL;
- } else if (Z_TYPE_P(regex) != IS_ARRAY) {
+ if (Z_TYPE_P(regex) != IS_ARRAY) {
result = php_pcre_replace(Z_STR_P(regex),
subject_str,
ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str),
+ ZSTR_LEN(subject_str),
Z_STR_P(replace),
limit,
replace_count);
@@ -1877,17 +2072,12 @@ static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval
/* {{{ php_replace_in_subject_func
*/
-static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, int limit, int *replace_count)
+static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count)
{
zval *regex_entry;
zend_string *result;
zend_string *subject_str = zval_get_string(subject);
- if (UNEXPECTED(ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject_str)))) {
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- return NULL;
- }
-
if (Z_TYPE_P(regex) != IS_ARRAY) {
result = php_pcre_replace_func(Z_STR_P(regex),
subject_str,
@@ -1902,7 +2092,8 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc
/* For each entry in the regex array, get the entry */
ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
/* Make sure we're dealing with strings. */
- zend_string *regex_str = zval_get_string(regex_entry);
+ zend_string *tmp_regex_str;
+ zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
/* Do the actual replacement and put the result back into subject_str
for further replacements. */
@@ -1911,7 +2102,7 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc
fci, fcc,
limit,
replace_count);
- zend_string_release(regex_str);
+ zend_tmp_string_release(tmp_regex_str);
zend_string_release(subject_str);
subject_str = result;
if (UNEXPECTED(result == NULL)) {
@@ -1926,10 +2117,10 @@ static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fc
/* {{{ preg_replace_func_impl
*/
-static int preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
+static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val)
{
zend_string *result;
- int replace_count = 0;
+ size_t replace_count = 0;
if (Z_TYPE_P(regex) != IS_ARRAY) {
convert_to_string_ex(regex);
@@ -1976,9 +2167,9 @@ static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
{
zval *regex, *replace, *subject, *zcount = NULL;
zend_long limit = -1;
- int replace_count = 0;
+ size_t replace_count = 0;
zend_string *result;
- int old_replace_count;
+ size_t old_replace_count;
/* Get function parameters and do error-checking. */
ZEND_PARSE_PARAMETERS_START(3, 5)
@@ -2073,7 +2264,7 @@ static PHP_FUNCTION(preg_replace_callback)
{
zval *regex, *replace, *subject, *zcount = NULL;
zend_long limit = -1;
- int replace_count;
+ size_t replace_count;
zend_fcall_info fci;
zend_fcall_info_cache fcc;
@@ -2114,7 +2305,7 @@ static PHP_FUNCTION(preg_replace_callback_array)
zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
zend_long limit = -1;
zend_string *str_idx;
- int replace_count = 0;
+ size_t replace_count = 0;
zend_fcall_info fci;
zend_fcall_info_cache fcc;
@@ -2202,18 +2393,13 @@ static PHP_FUNCTION(preg_split)
Z_PARAM_LONG(flags)
ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
- if (ZEND_SIZE_T_INT_OVFL(ZSTR_LEN(subject))) {
- php_error_docref(NULL, E_WARNING, "Subject is too long");
- RETURN_FALSE;
- }
-
/* Compile regex or get it from cache. */
if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
RETURN_FALSE;
}
pce->refcount++;
- php_pcre_split_impl(pce, subject, return_value, (int)limit_val, flags);
+ php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
pce->refcount--;
}
/* }}} */
@@ -2223,21 +2409,19 @@ static PHP_FUNCTION(preg_split)
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
zend_long limit_val, zend_long flags)
{
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int *offsets; /* Array of subpattern offsets */
- int size_offsets; /* Size of the offsets array */
- int no_utf_check = 0; /* Execution options */
+ PCRE2_SIZE *offsets; /* Array of subpattern offsets */
+ uint32_t no_utf_check = 0; /* Execution options */
int count = 0; /* Count of matched subpatterns */
- int start_offset; /* Where the new search starts */
- int next_offset; /* End of the last delimiter match + 1 */
- int g_notempty = 0; /* If the match should not be empty */
+ PCRE2_SIZE start_offset; /* Where the new search starts */
+ PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
+ uint32_t g_notempty = 0; /* If the match should not be empty */
char *last_match; /* Location of last match */
- int no_empty; /* If NO_EMPTY flag is set */
- int delim_capture; /* If delimiters should be captured */
- int offset_capture; /* If offsets should be captured */
+ uint32_t no_empty; /* If NO_EMPTY flag is set */
+ uint32_t delim_capture; /* If delimiters should be captured */
+ uint32_t offset_capture; /* If offsets should be captured */
+ uint32_t num_subpats; /* Number of captured subpatterns */
zval tmp;
- ALLOCA_FLAG(use_heap);
+ pcre2_match_data *match_data;
no_empty = flags & PREG_SPLIT_NO_EMPTY;
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
@@ -2247,26 +2431,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
limit_val = -1;
}
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
-#endif
-
/* Initialize return value */
array_init(return_value);
/* Calculate the size of the offsets array, and allocate memory for it. */
- size_offsets = (pce->capture_count + 1) * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
+ num_subpats = pce->capture_count + 1;
/* Start at the beginning of the string */
start_offset = 0;
@@ -2275,41 +2444,51 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
#ifdef HAVE_PCRE_JIT_SUPPORT
- if (!(pce->compile_options & PCRE_UTF8)) {
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ if (!(pce->compile_options & PCRE2_UTF)) {
+ no_utf_check = PCRE2_NO_UTF_CHECK;
}
+
#endif
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ return;
+ }
+ }
/* Get next piece if no limit or limit not yet reached and something matched*/
while ((limit_val == -1 || limit_val > 1)) {
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
+ if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
&& no_utf_check && !g_notempty) {
- count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
- ZSTR_LEN(subject_str), start_offset,
- no_utf_check|g_notempty, offsets, size_offsets, jit_stack);
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
- ZSTR_LEN(subject_str), start_offset,
- no_utf_check|g_notempty, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ no_utf_check|g_notempty, match_data, mctx);
/* the string was already proved to be valid UTF-8 */
- no_utf_check = PCRE_NO_UTF8_CHECK;
+ no_utf_check = PCRE2_NO_UTF_CHECK;
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
+ count = num_subpats;
}
+ offsets = pcre2_get_ovector_pointer(match_data);
+
/* If something matched */
- if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
+ if (count > 0 && (offsets[1] >= offsets[0])) {
if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
if (offset_capture) {
/* Add (match, offset) pair to the return value */
- add_offset_pair(return_value, last_match, (int)(&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
+ add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
} else {
/* Add the piece to the return value */
ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
@@ -2325,7 +2504,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
next_offset = offsets[1];
if (delim_capture) {
- int i, match_len;
+ size_t i, match_len;
for (i = 1; i < count; i++) {
match_len = offsets[(i<<1)+1] - offsets[i<<1];
/* If we have matched a delimiter */
@@ -2344,13 +2523,13 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
start_offset = offsets[1];
/* If we have matched an empty string, mimic what Perl's /g options does.
- This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
+ This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
- g_notempty = (start_offset == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
+ g_notempty = (start_offset == offsets[0])? PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED : 0;
- } else if (count == PCRE_ERROR_NOMATCH) {
- /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
+ } else if (count == PCRE2_ERROR_NOMATCH) {
+ /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
this is not necessarily the end. We need to advance
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
@@ -2365,9 +2544,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
break;
}
}
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
+ }
-
- start_offset = (int)(last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
+ start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
if (offset_capture) {
@@ -2383,14 +2564,6 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str,
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
}
-
-
- /* Clean up */
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
- }
}
/* }}} */
@@ -2454,6 +2627,7 @@ static PHP_FUNCTION(preg_quote)
case '|':
case ':':
case '-':
+ case '#':
extra_len++;
break;
@@ -2503,6 +2677,7 @@ static PHP_FUNCTION(preg_quote)
case '|':
case ':':
case '-':
+ case '#':
*q++ = '\\';
*q++ = c;
break;
@@ -2559,46 +2734,37 @@ static PHP_FUNCTION(preg_grep)
PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
{
- zval *entry; /* An entry in the input array */
- pcre_extra *extra = pce->extra;/* Holds results of studying */
- pcre_extra extra_data; /* Used locally for exec options */
- int *offsets; /* Array of subpattern offsets */
- int size_offsets; /* Size of the offsets array */
+ zval *entry; /* An entry in the input array */
+ uint32_t num_subpats; /* Number of captured subpatterns */
int count = 0; /* Count of matched subpatterns */
- int no_utf_check = 0; /* Execution options */
+ uint32_t no_utf_check = 0; /* Execution options */
zend_string *string_key;
zend_ulong num_key;
zend_bool invert; /* Whether to return non-matching
entries */
- ALLOCA_FLAG(use_heap);
-
+ pcre2_match_data *match_data;
invert = flags & PREG_GREP_INVERT ? 1 : 0;
- if (extra == NULL) {
- extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- extra = &extra_data;
- }
- extra->match_limit = (unsigned long)PCRE_G(backtrack_limit);
- extra->match_limit_recursion = (unsigned long)PCRE_G(recursion_limit);
-#ifdef PCRE_EXTRA_MARK
- extra->flags &= ~PCRE_EXTRA_MARK;
-#endif
-
/* Calculate the size of the offsets array, and allocate memory for it. */
- size_offsets = (pce->capture_count + 1) * 3;
- if (size_offsets <= 32) {
- offsets = (int *)do_alloca(size_offsets * sizeof(int), use_heap);
- } else {
- offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
- }
+ num_subpats = pce->capture_count + 1;
/* Initialize return array */
array_init(return_value);
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
+ if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
+ match_data = mdata;
+ } else {
+ match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
+ if (!match_data) {
+ PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
+ return;
+ }
+ }
+
#ifdef HAVE_PCRE_JIT_SUPPORT
- no_utf_check = (pce->compile_options & PCRE_UTF8) ? 0 : PCRE_NO_UTF8_CHECK;
+ no_utf_check = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
#endif
/* Go through the input array */
@@ -2607,32 +2773,28 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
/* Perform the match */
#ifdef HAVE_PCRE_JIT_SUPPORT
- if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)
+ if (PCRE_G(jit) && (pce->preg_options & PREG_JIT)
&& no_utf_check) {
- count = pcre_jit_exec(pce->re, extra, ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str), 0,
- no_utf_check, offsets, size_offsets, jit_stack);
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
+ PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre_exec(pce->re, extra, ZSTR_VAL(subject_str),
- (int)ZSTR_LEN(subject_str), 0,
- no_utf_check, offsets, size_offsets);
+ count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
+ no_utf_check, match_data, mctx);
/* Check for too many substrings condition. */
if (count == 0) {
php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
- count = size_offsets/3;
- } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
+ count = num_subpats;
+ } else if (count < 0 && count != PCRE2_ERROR_NOMATCH) {
pcre_handle_exec_error(count);
zend_string_release(subject_str);
break;
}
/* If the entry fits our requirements */
- if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
- if (Z_REFCOUNTED_P(entry)) {
- Z_ADDREF_P(entry);
- }
+ if ((count > 0 && !invert) || (count == PCRE2_ERROR_NOMATCH && invert)) {
+ Z_TRY_ADDREF_P(entry);
/* Add to return array */
if (string_key) {
@@ -2644,12 +2806,8 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
zend_string_release(subject_str);
} ZEND_HASH_FOREACH_END();
-
- /* Clean up */
- if (size_offsets <= 32) {
- free_alloca(offsets, use_heap);
- } else {
- efree(offsets);
+ if (match_data != mdata) {
+ pcre2_match_data_free(match_data);
}
}
/* }}} */
@@ -2770,6 +2928,40 @@ ZEND_GET_MODULE(pcre)
/* }}} */
+PHPAPI pcre2_match_context *php_pcre_mctx(void)
+{/*{{{*/
+ return mctx;
+}/*}}}*/
+
+PHPAPI pcre2_general_context *php_pcre_gctx(void)
+{/*{{{*/
+ return gctx;
+}/*}}}*/
+
+PHPAPI pcre2_compile_context *php_pcre_cctx(void)
+{/*{{{*/
+ return cctx;
+}/*}}}*/
+
+PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
+{/*{{{*/
+ assert(NULL != pce);
+ pce->refcount++;
+}/*}}}*/
+
+PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
+{/*{{{*/
+ assert(NULL != pce);
+ assert(0 != pce->refcount);
+ pce->refcount--;
+}/*}}}*/
+
+PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
+{/*{{{*/
+ assert(NULL != pce);
+ return pce->re;
+}/*}}}*/
+
#endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
/*