From f4cfaf36e23ca47da3e352e1c60909104c059647 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Mon, 10 Feb 2014 10:04:30 +0400 Subject: Use better data structures (incomplete) --- ext/pcre/php_pcre.c | 258 +++++++++++++++++++++++++--------------------------- 1 file changed, 122 insertions(+), 136 deletions(-) (limited to 'ext/pcre/php_pcre.c') diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 84ad12311d..a8976eeede 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -86,9 +86,9 @@ static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */ } /* }}} */ -static void php_free_pcre_cache(void *data) /* {{{ */ +static void php_free_pcre_cache(zval *data) /* {{{ */ { - pcre_cache_entry *pce = (pcre_cache_entry *) data; + pcre_cache_entry *pce = (pcre_cache_entry *) Z_PTR_P(data); if (!pce) return; pefree(pce->re, 1); if (pce->extra) pefree(pce->extra, 1); @@ -167,7 +167,7 @@ static PHP_MSHUTDOWN_FUNCTION(pcre) /* }}} */ /* {{{ static pcre_clean_cache */ -static int pcre_clean_cache(void *data, void *arg TSRMLS_DC) +static int pcre_clean_cache(zval *data, void *arg TSRMLS_DC) { int *num_clean = (int *)arg; @@ -226,7 +226,7 @@ static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_D /* {{{ pcre_get_compiled_regex_cache */ -PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC) +PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex TSRMLS_DC) { pcre *re = NULL; pcre_extra *extra; @@ -248,7 +248,6 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le #endif pcre_cache_entry *pce; pcre_cache_entry new_entry; - char *tmp = NULL; #if HAVE_SETLOCALE # if defined(PHP_WIN32) && defined(ZTS) @@ -259,7 +258,8 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le /* Try to lookup the cached regex entry, and if successful, just pass back the compiled pattern, otherwise go on and compile it. */ - if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) { + pce = zend_hash_find_ptr(&PCRE_G(pcre_cache), regex); + if (pce) { /* * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it * is, we flush it and compile the pattern from scratch. @@ -277,14 +277,14 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le } } - p = regex; + p = regex->val; /* Parse through the leading whitespace, and display a warning if we get to the end without encountering a delimiter. */ while (isspace((int)*(unsigned char *)p)) p++; if (*p == 0) { php_error_docref(NULL TSRMLS_CC, E_WARNING, - p < regex + regex_len ? "Null byte in regex" : "Empty regular expression"); + p < regex->val + regex->len ? "Null byte in regex" : "Empty regular expression"); return NULL; } @@ -331,7 +331,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le } if (*pp == 0) { - if (pp < regex + regex_len) { + if (pp < regex->val + regex->len) { php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex"); } else if (start_delimiter == end_delimiter) { php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter); @@ -349,7 +349,7 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le /* Parse through the options, setting appropriate flags. Display a warning if we encounter an unknown modifier. */ - while (pp < regex + regex_len) { + while (pp < regex->val + regex->len) { switch (*pp++) { /* Perl compatible options */ case 'i': coptions |= PCRE_CASELESS; break; @@ -455,16 +455,12 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le * as hash keys especually for this table. * See bug #63180 */ - if (IS_INTERNED(regex)) { - regex = tmp = estrndup(regex, regex_len); - } + STR_ADDREF(regex); - zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry, - sizeof(pcre_cache_entry), (void**)&pce); + pce = zend_hash_update_mem(&PCRE_G(pcre_cache), regex, (void *)&new_entry, + sizeof(pcre_cache_entry)); - if (tmp) { - efree(tmp); - } + STR_RELEASE(regex); return pce; } @@ -472,9 +468,9 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le /* {{{ pcre_get_compiled_regex */ -PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) +PHPAPI pcre* pcre_get_compiled_regex(zend_string *regex, pcre_extra **extra, int *preg_options TSRMLS_DC) { - pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); + pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC); if (extra) { *extra = pce ? pce->extra : NULL; @@ -489,9 +485,9 @@ PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_ /* {{{ pcre_get_compiled_regex_ex */ -PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) +PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC) { - pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC); + pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC); if (extra) { *extra = pce ? pce->extra : NULL; @@ -510,43 +506,40 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *pr /* {{{ add_offset_pair */ static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name) { - zval *match_pair; + zval match_pair; - ALLOC_ZVAL(match_pair); - array_init(match_pair); - INIT_PZVAL(match_pair); + array_init(&match_pair); /* Add (match, offset) to the return value */ - add_next_index_stringl(match_pair, str, len, 1); - add_next_index_long(match_pair, offset); + add_next_index_stringl(&match_pair, str, len, 1); + add_next_index_long(&match_pair, offset); if (name) { zval_add_ref(&match_pair); - zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL); + zend_hash_str_update(Z_ARRVAL_P(result), name, strlen(name), &match_pair); } - zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL); + zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair); } /* }}} */ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */ { /* parameters */ - char *regex; /* Regular expression */ + zend_string *regex; /* Regular expression */ char *subject; /* String to match against */ - int regex_len; int subject_len; pcre_cache_entry *pce; /* Compiled regular expression */ zval *subpats = NULL; /* Array for subpatterns */ long flags = 0; /* Match control flags */ long start_offset = 0; /* Where the new search starts */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", ®ex, ®ex_len, + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|zll", ®ex, &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) { RETURN_FALSE; } /* Compile regex or get it from cache. */ - if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) { RETURN_FALSE; } @@ -559,9 +552,9 @@ static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ * PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC) { - zval *result_set, /* Holds a set of subpatterns after + zval result_set, /* Holds a set of subpatterns after a global match */ - **match_sets = NULL; /* An array of sets of matches for each + *match_sets = NULL; /* An array of sets of matches for each subpattern after a global match */ pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ @@ -643,11 +636,9 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* Allocate match sets array and initialize the values. */ if (global && subpats && subpats_order == PREG_PATTERN_ORDER) { - match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0); + match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0); for (i=0; i old_replace_count) { /* Add to return array */ - switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL)) + switch(zend_hash_get_current_key_ex(Z_ARRVAL_P(subject), &string_key, &num_key, 0, NULL)) { case HASH_KEY_IS_STRING: - add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0); +//??? + add_assoc_stringl_ex(return_value, string_key->val, string_key->len, result, result_len, 0); break; case HASH_KEY_IS_LONG: @@ -1410,21 +1395,22 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl } } - zend_hash_move_forward(Z_ARRVAL_PP(subject)); + zend_hash_move_forward(Z_ARRVAL_P(subject)); } } else { /* if subject is not an array */ old_replace_count = replace_count; - if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { + if ((result = php_replace_in_subject(regex, replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { if (!is_filter || replace_count > old_replace_count) { - RETVAL_STRINGL(result, result_len, 0); +//??? RETVAL_STRINGL(result, result_len, 0); + RETVAL_STRINGL(result, result_len); } else { efree(result); } } } if (ZEND_NUM_ARGS() > 4) { - zval_dtor(*zcount); - ZVAL_LONG(*zcount, replace_count); + zval_dtor(zcount); + ZVAL_LONG(zcount, replace_count); } } @@ -1458,22 +1444,21 @@ static PHP_FUNCTION(preg_filter) Split string into an array using a perl-style regular expression as a delimiter */ static PHP_FUNCTION(preg_split) { - char *regex; /* Regular expression */ + zend_string *regex; /* Regular expression */ char *subject; /* String to match against */ - int regex_len; int subject_len; long limit_val = -1;/* Integer value of limit */ long flags = 0; /* Match control flags */ pcre_cache_entry *pce; /* Compiled regular expression */ /* Get function parameters and do error checking */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len, + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|ll", ®ex, &subject, &subject_len, &limit_val, &flags) == FAILURE) { RETURN_FALSE; } /* Compile regex or get it from cache. */ - if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) { RETURN_FALSE; } @@ -1597,8 +1582,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec if (pce->compile_options & PCRE_UTF8) { if (re_bump == NULL) { int dummy; - - if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) { + zend_string *regex = STR_INIT("/./us", sizeof("/./us")-1, 0); + re_bump = pcre_get_compiled_regex(regex, &extra_bump, &dummy TSRMLS_CC); + STR_RELEASE(regex); + if (re_bump == NULL) { RETURN_FALSE; } } @@ -1733,7 +1720,8 @@ static PHP_FUNCTION(preg_quote) *q = '\0'; /* Reallocate string and return it */ - RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0); +//??? RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0); + RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str); } /* }}} */ @@ -1741,20 +1729,19 @@ static PHP_FUNCTION(preg_quote) Searches array and returns entries which match regex */ static PHP_FUNCTION(preg_grep) { - char *regex; /* Regular expression */ - int regex_len; + zend_string *regex; /* Regular expression */ zval *input; /* Input array */ long flags = 0; /* Match control flags */ pcre_cache_entry *pce; /* Compiled regular expression */ /* Get arguments and do error checking */ - if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len, + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Sa|l", ®ex, &input, &flags) == FAILURE) { return; } /* Compile regex or get it from cache. */ - if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) { + if ((pce = pcre_get_compiled_regex_cache(regex TSRMLS_CC)) == NULL) { RETURN_FALSE; } @@ -1764,14 +1751,13 @@ static PHP_FUNCTION(preg_grep) PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */ { - zval **entry; /* An entry in the input array */ + zval *entry; /* An entry in the input array */ pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ int *offsets; /* Array of subpattern offsets */ int size_offsets; /* Size of the offsets array */ int count = 0; /* Count of matched subpatterns */ - char *string_key; - uint string_key_len; + zend_string *string_key; ulong num_key; zend_bool invert; /* Whether to return non-matching entries */ @@ -1802,10 +1788,12 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* Go through the input array */ zend_hash_internal_pointer_reset(Z_ARRVAL_P(input)); - while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) { - zval subject = **entry; + while ((entry = zend_hash_get_current_data(Z_ARRVAL_P(input))) != NULL) { + zval subject; + + ZVAL_COPY_VALUE(&subject, entry); - if (Z_TYPE_PP(entry) != IS_STRING) { + if (Z_TYPE_P(entry) != IS_STRING) { zval_copy_ctor(&subject); convert_to_string(&subject); } @@ -1827,24 +1815,22 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return /* If the entry fits our requirements */ if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) { - Z_ADDREF_PP(entry); + Z_ADDREF_P(entry); /* Add to return array */ - switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL)) + switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &num_key, 0, NULL)) { case HASH_KEY_IS_STRING: - zend_hash_update(Z_ARRVAL_P(return_value), string_key, - string_key_len, entry, sizeof(zval *), NULL); + zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry); break; case HASH_KEY_IS_LONG: - zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry, - sizeof(zval *), NULL); + zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry); break; } } - if (Z_TYPE_PP(entry) != IS_STRING) { + if (Z_TYPE_P(entry) != IS_STRING) { zval_dtor(&subject); } -- cgit v1.2.1