diff options
author | Nicolas Grekas <nicolas.grekas@gmail.com> | 2017-05-16 12:46:32 +0200 |
---|---|---|
committer | Remi Collet <remi@php.net> | 2017-05-29 11:40:29 +0200 |
commit | 3c43510b19a94c41d3c2a724664fcd1aa9f63d14 (patch) | |
tree | 3064c58f6c33ce660aad8b73f5153bb3badcce2a /ext/pcre/php_pcre.c | |
parent | 0f20970216df909ac0e46a55398decc5270643d4 (diff) | |
download | php-git-3c43510b19a94c41d3c2a724664fcd1aa9f63d14.tar.gz |
Add PREG_UNMATCHED_AS_NULL flag to allow distinguishing between unmatched subpatterns and empty matches
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r-- | ext/pcre/php_pcre.c | 66 |
1 file changed, 37 insertions, 29 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 324a4acbfb..c7d6507d77 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -33,6 +33,7 @@ #define PREG_PATTERN_ORDER 1 #define PREG_SET_ORDER 2 #define PREG_OFFSET_CAPTURE (1<<8) +#define PREG_UNMATCHED_AS_NULL (1<<9) #define PREG_SPLIT_NO_EMPTY (1<<0) #define PREG_SPLIT_DELIM_CAPTURE (1<<1) @@ -188,6 +189,7 @@ static PHP_MINIT_FUNCTION(pcre) REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT); REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT); @@ -639,14 +641,14 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra, /* }}} */ /* {{{ add_offset_pair */ -static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name) +static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null) { zval match_pair, tmp; array_init_size(&match_pair, 2); /* Add (match, offset) to the return value */ - if (offset < 0) { /* unset substring */ + if (unmatched_as_null && offset < 0) { ZVAL_NULL(&tmp); } else { ZVAL_STRINGL(&tmp, str, len); @@ -705,7 +707,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec { zval result_set, /* Holds a set of subpatterns after a global match */ - *match_sets = NULL; /* An array of sets of matches for each + *match_sets = NULL; /* An array of sets of matches for 
each subpattern after a global match */ pcre_extra *extra = pce->extra;/* Holds results of studying */ pcre_extra extra_data; /* Used locally for exec options */ @@ -720,9 +722,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec char **subpat_names; /* Array for named subpatterns */ int i; int subpats_order; /* Order of subpattern matches */ - int offset_capture; /* Capture match offsets: yes/no */ - unsigned char *mark = NULL; /* Target for MARK name */ - zval marks; /* Array of marks for PREG_PATTERN_ORDER */ + int offset_capture; /* Capture match offsets: yes/no */ + int unmatched_as_null; /* Null non-matches: yes/no */ + unsigned char *mark = NULL; /* Target for MARK name */ + zval marks; /* Array of marks for PREG_PATTERN_ORDER */ ALLOCA_FLAG(use_heap); ZVAL_UNDEF(&marks); @@ -737,6 +740,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (use_flags) { offset_capture = flags & PREG_OFFSET_CAPTURE; + unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL; /* * subpats_order is pre-set to pattern mode so we change it only if @@ -752,6 +756,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } } else { offset_capture = 0; + unmatched_as_null = 0; } /* Negative offset counts from the end of the string. 
*/ @@ -847,11 +852,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair(&match_sets[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(&match_sets[i]); } else { add_next_index_stringl(&match_sets[i], (char *)stringlist[i], @@ -869,11 +874,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec /* * If the number of captured subpatterns on this run is * less than the total possible number, pad the result - * arrays with NULLs. + * arrays with NULLs or empty strings. */ if (count < num_subpats) { for (; i < num_subpats; i++) { - add_next_index_null(&match_sets[i]); + if (unmatched_as_null) { + add_next_index_null(&match_sets[i]); + } else { + add_next_index_string(&match_sets[i], ""); + } } } } else { @@ -885,19 +894,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair(&result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null); } } else { for (i = 0; i < count; i++) { if (subpat_names[i]) { - if (offsets[i<<1] < 0) { /* unset substring */ - add_assoc_null(&result_set, subpat_names[i]); - } else { + if (unmatched_as_null && offsets[i<<1] < 0) { + add_assoc_null(&result_set, subpat_names[i]); + } else { add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1]); - } + offsets[(i<<1)+1] - offsets[i<<1]); + } } - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && 
offsets[i<<1] < 0) { add_next_index_null(&result_set); } else { add_next_index_stringl(&result_set, (char *)stringlist[i], @@ -909,11 +918,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { for (i = 0; i < count; i++) { add_offset_pair(&result_set, (char *)stringlist[i], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL); + offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(&result_set); } else { add_next_index_stringl(&result_set, (char *)stringlist[i], @@ -936,19 +945,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec for (i = 0; i < count; i++) { add_offset_pair(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], subpat_names[i]); + offsets[i<<1], subpat_names[i], unmatched_as_null); } } else { for (i = 0; i < count; i++) { if (subpat_names[i]) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_assoc_null(subpats, subpat_names[i]); } else { add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1]); } } - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(subpats); } else { add_next_index_stringl(subpats, (char *)stringlist[i], @@ -961,11 +970,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec for (i = 0; i < count; i++) { add_offset_pair(subpats, (char *)stringlist[i], offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], NULL); + offsets[i<<1], NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { - if (offsets[i<<1] < 0) { /* unset substring */ + if (unmatched_as_null && offsets[i<<1] < 0) { add_next_index_null(subpats); } else { 
add_next_index_stringl(subpats, (char *)stringlist[i], @@ -1869,7 +1878,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec if (offset_capture) { /* Add (match, offset) pair to the return value */ - add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL); + add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0); } else { /* Add the piece to the return value */ ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match); @@ -1891,7 +1900,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec /* If we have matched a delimiter */ if (!no_empty || match_len > 0) { if (offset_capture) { - add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL); + add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0); } else { ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len); zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); @@ -1928,11 +1937,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */ - if (!no_empty || start_offset < subject_len) - { + if (!no_empty || start_offset < subject_len) { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ - add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL); + add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0); } else { /* Add the last piece to the return value */ ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match); |