summaryrefslogtreecommitdiff
path: root/ext/pcre/php_pcre.c
diff options
context:
space:
mode:
authorNicolas Grekas <nicolas.grekas@gmail.com>2017-05-16 12:46:32 +0200
committerRemi Collet <remi@php.net>2017-05-29 11:40:29 +0200
commit3c43510b19a94c41d3c2a724664fcd1aa9f63d14 (patch)
tree3064c58f6c33ce660aad8b73f5153bb3badcce2a /ext/pcre/php_pcre.c
parent0f20970216df909ac0e46a55398decc5270643d4 (diff)
downloadphp-git-3c43510b19a94c41d3c2a724664fcd1aa9f63d14.tar.gz
add PREG_UNMATCHED_AS_NULL flag to allow distinguish between unmatched subpatterns and empty matches
Diffstat (limited to 'ext/pcre/php_pcre.c')
-rw-r--r--ext/pcre/php_pcre.c66
1 files changed, 37 insertions, 29 deletions
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index 324a4acbfb..c7d6507d77 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -33,6 +33,7 @@
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
#define PREG_OFFSET_CAPTURE (1<<8)
+#define PREG_UNMATCHED_AS_NULL (1<<9)
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
@@ -188,6 +189,7 @@ static PHP_MINIT_FUNCTION(pcre)
REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
@@ -639,14 +641,14 @@ PHPAPI pcre* pcre_get_compiled_regex_ex(zend_string *regex, pcre_extra **extra,
/* }}} */
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
+static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name, int unmatched_as_null)
{
zval match_pair, tmp;
array_init_size(&match_pair, 2);
/* Add (match, offset) to the return value */
- if (offset < 0) { /* unset substring */
+ if (unmatched_as_null && offset < 0) {
ZVAL_NULL(&tmp);
} else {
ZVAL_STRINGL(&tmp, str, len);
@@ -705,7 +707,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
{
zval result_set, /* Holds a set of subpatterns after
a global match */
- *match_sets = NULL; /* An array of sets of matches for each
+ *match_sets = NULL; /* An array of sets of matches for each
subpattern after a global match */
pcre_extra *extra = pce->extra;/* Holds results of studying */
pcre_extra extra_data; /* Used locally for exec options */
@@ -720,9 +722,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
char **subpat_names; /* Array for named subpatterns */
int i;
int subpats_order; /* Order of subpattern matches */
- int offset_capture; /* Capture match offsets: yes/no */
- unsigned char *mark = NULL; /* Target for MARK name */
- zval marks; /* Array of marks for PREG_PATTERN_ORDER */
+ int offset_capture; /* Capture match offsets: yes/no */
+ int unmatched_as_null; /* Null non-matches: yes/no */
+ unsigned char *mark = NULL; /* Target for MARK name */
+ zval marks; /* Array of marks for PREG_PATTERN_ORDER */
ALLOCA_FLAG(use_heap);
ZVAL_UNDEF(&marks);
@@ -737,6 +740,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (use_flags) {
offset_capture = flags & PREG_OFFSET_CAPTURE;
+ unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
/*
* subpats_order is pre-set to pattern mode so we change it only if
@@ -752,6 +756,7 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
} else {
offset_capture = 0;
+ unmatched_as_null = 0;
}
/* Negative offset counts from the end of the string. */
@@ -847,11 +852,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&match_sets[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&match_sets[i]);
} else {
add_next_index_stringl(&match_sets[i], (char *)stringlist[i],
@@ -869,11 +874,15 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
/*
* If the number of captured subpatterns on this run is
* less than the total possible number, pad the result
- * arrays with NULLs.
+ * arrays with NULLs or empty strings.
*/
if (count < num_subpats) {
for (; i < num_subpats; i++) {
- add_next_index_null(&match_sets[i]);
+ if (unmatched_as_null) {
+ add_next_index_null(&match_sets[i]);
+ } else {
+ add_next_index_string(&match_sets[i], "");
+ }
}
}
} else {
@@ -885,19 +894,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i], unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) { /* unset substring */
- add_assoc_null(&result_set, subpat_names[i]);
- } else {
+ if (unmatched_as_null && offsets[i<<1] < 0) {
+ add_assoc_null(&result_set, subpat_names[i]);
+ } else {
add_assoc_stringl(&result_set, subpat_names[i], (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1]);
- }
+ offsets[(i<<1)+1] - offsets[i<<1]);
+ }
}
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&result_set);
} else {
add_next_index_stringl(&result_set, (char *)stringlist[i],
@@ -909,11 +918,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
if (offset_capture) {
for (i = 0; i < count; i++) {
add_offset_pair(&result_set, (char *)stringlist[i],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
+ offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(&result_set);
} else {
add_next_index_stringl(&result_set, (char *)stringlist[i],
@@ -936,19 +945,19 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], subpat_names[i]);
+ offsets[i<<1], subpat_names[i], unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
if (subpat_names[i]) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_assoc_null(subpats, subpat_names[i]);
} else {
add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1]);
}
}
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(subpats);
} else {
add_next_index_stringl(subpats, (char *)stringlist[i],
@@ -961,11 +970,11 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
for (i = 0; i < count; i++) {
add_offset_pair(subpats, (char *)stringlist[i],
offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], NULL);
+ offsets[i<<1], NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
- if (offsets[i<<1] < 0) { /* unset substring */
+ if (unmatched_as_null && offsets[i<<1] < 0) {
add_next_index_null(subpats);
} else {
add_next_index_stringl(subpats, (char *)stringlist[i],
@@ -1869,7 +1878,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
if (offset_capture) {
/* Add (match, offset) pair to the return value */
- add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL);
+ add_offset_pair(return_value, last_match, (int)(&subject[offsets[0]]-last_match), next_offset, NULL, 0);
} else {
/* Add the piece to the return value */
ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
@@ -1891,7 +1900,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
/* If we have matched a delimiter */
if (!no_empty || match_len > 0) {
if (offset_capture) {
- add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
+ add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
} else {
ZVAL_STRINGL(&tmp, &subject[offsets[i<<1]], match_len);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
@@ -1928,11 +1937,10 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
start_offset = (int)(last_match - subject); /* the offset might have been incremented, but without further successful matches */
- if (!no_empty || start_offset < subject_len)
- {
+ if (!no_empty || start_offset < subject_len) {
if (offset_capture) {
/* Add the last (match, offset) pair to the return value */
- add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
+ add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL, 0);
} else {
/* Add the last piece to the return value */
ZVAL_STRINGL(&tmp, last_match, subject + subject_len - last_match);