summary | refs | log | tree | commit | diff
path: root/ext/pcre
diff options
context:
space:
mode:
Diffstat (limited to 'ext/pcre')
-rw-r--r-- ext/pcre/pcrelib/ChangeLog 4
-rw-r--r-- ext/pcre/pcrelib/HACKING 2
-rw-r--r-- ext/pcre/pcrelib/README 2
-rw-r--r-- ext/pcre/pcrelib/doc/pcre.txt 2
-rw-r--r-- ext/pcre/pcrelib/pcre_exec.c 2
-rw-r--r-- ext/pcre/pcrelib/pcre_study.c 2
-rw-r--r-- ext/pcre/pcrelib/pcredemo.c 4
-rw-r--r-- ext/pcre/php_pcre.c 54
-rw-r--r-- ext/pcre/tests/marks.phpt 202
9 files changed, 259 insertions, 15 deletions
diff --git a/ext/pcre/pcrelib/ChangeLog b/ext/pcre/pcrelib/ChangeLog
index 359b412958..ff71273e21 100644
--- a/ext/pcre/pcrelib/ChangeLog
+++ b/ext/pcre/pcrelib/ChangeLog
@@ -1809,7 +1809,7 @@ Version 8.10 25-Jun-2010
7. Minor change to pcretest.c to avoid a compiler warning.
-8. Added four artifical Unicode properties to help with an option to make
+8. Added four artificial Unicode properties to help with an option to make
\s etc use properties (see next item). The new properties are: Xan
(alphanumeric), Xsp (Perl space), Xps (POSIX space), and Xwd (word).
@@ -4434,7 +4434,7 @@ Version 4.3 21-May-03
(i) The utf8_table... variables are now declared "const".
(ii) The code for \cx, which used the "case flipping" table to upper case
- lower case letters, now just substracts 32. This is ASCII-specific,
+ lower case letters, now just subtracts 32. This is ASCII-specific,
but the whole concept of \cx is ASCII-specific, so it seems
reasonable.
diff --git a/ext/pcre/pcrelib/HACKING b/ext/pcre/pcrelib/HACKING
index 691b7a14e5..8395504212 100644
--- a/ext/pcre/pcrelib/HACKING
+++ b/ext/pcre/pcrelib/HACKING
@@ -360,7 +360,7 @@ reference number if the reference is to a unique capturing group (either by
number or by name). When named groups are used, there may be more than one
group with the same name. In this case, a reference by name generates OP_DNREF
or OP_DNREFI. These are followed by two counts: the index (not the byte offset)
-in the group name table of the first entry for the requred name, followed by
+in the group name table of the first entry for the required name, followed by
the number of groups with the same name.
diff --git a/ext/pcre/pcrelib/README b/ext/pcre/pcrelib/README
index 4887ebf350..7a6ddff5c7 100644
--- a/ext/pcre/pcrelib/README
+++ b/ext/pcre/pcrelib/README
@@ -403,7 +403,7 @@ library. They are also documented in the pcrebuild man page.
avoided by linking with libedit (which has a BSD licence) instead.
Enabling libreadline causes the -lreadline option to be added to the pcretest
- build. In many operating environments with a sytem-installed readline
+ build. In many operating environments with a system-installed readline
library this is sufficient. However, in some environments (e.g. if an
unmodified distribution version of readline is in use), it may be necessary
to specify something like LIBS="-lncurses" as well. This is because, to quote
diff --git a/ext/pcre/pcrelib/doc/pcre.txt b/ext/pcre/pcrelib/doc/pcre.txt
index ce27f4b3e0..40523687c0 100644
--- a/ext/pcre/pcrelib/doc/pcre.txt
+++ b/ext/pcre/pcrelib/doc/pcre.txt
@@ -1242,7 +1242,7 @@ PCRETEST OPTION FOR LIBREADLINE SUPPORT
pcretest linked in this way, there may be licensing issues.
Setting this option causes the -lreadline option to be added to the
- pcretest build. In many operating environments with a sytem-installed
+ pcretest build. In many operating environments with a system-installed
libreadline this is sufficient. However, in some environments (e.g. if
an unmodified distribution version of readline is in use), some extra
configuration may be necessary. The INSTALL file for libreadline says
diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c
index 3942076490..7efdc443c9 100644
--- a/ext/pcre/pcrelib/pcre_exec.c
+++ b/ext/pcre/pcrelib/pcre_exec.c
@@ -1040,7 +1040,7 @@ for (;;)
the result of a recursive call to match() whatever happened so it was
possible to reduce stack usage by turning this into a tail recursion,
except in the case of a possibly empty group. However, now that there is
- the possiblity of (*THEN) occurring in the final alternative, this
+ the possibility of (*THEN) occurring in the final alternative, this
optimization is no longer always possible.
We can optimize if we know there are no (*THEN)s in the pattern; at present
diff --git a/ext/pcre/pcrelib/pcre_study.c b/ext/pcre/pcrelib/pcre_study.c
index 998fe2325e..88df2a0064 100644
--- a/ext/pcre/pcrelib/pcre_study.c
+++ b/ext/pcre/pcrelib/pcre_study.c
@@ -1050,7 +1050,7 @@ do
tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
break;
- /* Single-char upto sets the bit and tries the next */
+ /* Single-char up to sets the bit and tries the next */
case OP_UPTO:
case OP_MINUPTO:
diff --git a/ext/pcre/pcrelib/pcredemo.c b/ext/pcre/pcrelib/pcredemo.c
index 946aba45cd..1ca77f1537 100644
--- a/ext/pcre/pcrelib/pcredemo.c
+++ b/ext/pcre/pcrelib/pcredemo.c
@@ -144,7 +144,7 @@ if (rc < 0)
return 1;
}
-/* Match succeded */
+/* Match succeeded */
printf("\nMatch succeeded at offset %d\n", ovector[0]);
@@ -362,7 +362,7 @@ for (;;)
return 1;
}
- /* Match succeded */
+ /* Match succeeded */
printf("\nMatch succeeded again at offset %d\n", ovector[0]);
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index bb78a16a63..23a7833d93 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -577,6 +577,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
int i, rc;
int subpats_order; /* Order of subpattern matches */
int offset_capture; /* Capture match offsets: yes/no */
+ unsigned char *mark = NULL; /* Target for MARK name */
+ zval *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */
/* Overwrite the passed-in value for subpatterns with an empty array. */
if (subpats != NULL) {
@@ -619,6 +621,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
}
extra->match_limit = PCRE_G(backtrack_limit);
extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+ extra->mark = &mark;
+ extra->flags |= PCRE_EXTRA_MARK;
+#endif
/* Calculate the size of the offsets array, and allocate memory for it. */
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
@@ -695,6 +701,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
+ /* Add MARK, if available */
+ if (mark) {
+ if (!marks) {
+ MAKE_STD_ZVAL(marks);
+ array_init(marks);
+ }
+ add_index_string(marks, matched - 1, (char *) mark, 1);
+ }
/*
* If the number of captured subpatterns on this run is
* less than the total possible number, pad the result
@@ -725,6 +739,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
+ /* Add MARK, if available */
+ if (mark) {
+ add_assoc_string(result_set, "MARK", (char *) mark, 1);
+ }
/* And add it to the output array */
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
}
@@ -744,6 +762,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
offsets[(i<<1)+1] - offsets[i<<1], 1);
}
}
+ /* Add MARK, if available */
+ if (mark) {
+ add_assoc_string(subpats, "MARK", (char *) mark, 1);
+ }
}
pcre_free((void *) stringlist);
@@ -784,6 +806,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec
zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
}
efree(match_sets);
+
+ if (marks) {
+ add_assoc_zval(subpats, "MARK", marks);
+ }
}
efree(offsets);
@@ -855,7 +881,7 @@ static int preg_get_backref(char **str, int *backref)
/* {{{ preg_do_repl_func
*/
-static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC)
+static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC)
{
zval *retval_ptr; /* Function return value */
zval **args[1]; /* Argument to pass to function */
@@ -871,6 +897,9 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, char *
}
add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
}
+ if (mark) {
+ add_assoc_string(subpats, "MARK", (char *) mark, 1);
+ }
args[0] = &subpats;
if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
@@ -1032,6 +1061,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
*eval_result, /* Result of eval or custom function */
walk_last; /* Last walked character */
int rc;
+ unsigned char *mark = NULL; /* Target for MARK name */
if (extra == NULL) {
extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
@@ -1039,6 +1069,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
}
extra->match_limit = PCRE_G(backtrack_limit);
extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+ extra->mark = &mark;
+ extra->flags |= PCRE_EXTRA_MARK;
+#endif
eval = pce->preg_options & PREG_REPLACE_EVAL;
if (is_callable_replace) {
@@ -1118,7 +1152,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub
new_len += eval_result_len;
} else if (is_callable_replace) {
/* Use custom function to get replacement string and its length. */
- eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC);
+ eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC);
new_len += eval_result_len;
} else { /* do regular substitution */
walk = replace;
@@ -1343,6 +1377,7 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl
int limit_val = -1;
long limit = -1;
char *string_key;
+ uint string_key_len;
ulong num_key;
char *callback_name;
int replace_count=0, old_replace_count;
@@ -1394,10 +1429,10 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl
if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
if (!is_filter || replace_count > old_replace_count) {
/* Add to return array */
- switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
+ switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL))
{
case HASH_KEY_IS_STRING:
- add_assoc_stringl(return_value, string_key, result, result_len, 0);
+ add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0);
break;
case HASH_KEY_IS_LONG:
@@ -1516,6 +1551,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec
}
extra->match_limit = PCRE_G(backtrack_limit);
extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+ extra->flags &= ~PCRE_EXTRA_MARK;
+#endif
/* Initialize return value */
array_init(return_value);
@@ -1770,6 +1808,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
int size_offsets; /* Size of the offsets array */
int count = 0; /* Count of matched subpatterns */
char *string_key;
+ uint string_key_len;
ulong num_key;
zend_bool invert; /* Whether to return non-matching
entries */
@@ -1783,6 +1822,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
}
extra->match_limit = PCRE_G(backtrack_limit);
extra->match_limit_recursion = PCRE_G(recursion_limit);
+#ifdef PCRE_EXTRA_MARK
+ extra->flags &= ~PCRE_EXTRA_MARK;
+#endif
/* Calculate the size of the offsets array, and allocate memory for it. */
rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
@@ -1828,11 +1870,11 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return
Z_ADDREF_PP(entry);
/* Add to return array */
- switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
+ switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL))
{
case HASH_KEY_IS_STRING:
zend_hash_update(Z_ARRVAL_P(return_value), string_key,
- strlen(string_key)+1, entry, sizeof(zval *), NULL);
+ string_key_len, entry, sizeof(zval *), NULL);
break;
case HASH_KEY_IS_LONG:
diff --git a/ext/pcre/tests/marks.phpt b/ext/pcre/tests/marks.phpt
new file mode 100644
index 0000000000..8838a00500
--- /dev/null
+++ b/ext/pcre/tests/marks.phpt
@@ -0,0 +1,202 @@
+--TEST--
+Test support for PCRE marks
+--SKIPIF--
+<?php
+if (version_compare(PCRE_VERSION, '8.1', '<')) {
+ die('skip PCRE_VERSION >= 8.1 is required for MARK support');
+}
+?>
+--FILE--
+<?php
+
+$regex = <<<'REGEX'
+/
+ _ (a) (*MARK:A_MARK) _
+ | _ (b) _
+ | _ (c) (*MARK:C_MARK) _
+ | _ (d) _
+/x
+REGEX;
+
+var_dump(preg_match($regex, '_c_', $matches));
+var_dump($matches);
+
+var_dump(preg_match_all($regex, '_a__b__c__d_', $matches, PREG_PATTERN_ORDER));
+var_dump($matches);
+
+var_dump(preg_match_all($regex, '_a__b__c__d_', $matches, PREG_SET_ORDER));
+var_dump($matches);
+
+var_dump(preg_replace_callback($regex, function($matches) {
+ var_dump($matches);
+ return $matches[0];
+}, '_a__b__c__d_'));
+
+?>
+--EXPECTF--
+int(1)
+array(5) {
+ [0]=>
+ string(3) "_c_"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(1) "c"
+ ["MARK"]=>
+ string(6) "C_MARK"
+}
+int(4)
+array(6) {
+ [0]=>
+ array(4) {
+ [0]=>
+ string(3) "_a_"
+ [1]=>
+ string(3) "_b_"
+ [2]=>
+ string(3) "_c_"
+ [3]=>
+ string(3) "_d_"
+ }
+ [1]=>
+ array(4) {
+ [0]=>
+ string(1) "a"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(0) ""
+ }
+ [2]=>
+ array(4) {
+ [0]=>
+ string(0) ""
+ [1]=>
+ string(1) "b"
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(0) ""
+ }
+ [3]=>
+ array(4) {
+ [0]=>
+ string(0) ""
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(1) "c"
+ [3]=>
+ string(0) ""
+ }
+ [4]=>
+ array(4) {
+ [0]=>
+ string(0) ""
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(1) "d"
+ }
+ ["MARK"]=>
+ array(2) {
+ [0]=>
+ string(6) "A_MARK"
+ [2]=>
+ string(6) "C_MARK"
+ }
+}
+int(4)
+array(4) {
+ [0]=>
+ array(3) {
+ [0]=>
+ string(3) "_a_"
+ [1]=>
+ string(1) "a"
+ ["MARK"]=>
+ string(6) "A_MARK"
+ }
+ [1]=>
+ array(3) {
+ [0]=>
+ string(3) "_b_"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(1) "b"
+ }
+ [2]=>
+ array(5) {
+ [0]=>
+ string(3) "_c_"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(1) "c"
+ ["MARK"]=>
+ string(6) "C_MARK"
+ }
+ [3]=>
+ array(5) {
+ [0]=>
+ string(3) "_d_"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(0) ""
+ [4]=>
+ string(1) "d"
+ }
+}
+array(3) {
+ [0]=>
+ string(3) "_a_"
+ [1]=>
+ string(1) "a"
+ ["MARK"]=>
+ string(6) "A_MARK"
+}
+array(3) {
+ [0]=>
+ string(3) "_b_"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(1) "b"
+}
+array(5) {
+ [0]=>
+ string(3) "_c_"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(1) "c"
+ ["MARK"]=>
+ string(6) "C_MARK"
+}
+array(5) {
+ [0]=>
+ string(3) "_d_"
+ [1]=>
+ string(0) ""
+ [2]=>
+ string(0) ""
+ [3]=>
+ string(0) ""
+ [4]=>
+ string(1) "d"
+}
+string(12) "_a__b__c__d_"