diff options
Diffstat (limited to 'ext/pcre')
-rw-r--r-- | ext/pcre/pcrelib/ChangeLog | 4 | ||||
-rw-r--r-- | ext/pcre/pcrelib/HACKING | 2 | ||||
-rw-r--r-- | ext/pcre/pcrelib/README | 2 | ||||
-rw-r--r-- | ext/pcre/pcrelib/doc/pcre.txt | 2 | ||||
-rw-r--r-- | ext/pcre/pcrelib/pcre_exec.c | 2 | ||||
-rw-r--r-- | ext/pcre/pcrelib/pcre_study.c | 2 | ||||
-rw-r--r-- | ext/pcre/pcrelib/pcredemo.c | 4 | ||||
-rw-r--r-- | ext/pcre/php_pcre.c | 54 | ||||
-rw-r--r-- | ext/pcre/tests/marks.phpt | 202 |
9 files changed, 259 insertions, 15 deletions
diff --git a/ext/pcre/pcrelib/ChangeLog b/ext/pcre/pcrelib/ChangeLog index 359b412958..ff71273e21 100644 --- a/ext/pcre/pcrelib/ChangeLog +++ b/ext/pcre/pcrelib/ChangeLog @@ -1809,7 +1809,7 @@ Version 8.10 25-Jun-2010 7. Minor change to pcretest.c to avoid a compiler warning. -8. Added four artifical Unicode properties to help with an option to make +8. Added four artificial Unicode properties to help with an option to make \s etc use properties (see next item). The new properties are: Xan (alphanumeric), Xsp (Perl space), Xps (POSIX space), and Xwd (word). @@ -4434,7 +4434,7 @@ Version 4.3 21-May-03 (i) The utf8_table... variables are now declared "const". (ii) The code for \cx, which used the "case flipping" table to upper case - lower case letters, now just substracts 32. This is ASCII-specific, + lower case letters, now just subtracts 32. This is ASCII-specific, but the whole concept of \cx is ASCII-specific, so it seems reasonable. diff --git a/ext/pcre/pcrelib/HACKING b/ext/pcre/pcrelib/HACKING index 691b7a14e5..8395504212 100644 --- a/ext/pcre/pcrelib/HACKING +++ b/ext/pcre/pcrelib/HACKING @@ -360,7 +360,7 @@ reference number if the reference is to a unique capturing group (either by number or by name). When named groups are used, there may be more than one group with the same name. In this case, a reference by name generates OP_DNREF or OP_DNREFI. These are followed by two counts: the index (not the byte offset) -in the group name table of the first entry for the requred name, followed by +in the group name table of the first entry for the required name, followed by the number of groups with the same name. diff --git a/ext/pcre/pcrelib/README b/ext/pcre/pcrelib/README index 4887ebf350..7a6ddff5c7 100644 --- a/ext/pcre/pcrelib/README +++ b/ext/pcre/pcrelib/README @@ -403,7 +403,7 @@ library. They are also documented in the pcrebuild man page. avoided by linking with libedit (which has a BSD licence) instead. Enabling libreadline causes the -lreadline option to be added to the pcretest - build. In many operating environments with a sytem-installed readline + build. In many operating environments with a system-installed readline library this is sufficient. However, in some environments (e.g. if an unmodified distribution version of readline is in use), it may be necessary to specify something like LIBS="-lncurses" as well. This is because, to quote diff --git a/ext/pcre/pcrelib/doc/pcre.txt b/ext/pcre/pcrelib/doc/pcre.txt index ce27f4b3e0..40523687c0 100644 --- a/ext/pcre/pcrelib/doc/pcre.txt +++ b/ext/pcre/pcrelib/doc/pcre.txt @@ -1242,7 +1242,7 @@ PCRETEST OPTION FOR LIBREADLINE SUPPORT pcretest linked in this way, there may be licensing issues. Setting this option causes the -lreadline option to be added to the - pcretest build. In many operating environments with a sytem-installed + pcretest build. In many operating environments with a system-installed libreadline this is sufficient. However, in some environments (e.g. if an unmodified distribution version of readline is in use), some extra configuration may be necessary. The INSTALL file for libreadline says diff --git a/ext/pcre/pcrelib/pcre_exec.c b/ext/pcre/pcrelib/pcre_exec.c index 3942076490..7efdc443c9 100644 --- a/ext/pcre/pcrelib/pcre_exec.c +++ b/ext/pcre/pcrelib/pcre_exec.c @@ -1040,7 +1040,7 @@ for (;;) the result of a recursive call to match() whatever happened so it was possible to reduce stack usage by turning this into a tail recursion, except in the case of a possibly empty group. However, now that there is - the possiblity of (*THEN) occurring in the final alternative, this + the possibility of (*THEN) occurring in the final alternative, this optimization is no longer always possible. We can optimize if we know there are no (*THEN)s in the pattern; at present diff --git a/ext/pcre/pcrelib/pcre_study.c b/ext/pcre/pcrelib/pcre_study.c index 998fe2325e..88df2a0064 100644 --- a/ext/pcre/pcrelib/pcre_study.c +++ b/ext/pcre/pcrelib/pcre_study.c @@ -1050,7 +1050,7 @@ do tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); break; - /* Single-char upto sets the bit and tries the next */ + /* Single-char up to sets the bit and tries the next */ case OP_UPTO: case OP_MINUPTO: diff --git a/ext/pcre/pcrelib/pcredemo.c b/ext/pcre/pcrelib/pcredemo.c index 946aba45cd..1ca77f1537 100644 --- a/ext/pcre/pcrelib/pcredemo.c +++ b/ext/pcre/pcrelib/pcredemo.c @@ -144,7 +144,7 @@ if (rc < 0) return 1; } -/* Match succeded */ +/* Match succeeded */ printf("\nMatch succeeded at offset %d\n", ovector[0]); @@ -362,7 +362,7 @@ for (;;) return 1; } - /* Match succeded */ + /* Match succeeded */ printf("\nMatch succeeded again at offset %d\n", ovector[0]); diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index bb78a16a63..23a7833d93 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -577,6 +577,8 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec int i, rc; int subpats_order; /* Order of subpattern matches */ int offset_capture; /* Capture match offsets: yes/no */ + unsigned char *mark = NULL; /* Target for MARK name */ + zval *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */ /* Overwrite the passed-in value for subpatterns with an empty array. */ if (subpats != NULL) { @@ -619,6 +621,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats); @@ -695,6 +701,14 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + if (!marks) { + MAKE_STD_ZVAL(marks); + array_init(marks); + } + add_index_string(marks, matched - 1, (char *) mark, 1); + } /* * If the number of captured subpatterns on this run is * less than the total possible number, pad the result @@ -725,6 +739,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + add_assoc_string(result_set, "MARK", (char *) mark, 1); + } /* And add it to the output array */ zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL); } @@ -744,6 +762,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec offsets[(i<<1)+1] - offsets[i<<1], 1); } } + /* Add MARK, if available */ + if (mark) { + add_assoc_string(subpats, "MARK", (char *) mark, 1); + } } pcre_free((void *) stringlist); @@ -784,6 +806,10 @@ PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subjec zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL); } efree(match_sets); + + if (marks) { + add_assoc_zval(subpats, "MARK", marks); + } } efree(offsets); @@ -855,7 +881,7 @@ static int preg_get_backref(char **str, int *backref) /* {{{ preg_do_repl_func */ -static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, char **result TSRMLS_DC) +static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC) { zval *retval_ptr; /* Function return value */ zval **args[1]; /* Argument to pass to function */ @@ -871,6 +897,9 @@ static int preg_do_repl_func(zval *function, char *subject, int *offsets, char * } add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1); } + if (mark) { + add_assoc_string(subpats, "MARK", (char *) mark, 1); + } args[0] = &subpats; if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) { @@ -1032,6 +1061,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub *eval_result, /* Result of eval or custom function */ walk_last; /* Last walked character */ int rc; + unsigned char *mark = NULL; /* Target for MARK name */ if (extra == NULL) { extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION; @@ -1039,6 +1069,10 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->mark = &mark; + extra->flags |= PCRE_EXTRA_MARK; +#endif eval = pce->preg_options & PREG_REPLACE_EVAL; if (is_callable_replace) { @@ -1118,7 +1152,7 @@ PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int sub new_len += eval_result_len; } else if (is_callable_replace) { /* Use custom function to get replacement string and its length. */ - eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, &eval_result TSRMLS_CC); + eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC); new_len += eval_result_len; } else { /* do regular substitution */ walk = replace; @@ -1343,6 +1377,7 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl int limit_val = -1; long limit = -1; char *string_key; + uint string_key_len; ulong num_key; char *callback_name; int replace_count=0, old_replace_count; @@ -1394,10 +1429,10 @@ static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_repl if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) { if (!is_filter || replace_count > old_replace_count) { /* Add to return array */ - switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0)) + switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL)) { case HASH_KEY_IS_STRING: - add_assoc_stringl(return_value, string_key, result, result_len, 0); + add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0); break; case HASH_KEY_IS_LONG: @@ -1516,6 +1551,9 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subjec } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->flags &= ~PCRE_EXTRA_MARK; +#endif /* Initialize return value */ array_init(return_value); @@ -1770,6 +1808,7 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return int size_offsets; /* Size of the offsets array */ int count = 0; /* Count of matched subpatterns */ char *string_key; + uint string_key_len; ulong num_key; zend_bool invert; /* Whether to return non-matching entries */ @@ -1783,6 +1822,9 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return } extra->match_limit = PCRE_G(backtrack_limit); extra->match_limit_recursion = PCRE_G(recursion_limit); +#ifdef PCRE_EXTRA_MARK + extra->flags &= ~PCRE_EXTRA_MARK; +#endif /* Calculate the size of the offsets array, and allocate memory for it. */ rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets); @@ -1828,11 +1870,11 @@ PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return Z_ADDREF_PP(entry); /* Add to return array */ - switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0)) + switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL)) { case HASH_KEY_IS_STRING: zend_hash_update(Z_ARRVAL_P(return_value), string_key, - strlen(string_key)+1, entry, sizeof(zval *), NULL); + string_key_len, entry, sizeof(zval *), NULL); break; case HASH_KEY_IS_LONG: diff --git a/ext/pcre/tests/marks.phpt b/ext/pcre/tests/marks.phpt new file mode 100644 index 0000000000..8838a00500 --- /dev/null +++ b/ext/pcre/tests/marks.phpt @@ -0,0 +1,202 @@ +--TEST-- +Test support for PCRE marks +--SKIPIF-- +<?php +if (version_compare(PCRE_VERSION, '8.1', '<')) { + die('skip PCRE_VERSION >= 8.1 is required for MARK support'); +} +?> +--FILE-- +<?php + +$regex = <<<'REGEX' +/ + _ (a) (*MARK:A_MARK) _ + | _ (b) _ + | _ (c) (*MARK:C_MARK) _ + | _ (d) _ +/x +REGEX; + +var_dump(preg_match($regex, '_c_', $matches)); +var_dump($matches); + +var_dump(preg_match_all($regex, '_a__b__c__d_', $matches, PREG_PATTERN_ORDER)); +var_dump($matches); + +var_dump(preg_match_all($regex, '_a__b__c__d_', $matches, PREG_SET_ORDER)); +var_dump($matches); + +var_dump(preg_replace_callback($regex, function($matches) { + var_dump($matches); + return $matches[0]; +}, '_a__b__c__d_')); + +?> +--EXPECTF-- +int(1) +array(5) { + [0]=> + string(3) "_c_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "c" + ["MARK"]=> + string(6) "C_MARK" +} +int(4) +array(6) { + [0]=> + array(4) { + [0]=> + string(3) "_a_" + [1]=> + string(3) "_b_" + [2]=> + string(3) "_c_" + [3]=> + string(3) "_d_" + } + [1]=> + array(4) { + [0]=> + string(1) "a" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + } + [2]=> + array(4) { + [0]=> + string(0) "" + [1]=> + string(1) "b" + [2]=> + string(0) "" + [3]=> + string(0) "" + } + [3]=> + array(4) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(1) "c" + [3]=> + string(0) "" + } + [4]=> + array(4) { + [0]=> + string(0) "" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "d" + } + ["MARK"]=> + array(2) { + [0]=> + string(6) "A_MARK" + [2]=> + string(6) "C_MARK" + } +} +int(4) +array(4) { + [0]=> + array(3) { + [0]=> + string(3) "_a_" + [1]=> + string(1) "a" + ["MARK"]=> + string(6) "A_MARK" + } + [1]=> + array(3) { + [0]=> + string(3) "_b_" + [1]=> + string(0) "" + [2]=> + string(1) "b" + } + [2]=> + array(5) { + [0]=> + string(3) "_c_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "c" + ["MARK"]=> + string(6) "C_MARK" + } + [3]=> + array(5) { + [0]=> + string(3) "_d_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(1) "d" + } +} +array(3) { + [0]=> + string(3) "_a_" + [1]=> + string(1) "a" + ["MARK"]=> + string(6) "A_MARK" +} +array(3) { + [0]=> + string(3) "_b_" + [1]=> + string(0) "" + [2]=> + string(1) "b" +} +array(5) { + [0]=> + string(3) "_c_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(1) "c" + ["MARK"]=> + string(6) "C_MARK" +} +array(5) { + [0]=> + string(3) "_d_" + [1]=> + string(0) "" + [2]=> + string(0) "" + [3]=> + string(0) "" + [4]=> + string(1) "d" +} +string(12) "_a__b__c__d_" |