diff options
author | ju1ius <ju1ius@laposte.net> | 2016-07-29 18:00:28 +0200 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2018-07-06 23:34:54 +0200 |
commit | 212f56b7ca52304fe9acf586b65701f445d19c66 (patch) | |
tree | ffd411cd48cc14ff142de19055f171de1c9f0ff4 | |
parent | 69a49af0d320be6d62028bd6a6996b49d4f200bc (diff) | |
download | php-git-212f56b7ca52304fe9acf586b65701f445d19c66.tar.gz |
Adds support for named captures to mb_ereg & mb_ereg_search
Previously, `mb_ereg`, `mb_ereg_search_regs` & `mb_ereg_search_getregs`
returned only numbered capturing groups.
Now they return both numbered and named capturing groups.
Fixes Bug #72704.
-rw-r--r-- | ext/mbstring/php_mbregex.c | 67 | ||||
-rw-r--r-- | ext/mbstring/tests/mb_ereg_dupnames.phpt | 37 | ||||
-rw-r--r-- | ext/mbstring/tests/mb_ereg_named_subpatterns.phpt | 50 | ||||
-rw-r--r-- | ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt | 31 |
4 files changed, 185 insertions, 0 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index b95f315c88..957ee484ba 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -646,6 +646,50 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option } /* }}} */ + +/* + * Callbacks for named subpatterns + */ + +/* {{{ struct mb_regex_groups_iter_args */ +typedef struct mb_regex_groups_iter_args { + zval *groups; + char *search_str; + int search_len; + OnigRegion *region; +} mb_regex_groups_iter_args; +/* }}} */ + +/* {{{ mb_regex_groups_iter */ +static int +mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg) +{ + mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg; + int i, gn, ref, beg, end; + + for (i = 0; i < ngroup_num; i++) { + gn = group_nums[i]; + ref = onig_name_to_backref_number(reg, name, name_end, args->region); + if (ref != gn) { + /* + * In case of duplicate groups, keep only the last succeeding one + * to be consistent with preg_match with the PCRE_DUPNAMES option. 
+ */ + continue; + } + beg = args->region->beg[gn]; + end = args->region->end[gn]; + if (beg >= 0 && beg < end && end <= args->search_len) { + add_assoc_stringl_ex(args->groups, (char *)name, name_end - name, &args->search_str[beg], end - beg); + } else { + add_assoc_bool_ex(args->groups, (char *)name, name_end - name, 0); + } + } + + return 0; +} +/* }}} */ + /* * php functions */ @@ -762,6 +806,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) add_index_bool(array, i, 0); } } + + if (onig_number_of_names(re) > 0) { + mb_regex_groups_iter_args args = {array, string, string_len, regs}; + onig_foreach_name(re, mb_regex_groups_iter, &args); + } } if (match_len == 0) { @@ -1291,6 +1340,15 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode) add_index_bool(return_value, i, 0); } } + if (onig_number_of_names(MBREX(search_re)) > 0) { + mb_regex_groups_iter_args args = { + return_value, + Z_STRVAL(MBREX(search_str)), + Z_STRLEN(MBREX(search_str)), + MBREX(search_regs) + }; + onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args); + } break; default: RETVAL_TRUE; @@ -1417,6 +1475,15 @@ PHP_FUNCTION(mb_ereg_search_getregs) add_index_bool(return_value, i, 0); } } + if (onig_number_of_names(MBREX(search_re)) > 0) { + mb_regex_groups_iter_args args = { + return_value, + Z_STRVAL(MBREX(search_str)), + len, + MBREX(search_regs) + }; + onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args); + } } else { RETVAL_FALSE; } diff --git a/ext/mbstring/tests/mb_ereg_dupnames.phpt b/ext/mbstring/tests/mb_ereg_dupnames.phpt new file mode 100644 index 0000000000..fcc428c360 --- /dev/null +++ b/ext/mbstring/tests/mb_ereg_dupnames.phpt @@ -0,0 +1,37 @@ +--TEST-- +Testing mb_ereg() duplicate named groups +--SKIPIF-- +<?php +if (!extension_loaded('mbstring')) die('skip mbstring not enabled'); +function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build"); +?> +--FILE-- +<?php + 
mb_regex_encoding("UTF-8"); + $pattern = '\w+((?<punct>？)|(?<punct>！))'; + mb_ereg($pattern, '中？', $m); + var_dump($m); + mb_ereg($pattern, '中！', $m); + var_dump($m); +?> +--EXPECT-- +array(4) { + [0]=> + string(6) "中？" + [1]=> + string(3) "？" + [2]=> + bool(false) + ["punct"]=> + string(3) "？" +} +array(4) { + [0]=> + string(6) "中！" + [1]=> + bool(false) + [2]=> + string(3) "！" + ["punct"]=> + string(3) "！" +} diff --git a/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt b/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt new file mode 100644 index 0000000000..ed0f85baa3 --- /dev/null +++ b/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt @@ -0,0 +1,50 @@ +--TEST-- +Testing mb_ereg() named subpatterns +--SKIPIF-- +<?php +if (!extension_loaded('mbstring')) die('skip mbstring not enabled'); +function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build"); +?> +--FILE-- +<?php + mb_regex_encoding("UTF-8"); + mb_ereg('(?<wsp>\s*)(?<word>\w+)', '  中国', $m); + var_dump($m); + mb_ereg('(?<wsp>\s*)(?<word>\w+)', '国', $m); + var_dump($m); + mb_ereg('(\s*)(?<word>\w+)', '  中国', $m); + var_dump($m); +?> +--EXPECT-- +array(5) { + [0]=> + string(8) "  中国" + [1]=> + string(2) "  " + [2]=> + string(6) "中国" + ["wsp"]=> + string(2) "  " + ["word"]=> + string(6) "中国" +} +array(5) { + [0]=> + string(3) "国" + [1]=> + bool(false) + [2]=> + string(3) "国" + ["wsp"]=> + bool(false) + ["word"]=> + string(3) "国" +} +array(3) { + [0]=> + string(8) "  中国" + [1]=> + string(6) "中国" + ["word"]=> + string(6) "中国" +} diff --git a/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt b/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt new file mode 100644 index 0000000000..f899e9beeb --- /dev/null +++ b/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt @@ -0,0 +1,31 @@ +--TEST-- +Testing mb_ereg_search() named capture groups +--SKIPIF-- +<?php +if (!extension_loaded('mbstring')) die('skip mbstring not enabled'); +function_exists('mb_ereg_search') or die("skip 
mb_ereg_search() is not available in this build"); +?> +--FILE-- +<?php + mb_regex_encoding("UTF-8"); + mb_ereg_search_init('  中国？'); + mb_ereg_search('(?<wsp>\s*)(?<word>\w+)(?<punct>[？！])'); + var_dump(mb_ereg_search_getregs()); +?> +--EXPECT-- +array(7) { + [0]=> + string(11) "  中国？" + [1]=> + string(2) "  " + [2]=> + string(6) "中国" + [3]=> + string(3) "？" + ["punct"]=> + string(3) "？" + ["wsp"]=> + string(2) "  " + ["word"]=> + string(6) "中国" +} |