summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorju1ius <ju1ius@laposte.net>2016-07-29 18:00:28 +0200
committerNikita Popov <nikita.ppv@gmail.com>2018-07-06 23:34:54 +0200
commit212f56b7ca52304fe9acf586b65701f445d19c66 (patch)
treeffd411cd48cc14ff142de19055f171de1c9f0ff4
parent69a49af0d320be6d62028bd6a6996b49d4f200bc (diff)
downloadphp-git-212f56b7ca52304fe9acf586b65701f445d19c66.tar.gz
adds support for named captures to mb_ereg & mb_ereg_search
`mb_ereg`, `mb_ereg_search_regs` & `mb_ereg_search_getregs` returned only numbered capturing groups. Now they return both numbered and named capturing groups. Fixes Bug #72704.
-rw-r--r--ext/mbstring/php_mbregex.c67
-rw-r--r--ext/mbstring/tests/mb_ereg_dupnames.phpt37
-rw-r--r--ext/mbstring/tests/mb_ereg_named_subpatterns.phpt50
-rw-r--r--ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt31
4 files changed, 185 insertions, 0 deletions
diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c
index b95f315c88..957ee484ba 100644
--- a/ext/mbstring/php_mbregex.c
+++ b/ext/mbstring/php_mbregex.c
@@ -646,6 +646,50 @@ _php_mb_regex_init_options(const char *parg, size_t narg, OnigOptionType *option
}
/* }}} */
+
+/*
+ * Callbacks for named subpatterns
+ */
+
+/* {{{ struct mb_regex_groups_iter_args
+ * Context handed to mb_regex_groups_iter() through the opaque last argument
+ * of onig_foreach_name(). */
+typedef struct mb_regex_groups_iter_args {
+ zval *groups; /* array that receives one entry per group name */
+ char *search_str; /* subject string the region offsets index into */
+ int search_len; /* upper bound checked against region end offsets; NOTE(review): callers pass string_len / Z_STRLEN() here -- confirm those always fit in an int */
+ OnigRegion *region; /* match region, read through its beg[] / end[] arrays */
+} mb_regex_groups_iter_args;
+/* }}} */
+
+/* {{{ mb_regex_groups_iter
+ * Per-name callback for onig_foreach_name(): stores the capture of one named
+ * group in args->groups, keyed by the group name.
+ *
+ * name, name_end      - delimit the group name; the key length is name_end - name
+ * ngroup_num          - number of entries in group_nums
+ * group_nums          - group numbers registered under this name
+ * reg                 - compiled regex, forwarded to onig_name_to_backref_number()
+ * parg                - pointer to the caller's mb_regex_groups_iter_args
+ *
+ * Always returns 0. NOTE(review): presumably this lets onig_foreach_name()
+ * carry on with the next name -- confirm against the Oniguruma API docs.
+ */
+static int
+mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* parg)
+{
+ mb_regex_groups_iter_args *args = (mb_regex_groups_iter_args *) parg;
+ int i, gn, ref, beg, end;
+
+ for (i = 0; i < ngroup_num; i++) {
+ gn = group_nums[i];
+ ref = onig_name_to_backref_number(reg, name, name_end, args->region);
+ if (ref != gn) {
+ /*
+ * In case of duplicate groups, keep only the last succeeding one
+ * to be consistent with preg_match with the PCRE_DUPNAMES option.
+ * Only the group number that onig_name_to_backref_number() resolves
+ * this name to (given the current region) is written; every other
+ * group number sharing the name is skipped.
+ */
+ continue;
+ }
+ beg = args->region->beg[gn];
+ end = args->region->end[gn];
+ if (beg >= 0 && beg < end && end <= args->search_len) {
+ add_assoc_stringl_ex(args->groups, (char *)name, name_end - name, &args->search_str[beg], end - beg);
+ } else { /* negative start, empty span (beg == end) or end past search_len: stored as false */
+ add_assoc_bool_ex(args->groups, (char *)name, name_end - name, 0);
+ }
+ }
+
+ return 0;
+}
+/* }}} */
+
/*
* php functions
*/
@@ -762,6 +806,11 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
add_index_bool(array, i, 0);
}
}
+
+ if (onig_number_of_names(re) > 0) {
+ mb_regex_groups_iter_args args = {array, string, string_len, regs};
+ onig_foreach_name(re, mb_regex_groups_iter, &args);
+ }
}
if (match_len == 0) {
@@ -1291,6 +1340,15 @@ _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
add_index_bool(return_value, i, 0);
}
}
+ if (onig_number_of_names(MBREX(search_re)) > 0) {
+ mb_regex_groups_iter_args args = {
+ return_value,
+ Z_STRVAL(MBREX(search_str)),
+ Z_STRLEN(MBREX(search_str)),
+ MBREX(search_regs)
+ };
+ onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
+ }
break;
default:
RETVAL_TRUE;
@@ -1417,6 +1475,15 @@ PHP_FUNCTION(mb_ereg_search_getregs)
add_index_bool(return_value, i, 0);
}
}
+ if (onig_number_of_names(MBREX(search_re)) > 0) {
+ mb_regex_groups_iter_args args = {
+ return_value,
+ Z_STRVAL(MBREX(search_str)),
+ len,
+ MBREX(search_regs)
+ };
+ onig_foreach_name(MBREX(search_re), mb_regex_groups_iter, &args);
+ }
} else {
RETVAL_FALSE;
}
diff --git a/ext/mbstring/tests/mb_ereg_dupnames.phpt b/ext/mbstring/tests/mb_ereg_dupnames.phpt
new file mode 100644
index 0000000000..fcc428c360
--- /dev/null
+++ b/ext/mbstring/tests/mb_ereg_dupnames.phpt
@@ -0,0 +1,37 @@
+--TEST--
+Testing mb_ereg() duplicate named groups
+--SKIPIF--
+<?php
+if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
+function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
+?>
+--FILE--
+<?php
+ mb_regex_encoding("UTF-8");
+ $pattern = '\w+((?<punct>？)|(?<punct>！))';
+ mb_ereg($pattern, '中？', $m);
+ var_dump($m);
+ mb_ereg($pattern, '中！', $m);
+ var_dump($m);
+?>
+--EXPECT--
+array(4) {
+ [0]=>
+ string(6) "中？"
+ [1]=>
+ string(3) "？"
+ [2]=>
+ bool(false)
+ ["punct"]=>
+ string(3) "？"
+}
+array(4) {
+ [0]=>
+ string(6) "中！"
+ [1]=>
+ bool(false)
+ [2]=>
+ string(3) "！"
+ ["punct"]=>
+ string(3) "！"
+}
diff --git a/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt b/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt
new file mode 100644
index 0000000000..ed0f85baa3
--- /dev/null
+++ b/ext/mbstring/tests/mb_ereg_named_subpatterns.phpt
@@ -0,0 +1,50 @@
+--TEST--
+Testing mb_ereg() named subpatterns
+--SKIPIF--
+<?php
+if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
+function_exists('mb_ereg') or die("skip mb_ereg() is not available in this build");
+?>
+--FILE--
+<?php
+ mb_regex_encoding("UTF-8");
+ mb_ereg('(?<wsp>\s*)(?<word>\w+)', '  中国', $m);
+ var_dump($m);
+ mb_ereg('(?<wsp>\s*)(?<word>\w+)', '国', $m);
+ var_dump($m);
+ mb_ereg('(\s*)(?<word>\w+)', '  中国', $m);
+ var_dump($m);
+?>
+--EXPECT--
+array(5) {
+ [0]=>
+ string(8) "  中国"
+ [1]=>
+ string(2) "  "
+ [2]=>
+ string(6) "中国"
+ ["wsp"]=>
+ string(2) "  "
+ ["word"]=>
+ string(6) "中国"
+}
+array(5) {
+ [0]=>
+ string(3) "国"
+ [1]=>
+ bool(false)
+ [2]=>
+ string(3) "国"
+ ["wsp"]=>
+ bool(false)
+ ["word"]=>
+ string(3) "国"
+}
+array(3) {
+ [0]=>
+ string(8) "  中国"
+ [1]=>
+ string(6) "中国"
+ ["word"]=>
+ string(6) "中国"
+}
diff --git a/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt b/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt
new file mode 100644
index 0000000000..f899e9beeb
--- /dev/null
+++ b/ext/mbstring/tests/mb_ereg_search_named_subpatterns.phpt
@@ -0,0 +1,31 @@
+--TEST--
+Testing mb_ereg_search() named capture groups
+--SKIPIF--
+<?php
+if (!extension_loaded('mbstring')) die('skip mbstring not enabled');
+function_exists('mb_ereg_search') or die("skip mb_ereg_search() is not available in this build");
+?>
+--FILE--
+<?php
+ mb_regex_encoding("UTF-8");
+ mb_ereg_search_init('  中国？');
+ mb_ereg_search('(?<wsp>\s*)(?<word>\w+)(?<punct>[？！])');
+ var_dump(mb_ereg_search_getregs());
+?>
+--EXPECT--
+array(7) {
+ [0]=>
+ string(11) "  中国？"
+ [1]=>
+ string(2) "  "
+ [2]=>
+ string(6) "中国"
+ [3]=>
+ string(3) "？"
+ ["punct"]=>
+ string(3) "？"
+ ["wsp"]=>
+ string(2) "  "
+ ["word"]=>
+ string(6) "中国"
+}