summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Felipe Pena <felipe@php.net> 2010-10-03 16:01:38 +0000
committer Felipe Pena <felipe@php.net> 2010-10-03 16:01:38 +0000
commit 090a9b33316a448f1af9ba865484fa5dafeda4a7 (patch)
tree c9a293b2fe14068c177e60781222929ffd0e98fe
parent 4b0927b042a3b30f1dfad9ed89fe7e132ac8a040 (diff)
download php-git-090a9b33316a448f1af9ba865484fa5dafeda4a7.tar.gz
- Fixed bug #52971 (PCRE-Meta-Characters not working with utf-8)
# In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
# characters, even in UTF-8 mode. However, this can be changed by setting
# the PCRE_UCP option.
-rw-r--r-- NEWS 1
-rw-r--r-- ext/pcre/php_pcre.c 9
-rw-r--r-- ext/pcre/tests/bug52971.phpt 43
3 files changed, 52 insertions, 1 deletions
diff --git a/NEWS b/NEWS
index bcd560c8db..9e47361f74 100644
--- a/NEWS
+++ b/NEWS
@@ -22,6 +22,7 @@
- Fixed possible crash in mssql_fetch_batch(). (Kalle)
- Fixed inconsistent backlog default value (-1) in FPM on many systems. (fat)
+- Fixed bug #52971 (PCRE-Meta-Characters not working with utf-8). (Felipe)
- Fixed bug #52947 (segfault when ssl stream option capture_peer_cert_chain
used). (Felipe)
- Fixed bug #52944 (Invalid write on second and subsequent reads with an
diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c
index dcbc98fb85..ccb0a51c0e 100644
--- a/ext/pcre/php_pcre.c
+++ b/ext/pcre/php_pcre.c
@@ -350,7 +350,14 @@ PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_le
case 'S': do_study = 1; break;
case 'U': coptions |= PCRE_UNGREEDY; break;
case 'X': coptions |= PCRE_EXTRA; break;
- case 'u': coptions |= PCRE_UTF8; break;
+ case 'u': coptions |= PCRE_UTF8;
+ /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
+ characters, even in UTF-8 mode. However, this can be changed by setting
+ the PCRE_UCP option. */
+#ifdef PCRE_UCP
+ coptions |= PCRE_UCP;
+#endif
+ break;
/* Custom preg options */
case 'e': poptions |= PREG_REPLACE_EVAL; break;
diff --git a/ext/pcre/tests/bug52971.phpt b/ext/pcre/tests/bug52971.phpt
new file mode 100644
index 0000000000..5949cb220c
--- /dev/null
+++ b/ext/pcre/tests/bug52971.phpt
@@ -0,0 +1,43 @@
+--TEST--
+Bug #52971 (PCRE-Meta-Characters not working with utf-8)
+--SKIPIF--
+<?php if ((double)PCRE_VERSION < 8.1) die('skip PCRE_VERSION >= 8.1 is required!'); ?>
+--FILE--
+<?php
+
+$message = 'Der ist ein Süßwasserpool Süsswasserpool ... verschiedene Wassersportmöglichkeiten bei ...';
+
+$pattern = '/\bwasser/iu';
+preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE);
+var_dump($match);
+
+$pattern = '/[^\w]wasser/iu';
+preg_match_all($pattern, $message, $match, PREG_OFFSET_CAPTURE);
+var_dump($match);
+
+?>
+--EXPECTF--
+array(1) {
+ [0]=>
+ array(1) {
+ [0]=>
+ array(2) {
+ [0]=>
+ string(6) "Wasser"
+ [1]=>
+ int(61)
+ }
+ }
+}
+array(1) {
+ [0]=>
+ array(1) {
+ [0]=>
+ array(2) {
+ [0]=>
+ string(7) " Wasser"
+ [1]=>
+ int(60)
+ }
+ }
+}