summaryrefslogtreecommitdiff
path: root/ext/pcre/tests
diff options
context:
space:
mode:
author: Nikita Popov <nikita.ppv@gmail.com> 2020-02-07 16:39:06 +0100
committer: Nikita Popov <nikita.ppv@gmail.com> 2020-02-07 16:49:28 +0100
commit: c9e78e6d338cc46dcadb39b3e2df119fa969e72b (patch)
tree: 90fdef22e19c9e9dbe0ea105512399902a01448b /ext/pcre/tests
parent: 0d49cf4ed25e406f00abefca0e2e3e8fd919bf94 (diff)
download: php-git-c9e78e6d338cc46dcadb39b3e2df119fa969e72b.tar.gz
PCRE: Check whether start offset is on char boundary
We need not just the whole string to be UTF-8, but the start position to be on a character boundary as well. Check this by looking for a continuation byte.
Diffstat (limited to 'ext/pcre/tests')
-rw-r--r-- ext/pcre/tests/bug79241.phpt | 22
1 files changed, 22 insertions, 0 deletions
diff --git a/ext/pcre/tests/bug79241.phpt b/ext/pcre/tests/bug79241.phpt
new file mode 100644
index 0000000000..92e5253735
--- /dev/null
+++ b/ext/pcre/tests/bug79241.phpt
@@ -0,0 +1,22 @@
+--TEST--
+Bug #79241: Segmentation fault on preg_match()
+--FILE--
+<?php
+
+// Regression test: if the "’" literal is used directly, without json_decode(),
+// the issue does not reproduce.
+$text = json_decode('"’"');
+
+$pattern = '/\b/u';
+
+// It has to be exactly two calls to preg_match(); the second starts at byte
+// offset 1, which is not a character boundary (see PREG_BAD_UTF8_OFFSET_ERROR).
+var_dump(preg_match($pattern, $text, $matches, 0, 0));
+var_dump(preg_match($pattern, $text, $matches, 0, 1));
+var_dump(preg_last_error() == PREG_BAD_UTF8_OFFSET_ERROR);
+
+?>
+--EXPECT--
+int(0)
+bool(false)
+bool(true)