summary refs log tree commit diff
diff options
context:
space:
mode:
author ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2018-02-19 17:26:33 +0000
committer ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2018-02-19 17:26:33 +0000
commit ea6f7a508aaa2fd61eb60d7759fe00713f46cd5c (patch)
tree 5e69798e3545ef661c3c7f997cc4a620dd72fa4b
parent e3ac8929b8152e6a30eff90f791b76339e44d91b (diff)
download pcre2-ea6f7a508aaa2fd61eb60d7759fe00713f46cd5c.tar.gz
Fix \C bug with repeated character classes in UTF-8 mode.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@918 6239d852-aaf2-0410-a92c-79f79f948069
-rw-r--r-- ChangeLog 5
-rw-r--r-- src/pcre2_match.c 16
-rw-r--r-- testdata/testinput22 3
-rw-r--r-- testdata/testoutput22-16 4
-rw-r--r-- testdata/testoutput22-32 4
-rw-r--r-- testdata/testoutput22-8 4
6 files changed, 32 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 8651e6a..17e7966 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -20,6 +20,11 @@ Unicode newlines" in the default case when --enable-bsr-anycrlf has not been
specified. Similarly, running "pcre2test -C bsr" never produced the result
ANY.
+4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing
+multi-code-unit characters caused bad behaviour and possibly a crash. This
+issue was fixed for other kinds of repeat in release 10.20 by change 19, but
+repeating character classes were overlooked.
+
Version 10.31 12-February-2018
------------------------------
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 79cc93f..ce96016 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
+
for (;;)
{
RMATCH(Fecode, RM201);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
BACKCHAR(Feptr);
}
}
@@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
+ /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
+
for(;;)
{
RMATCH(Fecode, RM101);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
- if (Feptr-- == Lstart_eptr) break; /* Tried at original position */
+ if (Feptr-- <= Lstart_eptr) break; /* Tried at original position */
#ifdef SUPPORT_UNICODE
if (utf) BACKCHAR(Feptr);
#endif
@@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (reptype == REPTYPE_POS) continue; /* No backtracking */
/* After \C in UTF mode, Lstart_eptr might be in the middle of a
- Unicode character. Use <= pp to ensure backtracking doesn't go too far.
- */
+ Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+ go too far. */
for(;;)
{
diff --git a/testdata/testinput22 b/testdata/testinput22
index e6d4053..c218ea6 100644
--- a/testdata/testinput22
+++ b/testdata/testinput22
@@ -98,4 +98,7 @@
\= Expect no match - tests \C at end of subject
ab
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+
# End of testinput22
diff --git a/testdata/testoutput22-16 b/testdata/testoutput22-16
index 88f827c..5e23611 100644
--- a/testdata/testoutput22-16
+++ b/testdata/testoutput22-16
@@ -171,4 +171,8 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
diff --git a/testdata/testoutput22-32 b/testdata/testoutput22-32
index ac485fc..8576f31 100644
--- a/testdata/testoutput22-32
+++ b/testdata/testoutput22-32
@@ -169,4 +169,8 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22
diff --git a/testdata/testoutput22-8 b/testdata/testoutput22-8
index 3d31fbc..8543652 100644
--- a/testdata/testoutput22-8
+++ b/testdata/testoutput22-8
@@ -173,4 +173,8 @@ No match
ab
No match
+/\C[^\v]+\x80/utf
+ [AΏBŀC]
+No match
+
# End of testinput22