summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-01 17:38:47 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-01 17:38:47 +0000
commit 6b343554a0bc93c892c11365221c7094aaa4f5ff (patch)
tree bc8ff6b63c895ccb72bd47c94cb10ff73408af8c
parent a5b453e62eb0aa93248d87650fd7bcf6b8153c17 (diff)
download pcre-6b343554a0bc93c892c11365221c7094aaa4f5ff.tar.gz
Fix bug with caseless matching of characters of different lengths when the
shorter is right at the end of the subject.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@778 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 4
-rw-r--r-- pcre_exec.c 32
-rw-r--r-- testdata/testinput6 14
-rw-r--r-- testdata/testoutput6 22
4 files changed, 56 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 1b62901..d0db5af 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -89,6 +89,10 @@ Version 8.21
21. Retrieve executable code size support for the JIT compiler and fixing
some warnings.
+
+22. A caseless match of a UTF-8 character whose other case uses fewer bytes did
+ not work when the shorter character appeared right at the end of the
+ subject string.
Version 8.20 21-Oct-2011
diff --git a/pcre_exec.c b/pcre_exec.c
index a586c80..2dd8a55 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -417,7 +417,7 @@ returns a negative (error) response, the outer incarnation must also return the
same response. */
/* These macros pack up tests that are used for partial matching, and which
-appears several times in the code. We set the "hit end" flag if the pointer is
+appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past the start of the subject (i.e.
something has been matched). For hard partial matching, we then return
immediately. The second one is used when we already know we are past the end of
@@ -3037,31 +3037,36 @@ for (;;)
}
break;
- /* Match a single character, caselessly */
+ /* Match a single character, caselessly. If we are at the end of the
+ subject, give up immediately. */
case OP_CHARI:
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+
#ifdef SUPPORT_UTF8
if (utf8)
{
length = 1;
ecode++;
GETCHARLEN(fc, ecode, length);
-
- if (length > md->end_subject - eptr)
- {
- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
-
+
/* If the pattern character's value is < 128, we have only one byte, and
- can use the fast lookup table. */
+ we know that its other case must also be one byte long, so we can use the
+ fast lookup table. We know that there is at least one byte left in the
+ subject. */
if (fc < 128)
{
if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
}
- /* Otherwise we must pick up the subject character */
+ /* Otherwise we must pick up the subject character. Note that we cannot
+ use the value of "length" to check for sufficient bytes left, because the
+ other case of the character may have more or fewer bytes. */
else
{
@@ -3086,11 +3091,6 @@ for (;;)
/* Non-UTF-8 mode */
{
- if (md->end_subject - eptr < 1)
- {
- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
diff --git a/testdata/testinput6 b/testdata/testinput6
index e5fc0e9..6b0d2f7 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -802,4 +802,18 @@
** Failers
a\xFCb
+/ⱥ/8i
+ ⱥ
+ Ⱥx
+ Ⱥ
+
+/[ⱥ]/8i
+ ⱥ
+ Ⱥx
+ Ⱥ
+
+/Ⱥ/8i
+ Ⱥ
+ ⱥ
+
/-- End of testinput6 --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 1acaa23..68c0a46 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1353,4 +1353,26 @@ No match
a\xFCb
No match
+/ⱥ/8i
+ ⱥ
+ 0: \x{2c65}
+ Ⱥx
+ 0: \x{23a}
+ Ⱥ
+ 0: \x{23a}
+
+/[ⱥ]/8i
+ ⱥ
+ 0: \x{2c65}
+ Ⱥx
+ 0: \x{23a}
+ Ⱥ
+ 0: \x{23a}
+
+/Ⱥ/8i
+ Ⱥ
+ 0: \x{23a}
+ ⱥ
+ 0: \x{2c65}
+
/-- End of testinput6 --/