summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog4
-rw-r--r--pcre_exec.c32
-rw-r--r--testdata/testinput614
-rw-r--r--testdata/testoutput622
4 files changed, 56 insertions, 16 deletions
diff --git a/ChangeLog b/ChangeLog
index 1b62901..d0db5af 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -89,6 +89,10 @@ Version 8.21
21. Retrieve executable code size support for the JIT compiler and fixing
some warnings.
+
+22. A caseless match of a UTF-8 character whose other case uses fewer bytes did
+ not work when the shorter character appeared right at the end of the
+ subject string.
Version 8.20 21-Oct-2011
diff --git a/pcre_exec.c b/pcre_exec.c
index a586c80..2dd8a55 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -417,7 +417,7 @@ returns a negative (error) response, the outer incarnation must also return the
same response. */
/* These macros pack up tests that are used for partial matching, and which
-appears several times in the code. We set the "hit end" flag if the pointer is
+appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past the start of the subject (i.e.
something has been matched). For hard partial matching, we then return
immediately. The second one is used when we already know we are past the end of
@@ -3037,31 +3037,36 @@ for (;;)
}
break;
- /* Match a single character, caselessly */
+ /* Match a single character, caselessly. If we are at the end of the
+ subject, give up immediately. */
case OP_CHARI:
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ RRETURN(MATCH_NOMATCH);
+ }
+
#ifdef SUPPORT_UTF8
if (utf8)
{
length = 1;
ecode++;
GETCHARLEN(fc, ecode, length);
-
- if (length > md->end_subject - eptr)
- {
- CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
-
+
/* If the pattern character's value is < 128, we have only one byte, and
- can use the fast lookup table. */
+ we know that its other case must also be one byte long, so we can use the
+ fast lookup table. We know that there is at least one byte left in the
+ subject. */
if (fc < 128)
{
if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
}
- /* Otherwise we must pick up the subject character */
+ /* Otherwise we must pick up the subject character. Note that we cannot
+ use the value of "length" to check for sufficient bytes left, because the
+ other case of the character may have more or fewer bytes. */
else
{
@@ -3086,11 +3091,6 @@ for (;;)
/* Non-UTF-8 mode */
{
- if (md->end_subject - eptr < 1)
- {
- SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
- RRETURN(MATCH_NOMATCH);
- }
if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
ecode += 2;
}
diff --git a/testdata/testinput6 b/testdata/testinput6
index e5fc0e9..6b0d2f7 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -802,4 +802,18 @@
** Failers
a\xFCb
+/ⱥ/8i
+ ⱥ
+ Ⱥx
+ Ⱥ
+
+/[ⱥ]/8i
+ ⱥ
+ Ⱥx
+ Ⱥ
+
+/Ⱥ/8i
+ Ⱥ
+ ⱥ
+
/-- End of testinput6 --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 1acaa23..68c0a46 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1353,4 +1353,26 @@ No match
a\xFCb
No match
+/ⱥ/8i
+ ⱥ
+ 0: \x{2c65}
+ Ⱥx
+ 0: \x{23a}
+ Ⱥ
+ 0: \x{23a}
+
+/[ⱥ]/8i
+ ⱥ
+ 0: \x{2c65}
+ Ⱥx
+ 0: \x{23a}
+ Ⱥ
+ 0: \x{23a}
+
+/Ⱥ/8i
+ Ⱥ
+ 0: \x{23a}
+ ⱥ
+ 0: \x{2c65}
+
/-- End of testinput6 --/