summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2014-01-10 16:13:10 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2014-01-10 16:13:10 +0000
commit01746cfc6c30e4988c6e753f1ca0ef22e0f756d9 (patch)
treea0588d0c943b20f72d00bc89497a3635affd6cac
parent5b7e7f16a6cbce89259fd8e31b941c9757ccef15 (diff)
downloadpcre-01746cfc6c30e4988c6e753f1ca0ef22e0f756d9.tar.gz
Fix caseless character class bug for characters within a range that have more
than one other case.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1438 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog6
-rw-r--r--pcre_compile.c14
-rw-r--r--testdata/testinput69
-rw-r--r--testdata/testinput76
-rw-r--r--testdata/testoutput612
-rw-r--r--testdata/testoutput728
6 files changed, 68 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index e058884..506b2c7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -53,6 +53,12 @@ Version 8.35-RC1 xx-xxxx-201x
11. Empty match is not possible, when the minimum length is greater than zero,
and there is no \K in the pattern. Remove these unnecessary checks from JIT.
+
+12. In a caseless character class with UCP support, when a character with more
+ than one alternative case was not the first character of a range, not all
+ the alternative cases were added to the class. For example, s and \x{17f}
+ are both alternative cases for S: the class [RST] was handled correctly,
+ but [R-T] was not.
Version 8.34 15-December-2013
diff --git a/pcre_compile.c b/pcre_compile.c
index a3d2870..dd065a3 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2013 University of Cambridge
+ Copyright (c) 1997-2014 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -4077,12 +4077,16 @@ for (c = *cptr; c <= d; c++)
if (c > d) return -1; /* Reached end of range */
+/* Found a character that has a single other case. Search for the end of the
+range, which is either the end of the input range, or a character that has
+either no other case or more than one other case. */
+
*ocptr = othercase;
next = othercase + 1;
for (++c; c <= d; c++)
{
- if (UCD_OTHERCASE(c) != next) break;
+ if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break;
next++;
}
@@ -4138,7 +4142,7 @@ if ((options & PCRE_CASELESS) != 0)
options &= ~PCRE_CASELESS; /* Remove for recursive calls */
c = start;
-
+
while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
{
/* Handle a single character that has more than one other case. */
@@ -4201,9 +4205,9 @@ for (c = start; c <= classbits_end; c++)
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
if (start <= 0xff) start = 0xff + 1;
-if (end >= start) {
+if (end >= start)
+ {
pcre_uchar *uchardata = *uchardptr;
-
#ifdef SUPPORT_UTF
if ((options & PCRE_UTF8) != 0) /* All UTFs use the same flag bit */
{
diff --git a/testdata/testinput6 b/testdata/testinput6
index 1e450be..7a6a53f 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -1484,4 +1484,13 @@
\x{a1}\x{a7}
\x{37e}
+/[RST]+/8iW
+ Ss\x{17f}
+
+/[R-T]+/8iW
+ Ss\x{17f}
+
+/[q-u]+/8iW
+ Ss\x{17f}
+
/-- End of testinput6 --/
diff --git a/testdata/testinput7 b/testdata/testinput7
index 9d14543..6bd0586 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -829,4 +829,10 @@ of case for anything other than the ASCII letters. --/
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ
+/[RST]+/8iWBZ
+
+/[R-T]+/8iWBZ
+
+/[Q-U]+/8iWBZ
+
/-- End of testinput7 --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 6c42fce..f355e60 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -2445,4 +2445,16 @@ No match
\x{37e}
No match
+/[RST]+/8iW
+ Ss\x{17f}
+ 0: Ss\x{17f}
+
+/[R-T]+/8iW
+ Ss\x{17f}
+ 0: Ss\x{17f}
+
+/[q-u]+/8iW
+ Ss\x{17f}
+ 0: Ss\x{17f}
+
/-- End of testinput6 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 8970d7b..c64e049 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -124,7 +124,7 @@ No match
/[z-\x{100}]/8iDZ
------------------------------------------------------------------
Bra
- [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}]
+ [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
Ket
End
------------------------------------------------------------------
@@ -162,7 +162,7 @@ No match
/[z-\x{100}]/8DZi
------------------------------------------------------------------
Bra
- [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}]
+ [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
Ket
End
------------------------------------------------------------------
@@ -2263,4 +2263,28 @@ No match
End
------------------------------------------------------------------
+/[RST]+/8iWBZ
+------------------------------------------------------------------
+ Bra
+ [R-Tr-t\x{17f}]++
+ Ket
+ End
+------------------------------------------------------------------
+
+/[R-T]+/8iWBZ
+------------------------------------------------------------------
+ Bra
+ [R-Tr-t\x{17f}]++
+ Ket
+ End
+------------------------------------------------------------------
+
+/[Q-U]+/8iWBZ
+------------------------------------------------------------------
+ Bra
+ [Q-Uq-u\x{17f}]++
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput7 --/