diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2014-01-10 16:13:10 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2014-01-10 16:13:10 +0000 |
commit | 01746cfc6c30e4988c6e753f1ca0ef22e0f756d9 (patch) | |
tree | a0588d0c943b20f72d00bc89497a3635affd6cac | |
parent | 5b7e7f16a6cbce89259fd8e31b941c9757ccef15 (diff) | |
download | pcre-01746cfc6c30e4988c6e753f1ca0ef22e0f756d9.tar.gz |
Fix caseless character class bug for characters within a range that have more
than one other case.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1438 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | pcre_compile.c | 14 | ||||
-rw-r--r-- | testdata/testinput6 | 9 | ||||
-rw-r--r-- | testdata/testinput7 | 6 | ||||
-rw-r--r-- | testdata/testoutput6 | 12 | ||||
-rw-r--r-- | testdata/testoutput7 | 28 |
6 files changed, 68 insertions, 7 deletions
@@ -53,6 +53,12 @@ Version 8.35-RC1 xx-xxxx-201x 11. Empty match is not possible, when the minimum length is greater than zero, and there is no \K in the pattern. Remove these unnecessary checks form JIT. + +12. In a caseless character class with UCP support, when a character with more + than one alternative case was not the first character of a range, not all + the alternative cases were added to the class. For example, s and \x{17f} + are both alternative cases for S: the class [RST] was handled correctly, + but [R-T] was not. Version 8.34 15-December-2013
diff --git a/pcre_compile.c b/pcre_compile.c
index a3d2870..dd065a3 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2013 University of Cambridge + Copyright (c) 1997-2014 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without
@@ -4077,12 +4077,16 @@ for (c = *cptr; c <= d; c++) if (c > d) return -1; /* Reached end of range */ +/* Found a character that has a single other case. Search for the end of the +range, which is either the end of the input range, or a character that has zero +or more than one other cases. */ + *ocptr = othercase; next = othercase + 1; for (++c; c <= d; c++) { - if (UCD_OTHERCASE(c) != next) break; + if ((co = UCD_CASESET(c)) != 0 || UCD_OTHERCASE(c) != next) break; next++; }
@@ -4138,7 +4142,7 @@ if ((options & PCRE_CASELESS) != 0) options &= ~PCRE_CASELESS; /* Remove for recursive calls */ c = start; - + while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0) { /* Handle a single character that has more than one other case. */
@@ -4201,9 +4205,9 @@ for (c = start; c <= classbits_end; c++) #if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (start <= 0xff) start = 0xff + 1; -if (end >= start) { +if (end >= start) + { pcre_uchar *uchardata = *uchardptr; - #ifdef SUPPORT_UTF if ((options & PCRE_UTF8) != 0) /* All UTFs use the same flag bit */ {
diff --git a/testdata/testinput6 b/testdata/testinput6
index 1e450be..7a6a53f 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -1484,4 +1484,13 @@ \x{a1}\x{a7} \x{37e} +/[RST]+/8iW + Ss\x{17f} + +/[R-T]+/8iW + Ss\x{17f} + +/[q-u]+/8iW + Ss\x{17f} + /-- End of testinput6 --/
diff --git a/testdata/testinput7 b/testdata/testinput7
index 9d14543..6bd0586 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -829,4 +829,10 @@ of case for anything other than the ASCII letters. --/ /\d+\s{0,5}=\s*\S?=\w{0,4}\W*/8WBZ +/[RST]+/8iWBZ + +/[R-T]+/8iWBZ + +/[Q-U]+/8iWBZ + /-- End of testinput7 --/
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index 6c42fce..f355e60 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -2445,4 +2445,16 @@ No match \x{37e} No match +/[RST]+/8iW + Ss\x{17f} + 0: Ss\x{17f} + +/[R-T]+/8iW + Ss\x{17f} + 0: Ss\x{17f} + +/[q-u]+/8iW + Ss\x{17f} + 0: Ss\x{17f} + /-- End of testinput6 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 8970d7b..c64e049 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -124,7 +124,7 @@ No match /[z-\x{100}]/8iDZ ------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}] + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] Ket End ------------------------------------------------------------------
@@ -162,7 +162,7 @@ No match /[z-\x{100}]/8DZi ------------------------------------------------------------------ Bra - [Zz-\xff\x{39c}\x{3bc}\x{1e9e}\x{178}\x{100}-\x{101}] + [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}] Ket End ------------------------------------------------------------------
@@ -2263,4 +2263,28 @@ No match End ------------------------------------------------------------------ +/[RST]+/8iWBZ +------------------------------------------------------------------ + Bra + [R-Tr-t\x{17f}]++ + Ket + End +------------------------------------------------------------------ + +/[R-T]+/8iWBZ +------------------------------------------------------------------ + Bra + [R-Tr-t\x{17f}]++ + Ket + End +------------------------------------------------------------------ + +/[Q-U]+/8iWBZ +------------------------------------------------------------------ + Bra + [Q-Uq-u\x{17f}]++ + Ket + End +------------------------------------------------------------------ + /-- End of testinput7 --/ |