diff options
author | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-27 09:42:33 +0000 |
---|---|---|
committer | zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-12-27 09:42:33 +0000 |
commit | f66b79f11b7947f4d36cf78abbdaa0451e5f7bc2 (patch) | |
tree | 4c02b27e8ee3cd94df3dae159c72d97fb004ba58 | |
parent | 5fa1a51657506bf743cd560f15726a3bafeb8e6d (diff) | |
download | pcre-f66b79f11b7947f4d36cf78abbdaa0451e5f7bc2.tar.gz |
fix horizontal and vertical white space ranges in 16-bit mode
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@826 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | pcre_compile.c | 72 | ||||
-rw-r--r-- | pcre_printint.c | 5 | ||||
-rw-r--r-- | testdata/testinput14 | 22 | ||||
-rw-r--r-- | testdata/testinput16 | 2 | ||||
-rw-r--r-- | testdata/testinput17 | 16 | ||||
-rw-r--r-- | testdata/testinput19 | 2 | ||||
-rw-r--r-- | testdata/testinput2 | 16 | ||||
-rw-r--r-- | testdata/testinput7 | 2 | ||||
-rw-r--r-- | testdata/testoutput14 | 84 | ||||
-rw-r--r-- | testdata/testoutput16 | 8 | ||||
-rw-r--r-- | testdata/testoutput17 | 60 | ||||
-rw-r--r-- | testdata/testoutput19 | 8 | ||||
-rw-r--r-- | testdata/testoutput2 | 60 | ||||
-rw-r--r-- | testdata/testoutput5 | 2 | ||||
-rw-r--r-- | testdata/testoutput7 | 8 |
15 files changed, 273 insertions, 94 deletions
diff --git a/pcre_compile.c b/pcre_compile.c index 031becb..7afa423 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -4023,7 +4023,22 @@ for (;; ptr++) SETBIT(classbits, 0x09); /* VT */ SETBIT(classbits, 0x20); /* SPACE */ SETBIT(classbits, 0xa0); /* NSBP */ -#ifdef SUPPORT_UTF +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x1680; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x180e; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2000; + *class_uchardata++ = 0x200a; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x202f; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x205f; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x3000; +#elif defined SUPPORT_UTF if (utf) { xclass = TRUE; @@ -4033,7 +4048,7 @@ for (;; ptr++) class_uchardata += PRIV(ord2utf)(0x180e, class_uchardata); *class_uchardata++ = XCL_RANGE; class_uchardata += PRIV(ord2utf)(0x2000, class_uchardata); - class_uchardata += PRIV(ord2utf)(0x200A, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x200a, class_uchardata); *class_uchardata++ = XCL_SINGLE; class_uchardata += PRIV(ord2utf)(0x202f, class_uchardata); *class_uchardata++ = XCL_SINGLE; @@ -4057,9 +4072,36 @@ for (;; ptr++) } classbits[c] |= x; } - +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x0100; + *class_uchardata++ = 0x167f; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x1681; + *class_uchardata++ = 0x180d; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x180f; + *class_uchardata++ = 0x1fff; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x200b; + *class_uchardata++ = 0x202e; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2030; + *class_uchardata++ = 0x205e; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2060; + *class_uchardata++ = 0x2fff; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x3001; #ifdef SUPPORT_UTF 
if (utf) + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); + else +#endif + *class_uchardata++ = 0xffff; +#elif defined SUPPORT_UTF + if (utf) { xclass = TRUE; *class_uchardata++ = XCL_RANGE; @@ -4072,7 +4114,7 @@ for (;; ptr++) class_uchardata += PRIV(ord2utf)(0x180f, class_uchardata); class_uchardata += PRIV(ord2utf)(0x1fff, class_uchardata); *class_uchardata++ = XCL_RANGE; - class_uchardata += PRIV(ord2utf)(0x200B, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x200b, class_uchardata); class_uchardata += PRIV(ord2utf)(0x202e, class_uchardata); *class_uchardata++ = XCL_RANGE; class_uchardata += PRIV(ord2utf)(0x2030, class_uchardata); @@ -4093,7 +4135,12 @@ for (;; ptr++) SETBIT(classbits, 0x0c); /* FF */ SETBIT(classbits, 0x0d); /* CR */ SETBIT(classbits, 0x85); /* NEL */ -#ifdef SUPPORT_UTF +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2028; + *class_uchardata++ = 0x2029; +#elif defined SUPPORT_UTF if (utf) { xclass = TRUE; @@ -4121,15 +4168,28 @@ for (;; ptr++) classbits[c] |= x; } +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x0100; + *class_uchardata++ = 0x2027; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x202a; #ifdef SUPPORT_UTF if (utf) + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); + else +#endif + *class_uchardata++ = 0xffff; +#elif defined SUPPORT_UTF + if (utf) { xclass = TRUE; *class_uchardata++ = XCL_RANGE; class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata); class_uchardata += PRIV(ord2utf)(0x2027, class_uchardata); *class_uchardata++ = XCL_RANGE; - class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x202a, class_uchardata); class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); } #endif diff --git a/pcre_printint.c b/pcre_printint.c index 8d504ce..fe5e548 100644 --- a/pcre_printint.c +++ b/pcre_printint.c @@ -114,8 +114,11 @@ print_char(FILE 
*f, pcre_uchar *ptr, BOOL utf) int c = *ptr; #ifndef SUPPORT_UTF + (void)utf; /* Avoid compiler warning */ -if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); +if (PRINTABLE(c)) fprintf(f, "%c", c); +else if (c <= 0xff) fprintf(f, "\\x%02x", c); +else fprintf(f, "\\x{%x}", c); return 0; #else diff --git a/testdata/testinput14 b/testdata/testinput14 index 32ae5ea..b672996 100644 --- a/testdata/testinput14 +++ b/testdata/testinput14 @@ -283,4 +283,26 @@ \) )* # optional trailing comment /xSI +/\h/SI + +/\v/SI + +/\R/SI + +/[\h]/BZ + >\x09< + +/[\h]+/BZ + >\x09\x20\xa0< + +/[\v]/BZ + +/[\H]/BZ + +/[^\h]/BZ + +/[\V]/BZ + +/[\x0a\V]/BZ + /-- End of testinput14 --/ diff --git a/testdata/testinput16 b/testdata/testinput16 index 26f53f8..e7a05ae 100644 --- a/testdata/testinput16 +++ b/testdata/testinput16 @@ -30,4 +30,6 @@ /\R/SI +/[[:blank:]]/WBZ + /-- End of testinput16 --/ diff --git a/testdata/testinput17 b/testdata/testinput17 index a9fc089..38dc556 100644 --- a/testdata/testinput17 +++ b/testdata/testinput17 @@ -219,4 +219,20 @@ /\R/SI +/[\h]/BZ + >\x09< + +/[\h]+/BZ + >\x09\x20\xa0< + +/[\v]/BZ + +/[\H]/BZ + +/[^\h]/BZ + +/[\V]/BZ + +/[\x0a\V]/BZ + /-- End of testinput17 --/ diff --git a/testdata/testinput19 b/testdata/testinput19 index 3af5f00..4b002f4 100644 --- a/testdata/testinput19 +++ b/testdata/testinput19 @@ -17,4 +17,6 @@ /[^ⱥ]/8iBZ +/[[:blank:]]/WBZ + /-- End of testinput19 --/ diff --git a/testdata/testinput2 b/testdata/testinput2 index 5ff06d9..7d30866 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -2173,22 +2173,6 @@ a random value. /Ix xabcpqrx xxyzx -/[\h]/BZ - >\x09< - -/[\h]+/BZ - >\x09\x20\xa0< - -/[\v]/BZ - -/[\H]/BZ - -/[^\h]/BZ - -/[\V]/BZ - -/[\x0a\V]/BZ - /\H++X/BZ ** Failers XXXX diff --git a/testdata/testinput7 b/testdata/testinput7 index 3e6a10b..abbfe66 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -397,8 +397,6 @@ of case for anything other than the ASCII letters. 
--/ /[[:ascii:]]/WBZ -/[[:blank:]]/WBZ - /[[:cntrl:]]/WBZ /[[:digit:]]/WBZ diff --git a/testdata/testoutput14 b/testdata/testoutput14 index 4830667..171bd17 100644 --- a/testdata/testoutput14 +++ b/testdata/testoutput14 @@ -355,4 +355,88 @@ Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8 9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f +/\h/SI +Capturing subpattern count = 0 +No options +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \x09 \x20 \xa0 + +/\v/SI +Capturing subpattern count = 0 +No options +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \x0a \x0b \x0c \x0d \x85 + +/\R/SI +Capturing subpattern count = 0 +No options +No first char +No need char +Subject length lower bound = 1 +Starting byte set: \x0a \x0b \x0c \x0d \x85 + +/[\h]/BZ +------------------------------------------------------------------ + Bra + [\x09 \xa0] + Ket + End +------------------------------------------------------------------ + >\x09< + 0: \x09 + +/[\h]+/BZ +------------------------------------------------------------------ + Bra + [\x09 \xa0]+ + Ket + End +------------------------------------------------------------------ + >\x09\x20\xa0< + 0: \x09 \xa0 + +/[\v]/BZ +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85] + Ket + End +------------------------------------------------------------------ + +/[\H]/BZ +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] + Ket + End +------------------------------------------------------------------ + +/[^\h]/BZ +------------------------------------------------------------------ + Bra + [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg) + Ket + End +------------------------------------------------------------------ + +/[\V]/BZ 
+------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff] + Ket + End +------------------------------------------------------------------ + +/[\x0a\V]/BZ +------------------------------------------------------------------ + Bra + [\x00-\x0a\x0e-\x84\x86-\xff] + Ket + End +------------------------------------------------------------------ + /-- End of testinput14 --/ diff --git a/testdata/testoutput16 b/testdata/testoutput16 index dde5399..921df78 100644 --- a/testdata/testoutput16 +++ b/testdata/testoutput16 @@ -110,4 +110,12 @@ No need char Subject length lower bound = 1 Starting byte set: \x0a \x0b \x0c \x0d \x85 +/[[:blank:]]/WBZ +------------------------------------------------------------------ + Bra + [\x09 \xa0] + Ket + End +------------------------------------------------------------------ + /-- End of testinput16 --/ diff --git a/testdata/testoutput17 b/testdata/testoutput17 index 9fc98e1..4bb9986 100644 --- a/testdata/testoutput17 +++ b/testdata/testoutput17 @@ -268,4 +268,64 @@ No need char Subject length lower bound = 1 Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff +/[\h]/BZ +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + >\x09< + 0: \x09 + +/[\h]+/BZ +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]+ + Ket + End +------------------------------------------------------------------ + >\x09\x20\xa0< + 0: \x09 \xa0 + +/[\v]/BZ +------------------------------------------------------------------ + Bra + [\x0a-\x0d\x85\x{2028}-\x{2029}] + Ket + End +------------------------------------------------------------------ + +/[\H]/BZ +------------------------------------------------------------------ + Bra + 
[\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[^\h]/BZ +------------------------------------------------------------------ + Bra + [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + +/[\V]/BZ +------------------------------------------------------------------ + Bra + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + +/[\x0a\V]/BZ +------------------------------------------------------------------ + Bra + [\x00-\x0a\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}] + Ket + End +------------------------------------------------------------------ + /-- End of testinput17 --/ diff --git a/testdata/testoutput19 b/testdata/testoutput19 index d7dc9df..b3cfb9b 100644 --- a/testdata/testoutput19 +++ b/testdata/testoutput19 @@ -77,4 +77,12 @@ No set of starting bytes End ------------------------------------------------------------------ +/[[:blank:]]/WBZ +------------------------------------------------------------------ + Bra + [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}] + Ket + End +------------------------------------------------------------------ + /-- End of testinput19 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index aac0fe3..9f5134d 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -8383,66 +8383,6 @@ Failed: syntax error in subpattern name (missing terminator) at offset 4 3: <unset> 4: x -/[\h]/BZ ------------------------------------------------------------------- - Bra - [\x09 \xa0] - Ket - End ------------------------------------------------------------------- - >\x09< - 0: \x09 - -/[\h]+/BZ 
------------------------------------------------------------------- - Bra - [\x09 \xa0]+ - Ket - End ------------------------------------------------------------------- - >\x09\x20\xa0< - 0: \x09 \xa0 - -/[\v]/BZ ------------------------------------------------------------------- - Bra - [\x0a-\x0d\x85] - Ket - End ------------------------------------------------------------------- - -/[\H]/BZ ------------------------------------------------------------------- - Bra - [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] - Ket - End ------------------------------------------------------------------- - -/[^\h]/BZ ------------------------------------------------------------------- - Bra - [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg) - Ket - End ------------------------------------------------------------------- - -/[\V]/BZ ------------------------------------------------------------------- - Bra - [\x00-\x09\x0e-\x84\x86-\xff] - Ket - End ------------------------------------------------------------------- - -/[\x0a\V]/BZ ------------------------------------------------------------------- - Bra - [\x00-\x0a\x0e-\x84\x86-\xff] - Ket - End ------------------------------------------------------------------- - /\H++X/BZ ------------------------------------------------------------------ Bra diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 559ab7b..f2def0c 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -797,7 +797,7 @@ No match /[\V]/8BZ ------------------------------------------------------------------ Bra - [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{2029}-\x{10ffff}] + [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}] Ket End ------------------------------------------------------------------ diff --git a/testdata/testoutput7 b/testdata/testoutput7 index d71a1e2..982e8a7 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -815,14 +815,6 @@ No match End ------------------------------------------------------------------ 
-/[[:blank:]]/WBZ ------------------------------------------------------------------- - Bra - [\x09 \xa0] - Ket - End ------------------------------------------------------------------- - /[[:cntrl:]]/WBZ ------------------------------------------------------------------ Bra |