summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-27 09:42:33 +0000
committer zherczeg <zherczeg@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-12-27 09:42:33 +0000
commit f66b79f11b7947f4d36cf78abbdaa0451e5f7bc2 (patch)
tree 4c02b27e8ee3cd94df3dae159c72d97fb004ba58
parent 5fa1a51657506bf743cd560f15726a3bafeb8e6d (diff)
download pcre-f66b79f11b7947f4d36cf78abbdaa0451e5f7bc2.tar.gz
fix horizontal and vertical white space ranges in 16 bit mode
git-svn-id: svn://vcs.exim.org/pcre/code/branches/pcre16@826 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- pcre_compile.c 72
-rw-r--r-- pcre_printint.c 5
-rw-r--r-- testdata/testinput14 22
-rw-r--r-- testdata/testinput16 2
-rw-r--r-- testdata/testinput17 16
-rw-r--r-- testdata/testinput19 2
-rw-r--r-- testdata/testinput2 16
-rw-r--r-- testdata/testinput7 2
-rw-r--r-- testdata/testoutput14 84
-rw-r--r-- testdata/testoutput16 8
-rw-r--r-- testdata/testoutput17 60
-rw-r--r-- testdata/testoutput19 8
-rw-r--r-- testdata/testoutput2 60
-rw-r--r-- testdata/testoutput5 2
-rw-r--r-- testdata/testoutput7 8
15 files changed, 273 insertions, 94 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index 031becb..7afa423 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -4023,7 +4023,22 @@ for (;; ptr++)
SETBIT(classbits, 0x09); /* VT */
SETBIT(classbits, 0x20); /* SPACE */
SETBIT(classbits, 0xa0); /* NSBP */
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x1680;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x180e;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2000;
+ *class_uchardata++ = 0x200a;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x202f;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x205f;
+ *class_uchardata++ = XCL_SINGLE;
+ *class_uchardata++ = 0x3000;
+#elif defined SUPPORT_UTF
if (utf)
{
xclass = TRUE;
@@ -4033,7 +4048,7 @@ for (;; ptr++)
class_uchardata += PRIV(ord2utf)(0x180e, class_uchardata);
*class_uchardata++ = XCL_RANGE;
class_uchardata += PRIV(ord2utf)(0x2000, class_uchardata);
- class_uchardata += PRIV(ord2utf)(0x200A, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x200a, class_uchardata);
*class_uchardata++ = XCL_SINGLE;
class_uchardata += PRIV(ord2utf)(0x202f, class_uchardata);
*class_uchardata++ = XCL_SINGLE;
@@ -4057,9 +4072,36 @@ for (;; ptr++)
}
classbits[c] |= x;
}
-
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x0100;
+ *class_uchardata++ = 0x167f;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x1681;
+ *class_uchardata++ = 0x180d;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x180f;
+ *class_uchardata++ = 0x1fff;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x200b;
+ *class_uchardata++ = 0x202e;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2030;
+ *class_uchardata++ = 0x205e;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2060;
+ *class_uchardata++ = 0x2fff;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x3001;
#ifdef SUPPORT_UTF
if (utf)
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ else
+#endif
+ *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+ if (utf)
{
xclass = TRUE;
*class_uchardata++ = XCL_RANGE;
@@ -4072,7 +4114,7 @@ for (;; ptr++)
class_uchardata += PRIV(ord2utf)(0x180f, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x1fff, class_uchardata);
*class_uchardata++ = XCL_RANGE;
- class_uchardata += PRIV(ord2utf)(0x200B, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x200b, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x202e, class_uchardata);
*class_uchardata++ = XCL_RANGE;
class_uchardata += PRIV(ord2utf)(0x2030, class_uchardata);
@@ -4093,7 +4135,12 @@ for (;; ptr++)
SETBIT(classbits, 0x0c); /* FF */
SETBIT(classbits, 0x0d); /* CR */
SETBIT(classbits, 0x85); /* NEL */
-#ifdef SUPPORT_UTF
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x2028;
+ *class_uchardata++ = 0x2029;
+#elif defined SUPPORT_UTF
if (utf)
{
xclass = TRUE;
@@ -4121,15 +4168,28 @@ for (;; ptr++)
classbits[c] |= x;
}
+#ifndef COMPILE_PCRE8
+ xclass = TRUE;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x0100;
+ *class_uchardata++ = 0x2027;
+ *class_uchardata++ = XCL_RANGE;
+ *class_uchardata++ = 0x202a;
#ifdef SUPPORT_UTF
if (utf)
+ class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
+ else
+#endif
+ *class_uchardata++ = 0xffff;
+#elif defined SUPPORT_UTF
+ if (utf)
{
xclass = TRUE;
*class_uchardata++ = XCL_RANGE;
class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x2027, class_uchardata);
*class_uchardata++ = XCL_RANGE;
- class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata);
+ class_uchardata += PRIV(ord2utf)(0x202a, class_uchardata);
class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
}
#endif
diff --git a/pcre_printint.c b/pcre_printint.c
index 8d504ce..fe5e548 100644
--- a/pcre_printint.c
+++ b/pcre_printint.c
@@ -114,8 +114,11 @@ print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
int c = *ptr;
#ifndef SUPPORT_UTF
+
(void)utf; /* Avoid compiler warning */
-if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
+if (PRINTABLE(c)) fprintf(f, "%c", c);
+else if (c <= 0xff) fprintf(f, "\\x%02x", c);
+else fprintf(f, "\\x{%x}", c);
return 0;
#else
diff --git a/testdata/testinput14 b/testdata/testinput14
index 32ae5ea..b672996 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -283,4 +283,26 @@
\) )* # optional trailing comment
/xSI
+/\h/SI
+
+/\v/SI
+
+/\R/SI
+
+/[\h]/BZ
+ >\x09<
+
+/[\h]+/BZ
+ >\x09\x20\xa0<
+
+/[\v]/BZ
+
+/[\H]/BZ
+
+/[^\h]/BZ
+
+/[\V]/BZ
+
+/[\x0a\V]/BZ
+
/-- End of testinput14 --/
diff --git a/testdata/testinput16 b/testdata/testinput16
index 26f53f8..e7a05ae 100644
--- a/testdata/testinput16
+++ b/testdata/testinput16
@@ -30,4 +30,6 @@
/\R/SI
+/[[:blank:]]/WBZ
+
/-- End of testinput16 --/
diff --git a/testdata/testinput17 b/testdata/testinput17
index a9fc089..38dc556 100644
--- a/testdata/testinput17
+++ b/testdata/testinput17
@@ -219,4 +219,20 @@
/\R/SI
+/[\h]/BZ
+ >\x09<
+
+/[\h]+/BZ
+ >\x09\x20\xa0<
+
+/[\v]/BZ
+
+/[\H]/BZ
+
+/[^\h]/BZ
+
+/[\V]/BZ
+
+/[\x0a\V]/BZ
+
/-- End of testinput17 --/
diff --git a/testdata/testinput19 b/testdata/testinput19
index 3af5f00..4b002f4 100644
--- a/testdata/testinput19
+++ b/testdata/testinput19
@@ -17,4 +17,6 @@
/[^ⱥ]/8iBZ
+/[[:blank:]]/WBZ
+
/-- End of testinput19 --/
diff --git a/testdata/testinput2 b/testdata/testinput2
index 5ff06d9..7d30866 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2173,22 +2173,6 @@ a random value. /Ix
xabcpqrx
xxyzx
-/[\h]/BZ
- >\x09<
-
-/[\h]+/BZ
- >\x09\x20\xa0<
-
-/[\v]/BZ
-
-/[\H]/BZ
-
-/[^\h]/BZ
-
-/[\V]/BZ
-
-/[\x0a\V]/BZ
-
/\H++X/BZ
** Failers
XXXX
diff --git a/testdata/testinput7 b/testdata/testinput7
index 3e6a10b..abbfe66 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -397,8 +397,6 @@ of case for anything other than the ASCII letters. --/
/[[:ascii:]]/WBZ
-/[[:blank:]]/WBZ
-
/[[:cntrl:]]/WBZ
/[[:digit:]]/WBZ
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index 4830667..171bd17 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -355,4 +355,88 @@ Starting byte set: \x09 \x20 ! " # $ % & ' ( * + - / 0 1 2 3 4 5 6 7 8
9 = ? A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ^ _ ` a b c d e
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
+/\h/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x09 \x20 \xa0
+
+/\v/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85
+
+/\R/SI
+Capturing subpattern count = 0
+No options
+No first char
+No need char
+Subject length lower bound = 1
+Starting byte set: \x0a \x0b \x0c \x0d \x85
+
+/[\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0]
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09<
+ 0: \x09
+
+/[\h]+/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0]+
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09\x20\xa0<
+ 0: \x09 \xa0
+
+/[\v]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x0a-\x0d\x85]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\H]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x09\x0e-\x84\x86-\xff]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\x0a\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x0a\x0e-\x84\x86-\xff]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput14 --/
diff --git a/testdata/testoutput16 b/testdata/testoutput16
index dde5399..921df78 100644
--- a/testdata/testoutput16
+++ b/testdata/testoutput16
@@ -110,4 +110,12 @@ No need char
Subject length lower bound = 1
Starting byte set: \x0a \x0b \x0c \x0d \x85
+/[[:blank:]]/WBZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput16 --/
diff --git a/testdata/testoutput17 b/testdata/testoutput17
index 9fc98e1..4bb9986 100644
--- a/testdata/testoutput17
+++ b/testdata/testoutput17
@@ -268,4 +268,64 @@ No need char
Subject length lower bound = 1
Starting byte set: \x0a \x0b \x0c \x0d \x85 \xff
+/[\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09<
+ 0: \x09
+
+/[\h]+/BZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]+
+ Ket
+ End
+------------------------------------------------------------------
+ >\x09\x20\xa0<
+ 0: \x09 \xa0
+
+/[\v]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x0a-\x0d\x85\x{2028}-\x{2029}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\H]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff\x{100}-\x{167f}\x{1681}-\x{180d}\x{180f}-\x{1fff}\x{200b}-\x{202e}\x{2030}-\x{205e}\x{2060}-\x{2fff}\x{3001}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[^\h]/BZ
+------------------------------------------------------------------
+ Bra
+ [^\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
+/[\x0a\V]/BZ
+------------------------------------------------------------------
+ Bra
+ [\x00-\x0a\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{ffff}]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput17 --/
diff --git a/testdata/testoutput19 b/testdata/testoutput19
index d7dc9df..b3cfb9b 100644
--- a/testdata/testoutput19
+++ b/testdata/testoutput19
@@ -77,4 +77,12 @@ No set of starting bytes
End
------------------------------------------------------------------
+/[[:blank:]]/WBZ
+------------------------------------------------------------------
+ Bra
+ [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
+ Ket
+ End
+------------------------------------------------------------------
+
/-- End of testinput19 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index aac0fe3..9f5134d 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -8383,66 +8383,6 @@ Failed: syntax error in subpattern name (missing terminator) at offset 4
3: <unset>
4: x
-/[\h]/BZ
-------------------------------------------------------------------
- Bra
- [\x09 \xa0]
- Ket
- End
-------------------------------------------------------------------
- >\x09<
- 0: \x09
-
-/[\h]+/BZ
-------------------------------------------------------------------
- Bra
- [\x09 \xa0]+
- Ket
- End
-------------------------------------------------------------------
- >\x09\x20\xa0<
- 0: \x09 \xa0
-
-/[\v]/BZ
-------------------------------------------------------------------
- Bra
- [\x0a-\x0d\x85]
- Ket
- End
-------------------------------------------------------------------
-
-/[\H]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff]
- Ket
- End
-------------------------------------------------------------------
-
-/[^\h]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x08\x0a-\x1f!-\x9f\xa1-\xff] (neg)
- Ket
- End
-------------------------------------------------------------------
-
-/[\V]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x09\x0e-\x84\x86-\xff]
- Ket
- End
-------------------------------------------------------------------
-
-/[\x0a\V]/BZ
-------------------------------------------------------------------
- Bra
- [\x00-\x0a\x0e-\x84\x86-\xff]
- Ket
- End
-------------------------------------------------------------------
-
/\H++X/BZ
------------------------------------------------------------------
Bra
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 559ab7b..f2def0c 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -797,7 +797,7 @@ No match
/[\V]/8BZ
------------------------------------------------------------------
Bra
- [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{2029}-\x{10ffff}]
+ [\x00-\x09\x0e-\x84\x86-\xff\x{100}-\x{2027}\x{202a}-\x{10ffff}]
Ket
End
------------------------------------------------------------------
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index d71a1e2..982e8a7 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -815,14 +815,6 @@ No match
End
------------------------------------------------------------------
-/[[:blank:]]/WBZ
-------------------------------------------------------------------
- Bra
- [\x09 \xa0]
- Ket
- End
-------------------------------------------------------------------
-
/[[:cntrl:]]/WBZ
------------------------------------------------------------------
Bra